meta/doxygen/whitespace__tokenizer_8h_source.html

 #ifndef META_WHITESPACE_TOKENIZER_H_

 #define META_WHITESPACE_TOKENIZER_H_


 #include "analyzers/token_stream.h"

 #include "util/clonable.h"


 // Forward declaration of meta::corpus::document, so the name can be used
 // here without pulling in its full definition.
 // NOTE(review): document is not referenced by the declarations visible in
 // this header -- confirm whether the forward declaration is still needed.
 namespace meta
 {
 namespace corpus
 {
 class document;
 } // namespace corpus
 } // namespace meta


 namespace meta
 {
 namespace analyzers
 {
 namespace tokenizers
 {

 /**
  * Converts documents into streams of whitespace delimited tokens.
  */
 class whitespace_tokenizer
     : public util::clonable<token_stream, whitespace_tokenizer>
 {
   public:
     /**
      * Creates a whitespace_tokenizer.
      */
     whitespace_tokenizer();

     /**
      * Sets the content for the tokenizer to parse.
      * @param content The string handed to the tokenizer
      */
     void set_content(const std::string& content) override;

     /**
      * NOTE(review): presumably yields the next whitespace-delimited token
      * from the buffered content -- confirm against the definition in
      * whitespace_tokenizer.cpp.
      * @return a token as a std::string
      */
     std::string next() override;

     /**
      * NOTE(review): presumably reports whether more tokens remain in the
      * stream -- confirm against the token_stream contract.
      */
     operator bool() const override;

     /// Identifier for this tokenizer.
     static const std::string id;

   private:
     /// Buffered string content for this tokenizer.
     std::string content_;

     /// Character index into the current buffer.
     uint64_t idx_;
 };

 } // namespace tokenizers
 } // namespace analyzers
 } // namespace meta

 #endif

meta::analyzers::tokenizers::whitespace_tokenizer::id
static const std::string id
Identifier for this tokenizer.
Definition: whitespace_tokenizer.h:63

meta::analyzers::tokenizers::whitespace_tokenizer::idx_
uint64_t idx_
Character index into the current buffer.
Definition: whitespace_tokenizer.h:70

meta::analyzers::tokenizers::whitespace_tokenizer::set_content
void set_content(const std::string &content) override
Sets the content for the tokenizer to parse.
Definition: whitespace_tokenizer.cpp:26

meta::analyzers::tokenizers::whitespace_tokenizer::content_
std::string content_
Buffered string content for this tokenizer.
Definition: whitespace_tokenizer.h:67

meta::util::multilevel_clonable
Template class to facilitate polymorphic cloning.
Definition: clonable.h:28

meta::analyzers::tokenizers::whitespace_tokenizer::next
std::string next() override
Definition: whitespace_tokenizer.cpp:32

clonable.h

meta
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:24

meta::analyzers::tokenizers::whitespace_tokenizer
Converts documents into streams of whitespace delimited tokens.
Definition: whitespace_tokenizer.h:35

meta::analyzers::tokenizers::whitespace_tokenizer::whitespace_tokenizer
whitespace_tokenizer()
Creates a whitespace_tokenizer.
Definition: whitespace_tokenizer.cpp:22

token_stream.h