ModErn Text Analysis
META Enumerates Textual Applications
tree_analyzer.h
Go to the documentation of this file.
1 
10 #ifndef META_TREE_ANALYZER_H_
11 #define META_TREE_ANALYZER_H_
12 
13 #include "corpus/document.h"
14 #include "analyzers/analyzer.h"
17 #include "parser/sr_parser.h"
18 #include "sequence/perceptron.h"
19 #include "util/clonable.h"
20 
21 namespace meta
22 {
23 namespace analyzers
24 {
25 
29 class tree_analyzer : public util::clonable<analyzer, tree_analyzer> // Base class tokenizing using parse tree features.
30 {
31  public:
35  tree_analyzer(std::unique_ptr<token_stream> stream, // Creates a tree analyzer.
36  const std::string& tagger_prefix,
37  const std::string& parser_prefix);
38 
43  tree_analyzer(const tree_analyzer& other); // Copy constructor.
44 
49  void tokenize(corpus::document& doc) override; // Tokenizes a file into a document.
50 
54  void add(std::unique_ptr<const tree_featurizer> featurizer); // Adds a tree featurizer to the list.
55 
59  const static std::string id; // Identifier for this analyzer.
60 
61  private:
65  std::shared_ptr<std::vector<std::unique_ptr<const tree_featurizer>>>
66  featurizers_; // A list of tree_featurizers to run on each parse tree.
67 
71  std::unique_ptr<token_stream> stream_; // The token stream for extracting tokens.
72 
79  std::shared_ptr<const sequence::perceptron> tagger_; // The tagger used for tagging individual sentences.
80 
86  std::shared_ptr<const parser::sr_parser> parser_; // The parser to parse individual sentences.
87 };
88 
92 template <> // Specialization of the factory method for creating tree analyzers.
93 std::unique_ptr<analyzer> make_analyzer<tree_analyzer>(const cpptoml::table&,
94  const cpptoml::table&);
95 }
96 
97 namespace parser
98 {
102 void register_analyzers(); // Register analyzers provided by the meta-parser-analyzers library.
103 }
104 }
105 #endif
std::unique_ptr< token_stream > stream_
The token stream for extracting tokens.
Definition: tree_analyzer.h:71
void register_analyzers()
Register analyzers provided by the meta-parser-analyzers library.
Definition: tree_analyzer.cpp:108
std::shared_ptr< const sequence::perceptron > tagger_
The tagger used for tagging individual sentences.
Definition: tree_analyzer.h:79
void tokenize(corpus::document &doc) override
Tokenizes a file into a document.
Definition: tree_analyzer.cpp:48
static const std::string id
Identifier for this analyzer.
Definition: tree_analyzer.h:59
std::unique_ptr< analyzer > make_analyzer< tree_analyzer >(const cpptoml::table &, const cpptoml::table &)
Specialization of the factory method for creating tree analyzers.
Definition: tree_analyzer.cpp:77
std::shared_ptr< const parser::sr_parser > parser_
The parser to parse individual sentences.
Definition: tree_analyzer.h:86
std::shared_ptr< std::vector< std::unique_ptr< const tree_featurizer > > > featurizers_
A list of tree_featurizers to run on each parse tree.
Definition: tree_analyzer.h:66
Template class to facilitate polymorphic cloning.
Definition: clonable.h:28
Base class for tokenizing using parse tree features.
Definition: tree_analyzer.h:29
void add(std::unique_ptr< const tree_featurizer > featurizer)
Adds a tree featurizer to the list.
Definition: tree_analyzer.cpp:43
Represents an indexable document.
Definition: document.h:31
tree_analyzer(std::unique_ptr< token_stream > stream, const std::string &tagger_prefix, const std::string &parser_prefix)
Creates a tree analyzer.
Definition: tree_analyzer.cpp:19
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:24