ModErn Text Analysis
META Enumerates Textual Applications
length_filter.h
Go to the documentation of this file.
1 
9 #ifndef META_LENGTH_FILTER_H_
10 #define META_LENGTH_FILTER_H_
11 
12 #include <memory>
13 
15 #include "util/clonable.h"
16 #include "util/optional.h"
17 
18 namespace cpptoml
19 {
20 class table;
21 }
22 
23 namespace meta
24 {
25 namespace analyzers
26 {
27 namespace filters
28 {
29 
34 class length_filter : public util::clonable<token_stream, length_filter>
35 {
36  public:
45  length_filter(std::unique_ptr<token_stream> source, uint64_t min,
46  uint64_t max);
47 
52  length_filter(const length_filter& other);
53 
58  void set_content(const std::string& content) override;
59 
63  std::string next() override;
64 
68  operator bool() const override;
69 
71  const static std::string id;
72 
73  private:
77  void next_token();
78 
80  std::unique_ptr<token_stream> source_;
81 
83  util::optional<std::string> token_;
84 
86  uint64_t min_length_;
87 
89  uint64_t max_length_;
90 };
91 
95 template <>
96 std::unique_ptr<token_stream>
97  make_filter<length_filter>(std::unique_ptr<token_stream>,
98  const cpptoml::table&);
99 }
100 }
101 }
102 #endif
std::string next() override
Definition: length_filter.cpp:42
std::unique_ptr< token_stream > make_filter< length_filter >(std::unique_ptr< token_stream >, const cpptoml::table &)
Specialization of the factory method for creating length_filters.
Definition: length_filter.cpp:82
length_filter(std::unique_ptr< token_stream > source, uint64_t min, uint64_t max)
Constructs a length filter, reading tokens from the given source and eliminating any that are shorter...
Definition: length_filter.cpp:19
void set_content(const std::string &content) override
Sets the content for the beginning of the filter chain.
Definition: length_filter.cpp:35
std::unique_ptr< token_stream > source_
The source to read tokens from.
Definition: length_filter.h:80
uint64_t max_length_
The maximum length of a token that can be emitted by this filter.
Definition: length_filter.h:89
Template class to facilitate polymorphic cloning.
Definition: clonable.h:28
uint64_t min_length_
The minimum length of a token that can be emitted by this filter.
Definition: length_filter.h:86
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:24
static const std::string id
Identifier for this filter.
Definition: length_filter.h:71
util::optional< std::string > token_
The next buffered token.
Definition: length_filter.h:83
void next_token()
Advances internal state to the next valid token.
Definition: length_filter.cpp:54
Definition: analyzer.h:19
Filter that only retains tokens that are within a certain length range, inclusive.
Definition: length_filter.h:34