ModErn Text Analysis
META Enumerates Textual Applications
winnow.h
Go to the documentation of this file.
1 
10 #ifndef META_WINNOW_H_
11 #define META_WINNOW_H_
12 
13 #include <vector>
14 #include <unordered_map>
15 #include "index/forward_index.h"
18 #include "meta.h"
19 
20 namespace meta
21 {
22 namespace classify
23 {
24 
30 class winnow : public classifier
31 {
32  public:
34  const static constexpr double default_m = 1.5;
36  const static constexpr double default_gamma = 0.05;
38  const static constexpr size_t default_max_iter = 100;
39 
49  winnow(std::shared_ptr<index::forward_index> idx, double m = default_m,
50  double gamma = default_gamma, size_t max_iter = default_max_iter);
51 
61  void train(const std::vector<doc_id>& docs) override;
62 
72  class_label classify(doc_id d_id) override;
73 
78  void reset() override;
79 
83  const static std::string id;
84 
85  private:
93  double get_weight(const class_label& label, const term_id& term) const;
94 
100  void zero_weights(const std::vector<doc_id>& docs);
101 
105  std::unordered_map<class_label, std::unordered_map<term_id, double>>
106  weights_;
107 
109  const double m_;
110 
112  const double gamma_;
113 
115  const size_t max_iter_;
116 };
117 
122 template <>
123 std::unique_ptr<classifier>
124  make_classifier<winnow>(const cpptoml::table& config,
125  std::shared_ptr<index::forward_index> idx);
126 }
127 }
128 #endif
void zero_weights(const std::vector< doc_id > &docs)
Initializes the weight vectors to zero for every class label.
Definition: winnow.cpp:38
Contains top-level namespace documentation for the META toolkit.
static const constexpr size_t default_max_iter
The default number of allowed iterations.
Definition: winnow.h:38
class_label label(const std::string &text)
Extracts a class_label from a string in libsvm format.
Definition: libsvm_parser.cpp:18
std::unique_ptr< classifier > make_classifier< winnow >(const cpptoml::table &config, std::shared_ptr< index::forward_index > idx)
Specialization of the factory method used for creating winnow classifiers.
Definition: winnow.cpp:106
class_label classify(doc_id d_id) override
Classifies the given document.
Definition: winnow.cpp:79
const size_t max_iter_
The maximum number of iterations for training.
Definition: winnow.h:115
std::unordered_map< class_label, std::unordered_map< term_id, double > > weights_
The weight vectors for each class label.
Definition: winnow.h:106
void train(const std::vector< doc_id > &docs) override
Trains the winnow on the given training documents.
Definition: winnow.cpp:44
Implements the Winnow classifier, a simplistic linear classifier for linearly-separable data...
Definition: winnow.h:30
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:24
A classifier uses a document's feature space to identify which group it belongs to.
Definition: classifier.h:24
void reset() override
Resets all learned information for this winnow so it may be re-learned.
Definition: winnow.cpp:99
static const std::string id
The identifier for this classifier.
Definition: winnow.h:83
const double gamma_
$\gamma$, the error threshold.
Definition: winnow.h:112
double get_weight(const class_label &label, const term_id &term) const
Definition: winnow.cpp:27
const double m_
$m$, the multiplicative learning rate.
Definition: winnow.h:109
static const constexpr double default_gamma
The default $\gamma$ parameter.
Definition: winnow.h:36
static const constexpr double default_m
The default $m$ parameter.
Definition: winnow.h:34
winnow(std::shared_ptr< index::forward_index > idx, double m=default_m, double gamma=default_gamma, size_t max_iter=default_max_iter)
Constructs a winnow classifier with the given multiplier, error threshold, and maximum iterations...
Definition: winnow.cpp:20