ModErn Text Analysis
META Enumerates Textual Applications
dual_perceptron.h
Go to the documentation of this file.
1 
9 #ifndef META_CLASSIFY_DUAL_PERCEPTRON_H_
10 #define META_CLASSIFY_DUAL_PERCEPTRON_H_
11 
15 #include "util/functional.h"
16 #include "meta.h"
17 
18 namespace meta
19 {
20 namespace classify
21 {
22 
29 {
30  public:
32  const static constexpr double default_alpha = 0.1;
33 
35  const static constexpr double default_gamma = 0.05;
36 
38  const static constexpr double default_bias = 0;
39 
41  const static constexpr uint64_t default_max_iter = 100;
42 
44  const static std::string id;
45 
58  template <class Kernel>
59  dual_perceptron(std::shared_ptr<index::forward_index> idx,
60  Kernel&& kernel_fn = kernel::polynomial{},
61  double alpha = default_alpha, double gamma = default_gamma,
62  double bias = default_bias,
63  uint64_t max_iter = default_max_iter)
64  : classifier{std::move(idx)},
65  alpha_{alpha},
66  gamma_{gamma},
67  bias_{bias},
68  max_iter_{max_iter}
69  {
70  std::function<double(pdata, pdata)> fun
71  = [=](const pdata& a, const pdata& b) {
72  return kernel_fn(a, b);
73  };
74  kernel_ = functional::memoize(fun);
75  }
76 
91  void train(const std::vector<doc_id>& docs) override;
92 
102  class_label classify(doc_id d_id) override;
103 
108  void reset() override;
109 
110  private:
118  void decrease_weight(const class_label& label, const doc_id& id);
119 
123  std::unordered_map<class_label, std::unordered_map<doc_id, uint64_t>>
125 
129  using pdata = decltype(idx_->search_primary(doc_id{}));
130 
134  std::function<double(pdata, pdata)> kernel_;
135 
139  const double alpha_;
140 
145  const double gamma_;
146 
150  const double bias_;
151 
155  const uint64_t max_iter_;
156 };
157 
161 template <>
162 std::unique_ptr<classifier> make_classifier<dual_perceptron>(
163  const cpptoml::table&, std::shared_ptr<index::forward_index>);
164 }
165 }
166 #endif
Contains top-level namespace documentation for the META toolkit.
static const std::string id
The identifier for this classifier.
Definition: dual_perceptron.h:44
void decrease_weight(const class_label &label, const doc_id &id)
Decreases the "weight" (mistake count) for a given class label and document.
Definition: dual_perceptron.cpp:59
A polynomial kernel function for a linear classifier to adapt it to data that is not linearly separab...
Definition: polynomial.h:28
std::unordered_map< class_label, std::unordered_map< doc_id, uint64_t > > weights_
The "weight" (mistake count) vectors for each class label.
Definition: dual_perceptron.h:124
class_label label(const std::string &text)
Extracts a class_label from a string in libsvm format.
Definition: libsvm_parser.cpp:18
const double gamma_
$\gamma$, the error threshold (in terms of percentage of mistakes on the training data in one iteration of tr...
Definition: dual_perceptron.h:145
void train(const std::vector< doc_id > &docs) override
Trains the perceptron on the given training documents.
Definition: dual_perceptron.cpp:24
class_label classify(doc_id d_id) override
Classifies the given document.
Definition: dual_perceptron.cpp:70
Implements a perceptron classifier, but using the dual formulation of the problem.
Definition: dual_perceptron.h:28
dual_perceptron(std::shared_ptr< index::forward_index > idx, Kernel &&kernel_fn=kernel::polynomial{}, double alpha=default_alpha, double gamma=default_gamma, double bias=default_bias, uint64_t max_iter=default_max_iter)
Constructs a dual_perceptron classifier over the given index and with the given parameters.
Definition: dual_perceptron.h:59
std::shared_ptr< index::forward_index > idx_
The index that the classifier is run on.
Definition: classifier.h:77
void reset() override
Resets all learned information for this perceptron so it may be re-learned.
Definition: dual_perceptron.cpp:94
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:24
A classifier uses a document's feature space to identify which group it belongs to.
Definition: classifier.h:24
decltype(idx_->search_primary(doc_id{})) pdata
Convenience typedef for the postings data type.
Definition: dual_perceptron.h:129
std::function< double(pdata, pdata)> kernel_
The kernel function to be used in lieu of a dot product.
Definition: dual_perceptron.h:134
static const constexpr double default_alpha
The default $\alpha$ (learning rate) parameter.
Definition: dual_perceptron.h:32
std::unique_ptr< classifier > make_classifier< dual_perceptron >(const cpptoml::table &, std::shared_ptr< index::forward_index >)
Specialization of the factory function used to create dual_perceptrons.
Definition: dual_perceptron.cpp:101
static const constexpr double default_bias
The default $b$ (bias) parameter.
Definition: dual_perceptron.h:38
const uint64_t max_iter_
The maximum number of iterations for training.
Definition: dual_perceptron.h:155
const double bias_
$b$, the bias factor.
Definition: dual_perceptron.h:150
static const constexpr double default_gamma
The default $\gamma$ (error threshold) parameter.
Definition: dual_perceptron.h:35
static const constexpr uint64_t default_max_iter
The default number of allowed iterations.
Definition: dual_perceptron.h:41
const double alpha_
$\alpha$, the learning rate.
Definition: dual_perceptron.h:139