ModErn Text Analysis
META Enumerates Textual Applications
nearest_centroid.h
Go to the documentation of this file.
1 
9 #ifndef META_NEAREST_CENTROID_H_
10 #define META_NEAREST_CENTROID_H_
11 
12 #include "index/inverted_index.h"
13 #include "index/forward_index.h"
16 
17 namespace meta
18 {
19 namespace classify
20 {
21 
32 {
33  public:
35  const static std::string id;
36 
40  nearest_centroid(std::shared_ptr<index::inverted_index> idx,
41  std::shared_ptr<index::forward_index> f_idx);
42 
47  void train(const std::vector<doc_id>& docs) override;
48 
55  class_label classify(doc_id d_id) override;
56 
60  void reset() override;
61 
62  private:
68  double cosine_sim(const std::vector<std::pair<term_id, double>>& doc,
69  const std::unordered_map<term_id, double>& centroid);
70 
72  std::shared_ptr<index::inverted_index> inv_idx_;
73 
75  std::unordered_map<class_label, std::unordered_map<term_id, double>>
77 
78  public:
82  class nearest_centroid_exception : public std::runtime_error
83  {
84  public:
85  using std::runtime_error::runtime_error;
86  };
87 };
88 
/**
 * Specialization of the factory method used to create nearest_centroid
 * classifiers.
 *
 * This is a declaration only; the definition lives in nearest_centroid.cpp.
 * The parameters are unnamed here. In order they are: a cpptoml::table
 * (presumably the classifier's configuration -- confirm against the
 * definition), a shared forward_index, and a shared inverted_index.
 *
 * Note the index order: forward index first, then inverted index. This is
 * the reverse of the nearest_centroid constructor, which takes the
 * inverted index first.
 *
 * @return a std::unique_ptr<classifier>
 */
93 template <>
94 std::unique_ptr<classifier> make_multi_index_classifier<nearest_centroid>(
95  const cpptoml::table&, std::shared_ptr<index::forward_index>,
96  std::shared_ptr<index::inverted_index>);
97 }
98 }
99 #endif
std::unordered_map< class_label, std::unordered_map< term_id, double > > centroids_
The document centroids for this learner.
Definition: nearest_centroid.h:76
Basic exception for nearest_centroid interactions.
Definition: nearest_centroid.h:82
void reset() override
Resets any learning information associated with this classifier.
Definition: nearest_centroid.cpp:101
static const std::string id
Identifier for this classifier.
Definition: nearest_centroid.h:35
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:24
Implements the nearest centroid classification algorithm.
Definition: nearest_centroid.h:31
A classifier uses a document's feature space to identify which group it belongs to.
Definition: classifier.h:24
class_label classify(doc_id d_id) override
Classifies a document into a specific group, as determined by training data.
Definition: nearest_centroid.cpp:52
double cosine_sim(const std::vector< std::pair< term_id, double >> &doc, const std::unordered_map< term_id, double > &centroid)
Definition: nearest_centroid.cpp:78
std::unique_ptr< classifier > make_multi_index_classifier< nearest_centroid >(const cpptoml::table &, std::shared_ptr< index::forward_index >, std::shared_ptr< index::inverted_index >)
Specialization of the factory method used to create nearest_centroid classifiers. ...
Definition: nearest_centroid.cpp:107
nearest_centroid(std::shared_ptr< index::inverted_index > idx, std::shared_ptr< index::forward_index > f_idx)
Definition: nearest_centroid.cpp:21
std::shared_ptr< index::inverted_index > inv_idx_
Inverted index used for ranking.
Definition: nearest_centroid.h:72
void train(const std::vector< doc_id > &docs) override
Creates a classification model based on training documents.
Definition: nearest_centroid.cpp:27