ModErn Text Analysis
META Enumerates Textual Applications
knn.h
Go to the documentation of this file.
1 
9 #ifndef META_KNN_H_
10 #define META_KNN_H_
11 
12 #include <unordered_set>
13 #include "index/inverted_index.h"
14 #include "index/forward_index.h"
15 #include "index/ranker/ranker.h"
18 
19 namespace meta
20 {
21 namespace classify
22 {
23 
/// Implements the k-Nearest Neighbor lazy learning classification algorithm.
27 class knn : public classifier
28 {
29  public:
    /// Identifier for this classifier.
33  const static std::string id;
34 
    /// Constructs a knn classifier; the definition lives in knn.cpp.
    /// NOTE(review): parameter roles below are inferred from the member
    /// descriptions in this header -- confirm against knn.cpp.
    /// @param idx presumably the inverted index used for ranking
    /// @param f_idx a forward index; its use is not visible in this header
    /// @param k presumably the value of k in k-NN
    /// @param ranker presumably the ranker used to score the queries in the
    /// index
    /// @param weighted presumably whether the neighbors are weighted by
    /// distance; defaults to false
42  knn(std::shared_ptr<index::inverted_index> idx,
43  std::shared_ptr<index::forward_index> f_idx, uint16_t k,
44  std::unique_ptr<index::ranker> ranker, bool weighted = false);
45 
    /// Creates a classification model based on training documents.
    /// @param docs the training documents
50  void train(const std::vector<doc_id>& docs) override;
51 
    /// Classifies a document into a specific group, as determined by
    /// training data.
    /// @param d_id the document to classify
    /// @return the class label chosen for the document
58  class_label classify(doc_id d_id) override;
59 
    /// Resets any learning information associated with this classifier.
63  void reset() override;
64 
65  private:
    /// Private const helper returning a class_label; defined in knn.cpp.
    /// NOTE(review): exact selection rule is not visible in this header.
    /// @param scored (doc_id, double) pairs -- presumably ranked documents
    /// with their scores; TODO confirm
    /// @param sorted (class_label, uint16_t) pairs -- presumably labels with
    /// counts in sorted order; TODO confirm
71  class_label select_best_label(
72  const std::vector<std::pair<doc_id, double>>& scored,
73  const std::vector<std::pair<class_label, uint16_t>>& sorted) const;
74 
    /// the inverted index used for ranking
76  std::shared_ptr<index::inverted_index> inv_idx_;
77 
    /// the value of k in k-NN
79  uint16_t k_;
80 
    /// The ranker that is used to score the queries in the index.
84  std::unique_ptr<index::ranker> ranker_;
85 
    /// documents that are "legal" to be used in the results
87  std::unordered_set<doc_id> legal_docs_;
88 
    /// Whether we want the neighbors to be weighted by distance or not.
90  const bool weighted_;
91 
92  public:
    /// Basic exception for knn interactions.
96  class knn_exception : public std::runtime_error
97  {
98  public:
    // Inherit all of std::runtime_error's constructors.
99  using std::runtime_error::runtime_error;
100  };
101 };
102 
/// Specialization of the factory method used to create knn classifiers.
/// Declared here and defined in knn.cpp. Takes a cpptoml::table, a forward
/// index and an inverted index, and returns the classifier as a
/// std::unique_ptr<classifier>.
/// NOTE(review): the table is presumably the classifier's configuration
/// group -- confirm against knn.cpp.
106 template <>
107 std::unique_ptr<classifier>
108  make_multi_index_classifier<knn>(const cpptoml::table&,
109  std::shared_ptr<index::forward_index>,
110  std::shared_ptr<index::inverted_index>);
111 }
112 }
113 #endif
std::unique_ptr< index::ranker > ranker_
The ranker that is used to score the queries in the index.
Definition: knn.h:84
void reset() override
Resets any learning information associated with this classifier.
Definition: knn.cpp:116
const bool weighted_
Whether we want the neighbors to be weighted by distance or not.
Definition: knn.h:90
Implements the k-Nearest Neighbor lazy learning classification algorithm.
Definition: knn.h:27
Basic exception for knn interactions.
Definition: knn.h:96
std::unique_ptr< classifier > make_multi_index_classifier< knn >(const cpptoml::table &, std::shared_ptr< index::forward_index >, std::shared_ptr< index::inverted_index >)
Specialization of the factory method used to create knn classifiers.
Definition: knn.cpp:122
static const std::string id
Identifier for this classifier.
Definition: knn.h:33
uint16_t k_
the value of k in k-NN
Definition: knn.h:79
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:24
void train(const std::vector< doc_id > &docs) override
Creates a classification model based on training documents.
Definition: knn.cpp:33
A classifier uses a document's feature space to identify which group it belongs to.
Definition: classifier.h:24
class_label select_best_label(const std::vector< std::pair< doc_id, double >> &scored, const std::vector< std::pair< class_label, uint16_t >> &sorted) const
Definition: knn.cpp:84
knn(std::shared_ptr< index::inverted_index > idx, std::shared_ptr< index::forward_index > f_idx, uint16_t k, std::unique_ptr< index::ranker > ranker, bool weighted=false)
Definition: knn.cpp:22
class_label classify(doc_id d_id) override
Classifies a document into a specific group, as determined by training data.
Definition: knn.cpp:38
std::shared_ptr< index::inverted_index > inv_idx_
the inverted index used for ranking
Definition: knn.h:76
std::unordered_set< doc_id > legal_docs_
documents that are "legal" to be used in the results
Definition: knn.h:87