ModErn Text Analysis
META Enumerates Textual Applications
inverted_index.h
Go to the documentation of this file.
1 
11 #ifndef META_INVERTED_INDEX_H_
12 #define META_INVERTED_INDEX_H_
13 
14 #include <queue>
15 #include <stdexcept>
16 
17 #include "index/disk_index.h"
18 #include "index/make_index.h"
19 
// Forward declarations only: these names are declared here without
// definitions, so this header does not include the headers that define them.
20 namespace meta
21 {
22 
23 namespace corpus
24 {
25 class corpus;
// Per the generated docs: represents an indexable document.
26 class document;
27 }
28 
29 namespace index
30 {
31 
// Class template with one type parameter; not referenced elsewhere in this
// listing. NOTE(review): purpose is not visible from here.
32 template <class>
33 class chunk_handler;
34 
// Per the generated docs: represents the per-PrimaryKey data in an index's
// postings file.
35 template <class, class>
36 class postings_data;
37 }
38 }
39 
40 namespace meta
41 {
42 namespace index
43 {
44 
/**
 * The inverted_index class stores information on a corpus indexed by
 * term_ids. It derives from disk_index, which holds the generic data
 * structures and functions shared by inverted_index and forward_index.
 *
 * Copying is disabled; the constructor is protected and the make_index
 * factory functions are declared as friends.
 */
54 class inverted_index : public disk_index
55 {
56  public:
    /**
     * Basic exception for inverted_index interactions. Inherits all of
     * std::runtime_error's constructors.
     */
60  class inverted_index_exception : public std::runtime_error
61  {
62  public:
63  using std::runtime_error::runtime_error;
64  };
65 
    // Key types of this index: the primary key is a term_id and the
    // secondary key is a doc_id.
66  using primary_key_type = term_id;
67  using secondary_key_type = doc_id;
71 
    /**
     * inverted_index is a friend of the factory method used to create it.
     */
76  template <class Index, class... Args>
77  friend std::shared_ptr<Index> make_index(const std::string&, Args&&...);
78 
    /**
     * Friend declaration for the make_index overload that returns a
     * cached_index<Index, Cache> wrapper.
     */
83  template <class Index, template <class, class> class Cache, class... Args>
84  friend std::shared_ptr<cached_index<Index, Cache>>
85  make_index(const std::string& config_file, Args&&... args);
86 
87  protected:
    /**
     * @param config The configuration table for this index.
     * Protected: not callable by outside code; the friend make_index
     * factories declared above have access.
     */
92  inverted_index(const cpptoml::table& config);
93 
94  public:
    // NOTE(review): the generated docs mention a move assignment operator
    // `operator=(inverted_index&&)`, but no move operations appear in this
    // listing (the two lines below are empty) -- confirm against the real
    // header.
99 
104 
    /**
     * inverted_index may not be copy-constructed.
     */
108  inverted_index(const inverted_index&) = delete;
109 
    /**
     * inverted_index may not be copy-assigned.
     */
113  inverted_index& operator=(const inverted_index&) = delete;
114 
    /**
     * Destructor; declared virtual.
     */
118  virtual ~inverted_index();
119 
    /**
     * @param doc The document to tokenize; taken by non-const reference.
     * NOTE(review): presumably fills in the document's term data -- confirm
     * in inverted_index.cpp.
     */
123  void tokenize(corpus::document& doc);
124 
    /**
     * @param t_id The term_id to look up
     * @return a shared pointer to a postings_data_type object.
     * NOTE(review): presumably the postings for t_id -- confirm in
     * inverted_index.cpp. Virtual, so derived classes may override it.
     */
129  virtual std::shared_ptr<postings_data_type>
130  search_primary(term_id t_id) const;
131 
    /**
     * @param t_id The term_id to query
     * NOTE(review): presumably the number of documents containing t_id --
     * confirm in inverted_index.cpp.
     */
137  uint64_t doc_freq(term_id t_id) const;
138 
    /**
     * @param t_id The term_id to query
     * @param d_id The doc_id to query
     * NOTE(review): presumably the count of t_id within d_id -- confirm in
     * inverted_index.cpp.
     */
143  uint64_t term_freq(term_id t_id, doc_id d_id) const;
144 
    /**
     * NOTE(review): presumably the total number of terms in the corpus.
     * Unlike the query functions above, this member is not const.
     */
148  uint64_t total_corpus_terms();
149 
    /**
     * @param t_id The term_id to query
     * NOTE(review): presumably the number of occurrences of t_id across the
     * corpus -- confirm in inverted_index.cpp.
     */
154  uint64_t total_num_occurences(term_id t_id) const;
155 
    /**
     * NOTE(review): presumably the average document length in the corpus.
     * This member is not const.
     */
159  double avg_doc_length();
160 
161  private:
    /**
     * This function initializes the disk index; it is called by the
     * make_index factory function.
     * @param config_file The configuration file passed on by the factory
     */
167  void create_index(const std::string& config_file);
168 
    /**
     * This function loads a disk index from its filesystem representation.
     */
173  void load_index();
174 
    /**
     * NOTE(review): validity criteria are not visible from this header; see
     * inverted_index.cpp.
     */
178  bool valid() const;
179 
    /**
     * Implementation of an inverted_index; only declared here (defined in
     * inverted_index.cpp per the generated docs).
     * NOTE(review): the generated docs also reference a
     * `util::pimpl<impl> inv_impl_` member that does not appear in this
     * listing -- confirm against the real header.
     */
181  class impl;
184 };
185 }
186 }
187 
188 #endif
inverted_index & operator=(inverted_index &&)
Move assigns an inverted_index.
virtual std::shared_ptr< postings_data_type > search_primary(term_id t_id) const
Definition: inverted_index.cpp:312
void load_index()
This function loads a disk index from its filesystem representation.
Definition: inverted_index.cpp:149
The inverted_index class stores information on a corpus indexed by term_ids.
Definition: inverted_index.h:54
Holds generic data structures and functions that inverted_index and forward_index both use...
Definition: disk_index.h:55
uint64_t total_corpus_terms()
Definition: inverted_index.cpp:275
util::pimpl< impl > inv_impl_
Implementation of this index.
Definition: inverted_index.h:181
double avg_doc_length()
Definition: inverted_index.cpp:297
uint64_t total_num_occurences(term_id t_id) const
Definition: inverted_index.cpp:286
uint64_t doc_freq(term_id t_id) const
Definition: inverted_index.cpp:307
Class to assist in simple pointer-to-implementation classes.
Definition: pimpl.h:26
friend std::shared_ptr< Index > make_index(const std::string &, Args &&...)
inverted_index is a friend of the factory method used to create it.
uint64_t term_freq(term_id t_id, doc_id d_id) const
Definition: inverted_index.cpp:269
inverted_index(const cpptoml::table &config)
Definition: inverted_index.cpp:87
Represents an indexable document.
Definition: document.h:31
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:24
virtual ~inverted_index()
Default destructor.
Basic exception for inverted_index interactions.
Definition: inverted_index.h:60
A class to represent the per-PrimaryKey data in an index's postings file.
Definition: forward_index.h:30
void tokenize(corpus::document &doc)
Definition: inverted_index.cpp:302
void create_index(const std::string &config_file)
This function initializes the disk index; it is called by the make_index factory function.
Definition: inverted_index.cpp:113
bool valid() const
Definition: inverted_index.cpp:98
Implementation of an inverted_index.
Definition: inverted_index.cpp:30