ModErn Text Analysis
META Enumerates Textual Applications
disk_index.h
Go to the documentation of this file.
1 
11 #ifndef META_DISK_INDEX_H_
12 #define META_DISK_INDEX_H_
13 
14 #include <memory>
15 #include <vector>
16 #include "util/pimpl.h"
17 #include "meta.h"
18 
19 namespace cpptoml
20 {
21 class table;
22 }
23 
24 namespace meta
25 {
26 
27 namespace index
28 {
29 class string_list;
30 class vocabulary_map;
31 }
32 
33 namespace tokenizers
34 {
35 class tokenizer;
36 }
37 
38 namespace util
39 {
40 template <class>
41 class disk_vector;
42 }
43 }
44 
45 namespace meta
46 {
47 namespace index
48 {
49 
55 class disk_index
56 {
57  public:
61  virtual ~disk_index() = default;
62 
66  std::string index_name() const;
67 
71  uint64_t num_docs() const;
72 
77  std::string doc_name(doc_id d_id) const;
78 
83  std::string doc_path(doc_id d_id) const;
84 
88  std::vector<doc_id> docs() const;
89 
95  uint64_t doc_size(doc_id d_id) const;
96 
102  class_label label(doc_id d_id) const;
103 
108  label_id lbl_id(doc_id d_id) const;
109 
114  label_id id(class_label label) const;
115 
120  class_label class_label_from_id(label_id l_id) const;
121 
125  uint64_t num_labels() const;
126 
131  std::vector<class_label> class_labels() const;
132 
137  virtual uint64_t unique_terms(doc_id d_id) const;
138 
142  virtual uint64_t unique_terms() const;
143 
148  term_id get_term_id(const std::string& term);
149 
154  std::string term_text(term_id t_id) const;
155 
156  protected:
158  util::pimpl<disk_index_impl> impl_;
161 
167  disk_index(const cpptoml::table& config, const std::string& name);
168 
172  disk_index(const disk_index&) = delete;
173 
177  disk_index& operator=(const disk_index&) = delete;
178 
179  public:
183  disk_index(disk_index&&) = default;
184 
188  disk_index& operator=(disk_index&&) = default;
189 };
190 }
191 }
192 
193 #endif
std::string index_name() const
Definition: disk_index.cpp:29
std::string doc_path(doc_id d_id) const
Definition: disk_index.cpp:102
Contains top-level namespace documentation for the META toolkit.
virtual ~disk_index()=default
Default destructor.
std::vector< class_label > class_labels() const
Definition: disk_index.cpp:71
Holds generic data structures and functions that inverted_index and forward_index both use...
Definition: disk_index.h:55
term_id get_term_id(const std::string &term)
Definition: disk_index.cpp:34
disk_index(const cpptoml::table &config, const std::string &name)
Constructor.
Definition: disk_index.cpp:24
label_id id(class_label label) const
Definition: disk_index.cpp:56
class_label class_label_from_id(label_id l_id) const
Definition: disk_index.cpp:61
virtual uint64_t unique_terms() const
Definition: disk_index.cpp:81
uint64_t num_docs() const
Definition: disk_index.cpp:91
Class to assist in simple pointer-to-implementation classes.
Definition: pimpl.h:26
The implementation of a disk_index.
Definition: disk_index_impl.h:48
disk_vector represents a large constant-size vector that does not necessarily fit in memory...
Definition: disk_index.h:41
label_id lbl_id(doc_id d_id) const
Definition: disk_index.cpp:51
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:24
disk_index & operator=(const disk_index &)=delete
disk_index may not be copy-assigned.
util::pimpl< disk_index_impl > impl_
Implementation of this disk_index.
Definition: disk_index.h:158
std::vector< doc_id > docs() const
Definition: disk_index.cpp:107
uint64_t num_labels() const
Definition: disk_index.cpp:66
std::string term_text(term_id t_id) const
Definition: disk_index.cpp:230
uint64_t doc_size(doc_id d_id) const
Definition: disk_index.cpp:86
std::string doc_name(doc_id d_id) const
Definition: disk_index.cpp:96
class_label label(doc_id d_id) const
Definition: disk_index.cpp:46
Definition: analyzer.h:19