ModErn Text Analysis
META Enumerates Textual Applications
lda_model.h
Go to the documentation of this file.
1 
10 #ifndef META_TOPICS_LDA_MODEL_H_
11 #define META_TOPICS_LDA_MODEL_H_
12 
13 #include "index/forward_index.h"
14 
// Introduces the topic_id identifier type used by the topic-model interfaces
// below. MAKE_NUMERIC_IDENTIFIER is defined elsewhere in the project;
// presumably uint64_t is the underlying numeric type -- confirm against the
// macro's definition.
15 MAKE_NUMERIC_IDENTIFIER(topic_id, uint64_t)
16 
17 namespace meta
18 {
19 namespace topics
20 {
21 
/**
 * An LDA topic model base class.
 *
 * The class is abstract: run() and the two compute_*_probability() members
 * are pure virtual and must be supplied by a derived model. Copying is
 * disabled.
 */
25 class lda_model
26 {
27  public:
    /**
     * Constructs a model over the given forward index.
     *
     * @param idx shared pointer to the forward index holding the documents
     * (presumably stored in idx_; the definition is not in this header)
     * @param num_topics the number of topics (presumably stored in
     * num_topics_)
     */
35  lda_model(std::shared_ptr<index::forward_index> idx, uint64_t num_topics);
36 
    /**
     * Virtual (defaulted) destructor, so a derived model can be destroyed
     * through a pointer to this base class.
     */
41  virtual ~lda_model() = default;
42 
    /**
     * Runs the model. Pure virtual: each derived class provides the actual
     * procedure.
     *
     * @param num_iters presumably the maximum number of iterations to
     * perform -- confirm against the derived implementations
     * @param convergence presumably a threshold used to decide that the
     * run has converged -- confirm against the derived implementations
     */
51  virtual void run(uint64_t num_iters, double convergence) = 0;
52 
    /**
     * By its name, saves the per-document topic distributions; the
     * definition is not part of this header.
     *
     * @param filename presumably the path of the file to write -- confirm
     */
60  void save_doc_topic_distributions(const std::string& filename) const;
61 
    /**
     * By its name, saves the per-topic term distributions; the definition
     * is not part of this header.
     *
     * @param filename presumably the path of the file to write -- confirm
     */
69  void save_topic_term_distributions(const std::string& filename) const;
70 
    /**
     * Saves the model. NOTE(review): presumably writes both distributions
     * above to files whose names are derived from prefix -- confirm
     * against the definition.
     *
     * @param prefix presumably the common prefix for the output file names
     */
77  void save(const std::string& prefix) const;
78 
79  protected:
    /**
     * Copy assignment is deleted: models cannot be copy-assigned.
     */
83  lda_model& operator=(const lda_model&) = delete;
84 
    /**
     * Copy construction is deleted: models cannot be copy-constructed.
     */
88  lda_model(const lda_model&) = delete;
89 
    /**
     * Pure virtual hook returning a double for a (term, topic) pair; by its
     * name, the probability of the term under the topic. How it is
     * computed is left to the derived model.
     *
     * @param term the term identifier
     * @param topic the topic identifier
     */
97  virtual double compute_term_topic_probability(term_id term,
98  topic_id topic) const = 0;
99 
    /**
     * Pure virtual hook returning a double for a (document, topic) pair; by
     * its name, the probability of the topic for the document. How it is
     * computed is left to the derived model.
     *
     * @param doc the document identifier
     * @param topic the topic identifier
     */
107  virtual double compute_doc_topic_probability(doc_id doc,
108  topic_id topic) const = 0;
109 
    /**
     * The index containing the documents for the model.
     */
113  std::shared_ptr<index::forward_index> idx_;
114 
    /**
     * The number of topics.
     */
118  size_t num_topics_;
119 
    /**
     * The number of total unique words.
     */
123  size_t num_words_;
124 };
125 }
126 }
127 
128 #endif
An LDA topic model base class.
Definition: lda_model.h:25
size_t num_words_
The number of total unique words.
Definition: lda_model.h:123
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:24
size_t num_topics_
The number of topics.
Definition: lda_model.h:118
std::shared_ptr< index::forward_index > idx_
The index containing the documents for the model.
Definition: lda_model.h:113
std::string filename(const std::string &path)
Definition: unit_test.h:114