ModErn Text Analysis
META Enumerates Textual Applications
lda_scvb.h
Go to the documentation of this file.
1 
10 #ifndef META_TOPICS_LDA_SCVB_H_
11 #define META_TOPICS_LDA_SCVB_H_
12 
13 #include "topics/lda_model.h"
14 
15 namespace meta
16 {
17 namespace topics
18 {
19 
/**
 * lda_scvb: an LDA topic model (derived from the lda_model base class)
 * that uses stochastic collapsed variational Bayes for inference.
 */
27 class lda_scvb : public lda_model
28 {
29  public:
    /**
     * Constructs the LDA model over the given documents with the given
     * number of topics and prior hyperparameters.
     *
     * @param idx            Forward index supplying the documents to model
     * @param num_topics     The number of topics
     * @param alpha          The hyperparameter on the topic proportions
     * @param beta           The hyperparameter on the topic distributions
     * @param minibatch_size The size of the minibatches (defaults to 100)
     */
46  lda_scvb(std::shared_ptr<index::forward_index> idx, uint64_t num_topics,
47  double alpha, double beta, uint64_t minibatch_size = 100);
48 
    /**
     * Destructor: virtual for potential subclassing.
     */
52  virtual ~lda_scvb() = default;
53 
    /**
     * Runs the variational inference algorithm for a maximum number of
     * iterations.
     *
     * @param num_iters   The maximum number of iterations to run
     * @param convergence Convergence parameter (defaults to 0).
     *                    NOTE(review): how this value is applied as a
     *                    stopping criterion is defined in lda_scvb.cpp and
     *                    is not visible from this declaration -- confirm.
     */
64  virtual void run(uint64_t num_iters, double convergence = 0) override;
65 
66  protected:
    /**
     * Override of the lda_model hook of the same name.
     * NOTE(review): presumably returns the probability of `term` under
     * `topic`; confirm against the definition in lda_scvb.cpp.
     */
67  virtual double compute_term_topic_probability(term_id term,
68  topic_id topic) const
69  override;
70 
    /**
     * Override of the lda_model hook of the same name.
     * NOTE(review): presumably returns the probability of `topic` within
     * `doc`; confirm against the definition in lda_scvb.cpp.
     */
71  virtual double compute_doc_topic_probability(doc_id doc,
72  topic_id topic) const override;
73 
74  private:
    /**
     * Initializes the model with random parameters.
     *
     * @param gen The random number generator to draw from (taken by
     *            non-const reference)
     */
80  void initialize(std::mt19937& gen);
81 
    /**
     * Performs one iteration (e.g., one minibatch) of the inference
     * algorithm.
     *
     * @param iter The iteration number -- presumably the index of the
     *             current iteration; verify against run()'s definition
     * @param docs The documents to process in this iteration --
     *             presumably one minibatch; verify against the caller
     */
87  void perform_iteration(uint64_t iter, const std::vector<doc_id>& docs);
88 
    /**
     * Contains the expected counts for each word being assigned a given
     * topic.
     */
94  std::vector<std::vector<double>> topic_term_count_;
95 
    /**
     * Contains the expected counts for each topic being assigned in a
     * given document.
     */
101  std::vector<std::vector<double>> doc_topic_count_;
102 
    /**
     * Contains the expected number of times the given topic has been
     * assigned to a word.
     */
108  std::vector<double> topic_count_;
109 
    /// The hyperparameter on the topic proportions.
111  const double alpha_;
    /// The hyperparameter on the topic distributions.
113  const double beta_;
    /// The size of the minibatches.
115  const uint64_t minibatch_size_;
116 };
117 }
118 }
119 
120 #endif
virtual double compute_doc_topic_probability(doc_id doc, topic_id topic) const override
Definition: lda_scvb.cpp:176
An LDA topic model base class.
Definition: lda_model.h:25
lda_scvb: An implementation of LDA that uses stochastic collapsed variational Bayes for inference...
Definition: lda_scvb.h:27
virtual ~lda_scvb()=default
Destructor: virtual for potential subclassing.
void perform_iteration(uint64_t iter, const std::vector< doc_id > &docs)
Performs one iteration (e.g., one minibatch) of the inference algorithm.
Definition: lda_scvb.cpp:76
lda_scvb(std::shared_ptr< index::forward_index > idx, uint64_t num_topics, double alpha, double beta, uint64_t minibatch_size=100)
Constructs the LDA model over the given documents, with the given number of topics, and hyperparameters $\alpha$ and $\beta$ for the priors on $\theta$ (topic proportions) and $\phi$ (topic distributions), respectively.
Definition: lda_scvb.cpp:16
const double alpha_
The hyperparameter on $\theta$, the topic proportions.
Definition: lda_scvb.h:111
std::vector< std::vector< double > > topic_term_count_
Contains the expected counts for each word being assigned a given topic.
Definition: lda_scvb.h:94
std::vector< double > topic_count_
Contains the expected number of times the given topic has been assigned to a word.
Definition: lda_scvb.h:108
const uint64_t minibatch_size_
The size of the minibatches.
Definition: lda_scvb.h:115
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:24
const double beta_
The hyperparameter on $\phi$, the topic distributions.
Definition: lda_scvb.h:113
std::vector< std::vector< double > > doc_topic_count_
Contains the expected counts for each topic being assigned in a given document.
Definition: lda_scvb.h:101
void initialize(std::mt19937 &gen)
Initialize the model with random parameters.
Definition: lda_scvb.cpp:39
virtual void run(uint64_t num_iters, double convergence=0) override
Runs the variational inference algorithm for a maximum number of iterations.
Definition: lda_scvb.cpp:27
virtual double compute_term_topic_probability(term_id term, topic_id topic) const override
Definition: lda_scvb.cpp:169