ModErn Text Analysis
META Enumerates Textual Applications
corpus.h
Go to the documentation of this file.
1 
10 #ifndef META_CORPUS_H_
11 #define META_CORPUS_H_
12 
13 #include <stdexcept>
14 #include <memory>
15 
16 #include "meta.h"
17 #include "corpus/document.h"
18 
19 namespace meta
20 {
21 namespace corpus
22 {
23 
// Abstract base class that provides an interface to multiple corpus input
// formats. has_next(), next() and size() are pure virtual, so this class
// cannot be instantiated on its own.
// NOTE(review): std::string and uint64_t are used below, but <string> and
// <cstdint> are not among the includes visible in this header; presumably
// they arrive transitively (e.g. via meta.h) -- confirm.
27 class corpus
28 {
29  public:
// Constructs a new corpus with the given encoding.
// NOTE(review): this single-argument constructor is not `explicit`, so a
// std::string is implicitly convertible wherever a corpus-derived
// constructor forwards to it -- confirm that this is intended.
34  corpus(std::string encoding);
35 
// Pure virtual. Presumably reports whether another document can be
// obtained from next() -- verify against the implementing classes.
39  virtual bool has_next() const = 0;
40 
// Pure virtual. Returns a document by value; non-const, so implementations
// are allowed to mutate the corpus (presumably to advance a cursor --
// verify against the implementing classes).
44  virtual document next() = 0;
45 
// Pure virtual. Returns a uint64_t; presumably the number of documents in
// the corpus -- verify against the implementing classes.
49  virtual uint64_t size() const = 0;
50 
// Destructor. Virtual and defaulted.
54  virtual ~corpus() = default;
55 
// Returns a const reference to a std::string; presumably the encoding_
// member below (defined out of line) -- confirm.
59  const std::string& encoding() const;
60 
// Static factory: takes a config file name/path and returns an owning
// std::unique_ptr<corpus>. How the config selects the concrete corpus type
// is not visible in this header.
66  static std::unique_ptr<corpus> load(const std::string& config_file);
67 
// Basic exception for corpus interactions.
71  class corpus_exception : public std::runtime_error
72  {
73  public:
// Inherit all of std::runtime_error's constructors.
74  using std::runtime_error::runtime_error;
75  };
76 
77  private:
// The type of encoding this corpus uses.
79  std::string encoding_;
80 };
81 }
82 }
83 
84 #endif
virtual bool has_next() const =0
Contains top-level namespace documentation for the META toolkit.
corpus(std::string encoding)
Constructs a new corpus with the given encoding.
Definition: corpus.cpp:16
static std::unique_ptr< corpus > load(const std::string &config_file)
Definition: corpus.cpp:26
Represents an indexable document.
Definition: document.h:31
const std::string & encoding() const
Definition: corpus.cpp:21
Provides an interface to multiple corpus input formats.
Definition: corpus.h:27
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:24
virtual document next()=0
Basic exception for corpus interactions.
Definition: corpus.h:71
std::string encoding_
The type of encoding this document uses.
Definition: corpus.h:79
virtual ~corpus()=default
Destructor.
virtual uint64_t size() const =0