ModErn Text Analysis
META Enumerates Textual Applications
Main Page
Related Pages
Namespaces
Classes
Files
File List
File Members
include
corpus
file_corpus.h
Go to the documentation of this file.
1
10
#ifndef META_FILE_CORPUS_H_
11
#define META_FILE_CORPUS_H_
12
13
#include <string>
14
#include <vector>
15
#include <utility>
16
#include "
corpus/corpus.h
"
17
18
namespace
meta
19
{
20
namespace
corpus
21
{
22
27
class
file_corpus
:
public
corpus
28
{
29
public
:
36
file_corpus
(
const
std::string& prefix,
const
std::string& doc_list,
37
std::string
encoding
);
38
42
bool
has_next
()
const override
;
43
47
document
next
()
override
;
48
52
uint64_t
size
()
const override
;
53
54
private
:
56
uint64_t
cur_
;
57
59
std::string
prefix_
;
60
62
std::vector<std::pair<std::string, class_label>>
docs_
;
63
};
64
}
65
}
66
67
#endif
meta::corpus::file_corpus::docs_
std::vector< std::pair< std::string, class_label > > docs_
contains doc class labels and paths
Definition:
file_corpus.h:62
meta::corpus::file_corpus::file_corpus
file_corpus(const std::string &prefix, const std::string &doc_list, std::string encoding)
Definition:
file_corpus.cpp:14
meta::corpus::file_corpus::has_next
bool has_next() const override
Definition:
file_corpus.cpp:36
meta::corpus::document
Represents an indexable document.
Definition:
document.h:31
meta::corpus::corpus::encoding
const std::string & encoding() const
Definition:
corpus.cpp:21
meta::corpus::corpus
Provides an interface for working with multiple corpus input formats.
Definition:
corpus.h:27
meta
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition:
analyzer.h:24
meta::corpus::file_corpus
Creates document objects from individual files, each representing a single document.
Definition:
file_corpus.h:27
meta::corpus::file_corpus::prefix_
std::string prefix_
the path to all the documents
Definition:
file_corpus.h:59
meta::corpus::file_corpus::cur_
uint64_t cur_
the current document we are on
Definition:
file_corpus.h:56
corpus.h
meta::corpus::file_corpus::next
document next() override
Definition:
file_corpus.cpp:41
meta::corpus::file_corpus::size
uint64_t size() const override
Definition:
file_corpus.cpp:49
Generated on Tue Mar 3 2015 23:20:16 for ModErn Text Analysis by
1.8.9.1