ModErn Text Analysis
META Enumerates Textual Applications
Main Page
Related Pages
Namespaces
Classes
Files
File List
File Members
include
corpus
line_corpus.h
Go to the documentation of this file.
1
10
#ifndef META_LINE_CORPUS_H_
11
#define META_LINE_CORPUS_H_
12
13
#include <string>
14
#include <vector>
15
#include <utility>
16
#include "
io/parser.h
"
17
#include "
corpus/corpus.h
"
18
19
namespace
meta
20
{
21
namespace
corpus
22
{
23
29
class
line_corpus
:
public
corpus
30
{
31
public
:
40
line_corpus
(
const
std::string& file, std::string
encoding
,
41
uint64_t num_lines = 0);
42
46
bool
has_next
()
const override
;
47
51
document
next
()
override
;
52
56
uint64_t
size
()
const override
;
57
58
private
:
60
doc_id
cur_id_
;
61
63
uint64_t
num_lines_
;
64
66
io::parser
parser_
;
67
69
std::unique_ptr<io::parser>
class_parser_
;
70
72
std::unique_ptr<io::parser>
name_parser_
;
73
};
74
}
75
}
76
77
#endif
meta::corpus::line_corpus::line_corpus
line_corpus(const std::string &file, std::string encoding, uint64_t num_lines=0)
Definition:
line_corpus.cpp:18
parser.h
meta::corpus::line_corpus::class_parser_
std::unique_ptr< io::parser > class_parser_
Parser to read the class labels.
Definition:
line_corpus.h:69
meta::io::parser
Parses a text file by reading it completely into memory, delimiting tokens by user request...
Definition:
parser.h:29
meta::corpus::line_corpus::next
document next() override
Definition:
line_corpus.cpp:52
meta::corpus::document
Represents an indexable document.
Definition:
document.h:31
meta::corpus::corpus::encoding
const std::string & encoding() const
Definition:
corpus.cpp:21
meta::corpus::corpus
Provides an interface to multiple corpus input formats.
Definition:
corpus.h:27
meta
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition:
analyzer.h:24
meta::corpus::line_corpus::parser_
io::parser parser_
Parser to read the corpus file.
Definition:
line_corpus.h:66
meta::corpus::line_corpus::has_next
bool has_next() const override
Definition:
line_corpus.cpp:47
meta::corpus::line_corpus::cur_id_
doc_id cur_id_
The current document we are on.
Definition:
line_corpus.h:60
corpus.h
meta::corpus::line_corpus::name_parser_
std::unique_ptr< io::parser > name_parser_
Parser to read the document names.
Definition:
line_corpus.h:72
meta::corpus::line_corpus
Fills document objects with content line-by-line from an input file.
Definition:
line_corpus.h:29
meta::corpus::line_corpus::size
uint64_t size() const override
Definition:
line_corpus.cpp:69
meta::corpus::line_corpus::num_lines_
uint64_t num_lines_
The number of lines in the file.
Definition:
line_corpus.h:63
Generated on Tue Mar 3 2015 23:20:16 for ModErn Text Analysis by
1.8.9.1