ModErn Text Analysis
META Enumerates Textual Applications
Main Page
Related Pages
Namespaces
Classes
Files
File List
File Members
include
corpus
gz_corpus.h
Go to the documentation of this file.
1
10
#ifndef META_GZ_CORPUS_H_
11
#define META_GZ_CORPUS_H_
12
13
#include "
corpus/corpus.h
"
14
#include "
io/gzstream.h
"
15
16
namespace
meta
17
{
18
namespace
corpus
19
{
20
25
class
gz_corpus
:
public
corpus
26
{
27
public
:
33
gz_corpus
(
const
std::string& file, std::string
encoding
);
34
38
bool
has_next
()
const override
;
39
43
document
next
()
override
;
44
48
uint64_t
size
()
const override
;
49
50
private
:
52
doc_id
cur_id_
;
53
55
uint64_t
num_lines_
;
56
58
io::gzifstream
corpus_stream_
;
59
61
io::gzifstream
class_stream_
;
62
64
io::gzifstream
name_stream_
;
65
};
66
}
67
}
68
69
#endif
meta::corpus::gz_corpus::gz_corpus
gz_corpus(const std::string &file, std::string encoding)
Definition:
gz_corpus.cpp:14
meta::corpus::gz_corpus::class_stream_
io::gzifstream class_stream_
The stream to read the class labels.
Definition:
gz_corpus.h:61
meta::corpus::gz_corpus::cur_id_
doc_id cur_id_
The current document we are on.
Definition:
gz_corpus.h:52
meta::io::gzifstream
Definition:
gzstream.h:46
meta::corpus::gz_corpus::name_stream_
io::gzifstream name_stream_
The stream to read the document names.
Definition:
gz_corpus.h:64
meta::corpus::gz_corpus::next
document next() override
Definition:
gz_corpus.cpp:41
meta::corpus::document
Represents an indexable document.
Definition:
document.h:31
meta::corpus::corpus::encoding
const std::string & encoding() const
Definition:
corpus.cpp:21
meta::corpus::corpus
Provides an interface to multiple corpus input formats.
Definition:
corpus.h:27
meta
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition:
analyzer.h:24
meta::corpus::gz_corpus::has_next
bool has_next() const override
Definition:
gz_corpus.cpp:36
meta::corpus::gz_corpus::corpus_stream_
io::gzifstream corpus_stream_
The stream for reading the corpus.
Definition:
gz_corpus.h:58
meta::corpus::gz_corpus::size
uint64_t size() const override
Definition:
gz_corpus.cpp:61
meta::corpus::gz_corpus::num_lines_
uint64_t num_lines_
The number of lines in the file.
Definition:
gz_corpus.h:55
gzstream.h
corpus.h
meta::corpus::gz_corpus
Fills document objects with content line-by-line from gzip-compressed input files.
Definition:
gz_corpus.h:25
Generated on Tue Mar 3 2015 23:20:16 for ModErn Text Analysis by
1.8.9.1