ModErn Text Analysis
META Enumerates Textual Applications
parser.h
Go to the documentation of this file.
1 
10 #ifndef META_PARSER_H_
11 #define META_PARSER_H_
12 
13 #include <array>
14 #include <memory>
15 #include <string>
16 #include "util/optional.h"
17 
18 namespace meta
19 {
20 namespace io
21 {
22 
23 class mmap_file;
24 
29 class parser // Parses a text file by reading it completely into memory, delimiting tokens by user request.
30 {
31  public:
36  enum class input_type // Determines whether the parser parses a std::string or the contents of a file.
37  {
38  File,
39  String
40  };
41 
47  parser(const std::string& input, const std::string& delims, // presumably `input` is a path (File) or the text itself (String), and `delims` lists delimiter characters -- confirm in parser.cpp
48  input_type in_type = input_type::File); // in_type defaults to input_type::File
49 
53  ~parser(); // Destructor.
54 
58  parser(parser&&); // Move constructor; no copy operations are declared in this listing.
59 
63  parser& operator=(parser&&); // May be move-assigned.
64 
68  std::string filename() const; // presumably returns filename_ -- confirm in parser.cpp
69 
74  std::string peek() const; // presumably returns the upcoming token without advancing -- confirm in parser.cpp
75 
80  std::string next(); // presumably returns the upcoming token and advances -- confirm in parser.cpp
81 
85  bool has_next() const; // presumably true while another token is available -- confirm in parser.cpp
86 
87  private:
91  void get_next(); // Advances to the next token in the file or string, saving the result.
92 
94  size_t idx_; // The current position of the "cursor" into the file or string.
95 
97  std::array<bool, 256> invalid_; // Array of booleans indicating whether or not a character is a delimiter.
98 
100  std::string filename_; // Saves the name of the file if the parser is parsing a file.
101 
103  std::unique_ptr<io::mmap_file> mmap_file_; // Memory-mapped file pointer if the parser is parsing a file.
104 
106  uint64_t size_; // The number of characters that will be read.
107 
109  const char* data_; // Pointer into a string or memory-mapped file.
110 
112  util::optional<std::string> next_; // The next token to be returned; documented as "" if none. NOTE(review): confirm whether an empty optional is what is actually used.
113 };
114 }
115 }
116 
117 #endif
std::string filename_
Saves the name of the file if the parser is parsing a file.
Definition: parser.h:100
std::string next()
Definition: parser.cpp:76
std::string peek() const
Definition: parser.cpp:71
util::optional< std::string > next_
The next token to be returned; "" if none.
Definition: parser.h:112
~parser()
Destructor.
std::array< bool, 256 > invalid_
Array of booleans indicating whether or not a character is a delimiter.
Definition: parser.h:97
const char * data_
Pointer into a string or memory-mapped file.
Definition: parser.h:109
parser & operator=(parser &&)
May be move-assigned.
std::string filename() const
Definition: parser.cpp:66
void get_next()
Advances to the next token in the file or string, saving the result.
Definition: parser.cpp:45
Parses a text file by reading it completely into memory, delimiting tokens by user request...
Definition: parser.h:29
parser(const std::string &input, const std::string &delims, input_type in_type=input_type::File)
Definition: parser.cpp:14
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:24
uint64_t size_
The number of characters that will be read.
Definition: parser.h:106
bool has_next() const
Definition: parser.cpp:83
std::unique_ptr< io::mmap_file > mmap_file_
Memory-mapped file pointer if the parser is parsing a file.
Definition: parser.h:103
input_type
Determines whether the parser parses a std::string or the contents of a file.
Definition: parser.h:36
size_t idx_
The current position of the "cursor" into the file or string.
Definition: parser.h:94