ModErn Text Analysis
META Enumerates Textual Applications
compressed_file_reader.h
Go to the documentation of this file.
1 
10 #ifndef META_COMPRESSED_FILE_READER_H_
11 #define META_COMPRESSED_FILE_READER_H_
12 
13 #include <functional>
14 #include <memory>
15 #include <stdexcept>
16 #include <string>
17 
18 namespace meta
19 {
20 namespace io
21 {
22 
23 class mmap_file;
24 
29 {
30  notDone,
31  readerDone,
32  userDone
33 };
34 
39 {
40  public:
50  std::function<uint64_t(uint64_t)> mapping);
51 
/// Constructs a reader from a file name and a uint64_t -> uint64_t mapping
/// function. NOTE(review): presumably opens the named file itself; the
/// implementation is not visible in this listing -- confirm in the .cpp.
59  compressed_file_reader(const std::string& filename,
60  std::function<uint64_t(uint64_t)> mapping);
61 
66 
/// Sets the cursor back to the beginning of the file.
70  void reset();
71 
/// Closes this compressed file.
75  void close();
76 
/// Sets the cursor to the specified position in the file.
/// @param bit_offset the target position (the name suggests it is counted in
/// bits -- TODO confirm against the implementation)
82  void seek(uint64_t bit_offset);
83 
/// NOTE(review): presumably reports whether another value can be read;
/// undocumented in this listing -- confirm in the .cpp.
87  bool has_next() const;
88 
/// NOTE(review): presumably returns the next number from the file;
/// undocumented in this listing -- confirm in the .cpp.
92  uint64_t next();
93 
/// NOTE(review): presumably reads the next string from the file;
/// undocumented in this listing -- confirm in the .cpp.
97  std::string next_string();
98 
/// NOTE(review): presumably the current cursor position expressed in bits;
/// undocumented in this listing -- confirm in the .cpp.
102  uint64_t bit_location() const;
103 
/// Boolean conversion: yields true as long as status_ is not userDone.
/// NOTE(review): the operator is not declared explicit, so the conversion can
/// also be applied implicitly in non-boolean contexts.
107  operator bool() const
108  {
109  return status_ != userDone;
110  }
111 
112  private:
/// Seeks to the next compressed number. Declared void, so any value it
/// produces has to be kept in member state (presumably current_value_ --
/// TODO confirm in the .cpp).
117  void get_next();
118 
/// NOTE(review): presumably reads a single bit at the current position;
/// undocumented in this listing -- confirm in the .cpp.
122  bool read_bit();
123 
/// Pointer to the mmap_file being read: nullptr if this reader does not own
/// it, initialized if it does.
128  std::unique_ptr<mmap_file> file_;
129 
/// Pointer to the beginning of the compressed file (which will be in memory
/// most of the time).
134  char* start_;
135 
/// The number of bytes in this compressed file.
137  uint64_t size_;
138 
/// Reading/writing status; stored as a plain int and compared against the
/// userDone enumerator in operator bool.
140  int status_;
141 
/// Current numeric value that was read.
143  uint64_t current_value_;
144 
/// Current byte in the compressed file.
146  uint64_t current_char_;
147 
/// Current bit inside the current byte.
149  uint8_t current_bit_;
150 
/// Mapping function applied by the reader; the generated docs describe it as
/// holding the (actual -> compressed id) mapping.
152  std::function<uint64_t(uint64_t)> mapping_;
153 
154  public:
/// Basic exception for compressed_file_reader interactions.
/// Derives from std::runtime_error and inherits its constructors through the
/// using-declaration below; it adds no members of its own.
158  class compressed_file_reader_exception : public std::runtime_error
159  {
160  public:
161  using std::runtime_error::runtime_error;
162  };
163 };
164 
/// Function that converts a compressed number back into its normal
/// representation. Its uint64_t -> uint64_t signature matches the mapping
/// parameter taken by the compressed_file_reader constructors.
/// @param value the compressed number
/// @return the number in its normal representation
171 uint64_t default_compression_reader_func(uint64_t value);
172 }
173 }
174 
175 #endif
uint64_t default_compression_reader_func(uint64_t value)
Function that converts a compressed number back into its normal representation.
Definition: compressed_file_reader.cpp:148
bool read_bit()
Definition: compressed_file_reader.cpp:131
uint8_t current_bit_
current bit inside the current byte
Definition: compressed_file_reader.h:149
void get_next()
Seeks to the next compressed number and returns the current cached value.
Definition: compressed_file_reader.cpp:115
char * start_
Pointer to the beginning of the compressed file (which will be in memory most of the time) ...
Definition: compressed_file_reader.h:134
Memory maps a text file readonly.
Definition: mmap_file.h:24
void reset()
Sets the cursor back to the beginning of the file.
Definition: compressed_file_reader.cpp:60
Represents a file of unsigned integers compressed using gamma compression.
Definition: compressed_file_reader.h:38
uint64_t bit_location() const
Definition: compressed_file_reader.cpp:55
std::string next_string()
Definition: compressed_file_reader.cpp:68
uint64_t next()
Definition: compressed_file_reader.cpp:99
void seek(uint64_t bit_offset)
Sets the cursor to the specified position in the file.
Definition: compressed_file_reader.cpp:77
uint64_t current_char_
current byte in the compressed file
Definition: compressed_file_reader.h:146
Basic exception for compressed_file_reader interactions.
Definition: compressed_file_reader.h:158
std::unique_ptr< mmap_file > file_
Pointer to the mmap_file we are reading: nullptr if we don't own it, initialized if we do...
Definition: compressed_file_reader.h:128
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:24
uint64_t current_value_
current numeric value that was read
Definition: compressed_file_reader.h:143
uint64_t size_
the number of bytes in this compressed file
Definition: compressed_file_reader.h:137
int status_
reading/writing status
Definition: compressed_file_reader.h:140
std::function< uint64_t(uint64_t)> mapping_
hold the (actual -> compressed id) mapping
Definition: compressed_file_reader.h:152
compressed_file_reader(const mmap_file &file, std::function< uint64_t(uint64_t)> mapping)
Constructor; opens a compressed file for reading using the given mapping.
Definition: compressed_file_reader.cpp:32
ReaderStatus
Simply saves the current state of the reader.
Definition: compressed_file_reader.h:28
bool has_next() const
Definition: compressed_file_reader.cpp:94
void close()
Closes this compressed file.
Definition: compressed_file_reader.cpp:50
std::string filename(const std::string &path)
Definition: unit_test.h:114