ModErn Text Analysis
META Enumerates Textual Applications
|
#include <iostream>
#include <vector>
#include <unordered_set>
#include <string>
#include "cpptoml.h"
#include "util/shim.h"
#include "util/filesystem.h"
#include "analyzers/analyzer.h"
#include "analyzers/tokenizers/icu_tokenizer.h"
#include "analyzers/filters/all.h"
#include "analyzers/ngram/ngram_word_analyzer.h"
#include "corpus/document.h"
#include "parser/sr_parser.h"
#include "sequence/perceptron.h"
#include "sequence/io/ptb_parser.h"
#include "sequence/sequence.h"
Functions | |
int | print_usage (const std::string &prog) |
Prints help for this executable. More... | |
std::string | no_ext (const std::string &file) |
template<class Stream > | |
void | write_file (Stream &stream, const std::string &in_name, const std::string &out_name) |
void | stem (const std::string &file, const cpptoml::table &) |
Performs stemming on a text file. More... | |
void | stop (const std::string &file, const cpptoml::table &config) |
Performs stopword removal on a text file. More... | |
void | pos (const std::string &file, const cpptoml::table &config, bool replace) |
Performs part-of-speech tagging on a text file. More... | |
void | parse (const std::string &file, const cpptoml::table &config) |
Parses all sentences in a text file. | |
void | freq (const std::string &file, const cpptoml::table &, uint16_t n) |
Performs frequency analysis on a text file. More... | |
int | main (int argc, char *argv[]) |
int print_usage (const std::string &prog)
Prints help for this executable.
prog | The name of the current executable |
std::string no_ext (const std::string &file)
file | The filename to modify |
template<class Stream >
void write_file (Stream &stream, const std::string &in_name, const std::string &out_name)
stream | Token stream to read from |
in_name | Input filename |
out_name | Output filename |
void stem (const std::string &file, const cpptoml::table &)
Performs stemming on a text file.
file | The input file |
config | Configuration settings (unused: this parameter is unnamed in the signature) |
void stop (const std::string &file, const cpptoml::table &config)
Performs stopword removal on a text file.
file | The input file |
config | Configuration settings |
void pos (const std::string &file, const cpptoml::table &config, bool replace)
Performs part-of-speech tagging on a text file.
file | The input file |
config | Configuration settings |
replace | Whether or not to replace words with their POS tags |
void freq (const std::string &file, const cpptoml::table &, uint16_t n)
Performs frequency analysis on a text file.
file | The input file |
config | Configuration settings (unused: this parameter is unnamed in the signature) |
n | The n-gram value to use in tokenization |