|
ModErn Text Analysis
META Enumerates Textual Applications
|
#include <iostream>
#include <vector>
#include <unordered_set>
#include <string>
#include "cpptoml.h"
#include "util/shim.h"
#include "util/filesystem.h"
#include "analyzers/analyzer.h"
#include "analyzers/tokenizers/icu_tokenizer.h"
#include "analyzers/filters/all.h"
#include "analyzers/ngram/ngram_word_analyzer.h"
#include "corpus/document.h"
#include "parser/sr_parser.h"
#include "sequence/perceptron.h"
#include "sequence/io/ptb_parser.h"
#include "sequence/sequence.h"
Functions | |
| int | print_usage (const std::string &prog) |
| Prints help for this executable. More... | |
| std::string | no_ext (const std::string &file) |
| template<class Stream > | |
| void | write_file (Stream &stream, const std::string &in_name, const std::string &out_name) |
| void | stem (const std::string &file, const cpptoml::table &) |
| Performs stemming on a text file. More... | |
| void | stop (const std::string &file, const cpptoml::table &config) |
| Performs stopword removal on a text file. More... | |
| void | pos (const std::string &file, const cpptoml::table &config, bool replace) |
| Performs part-of-speech tagging on a text file. More... | |
| void | parse (const std::string &file, const cpptoml::table &config) |
| Parses all sentences in a text file. | |
| void | freq (const std::string &file, const cpptoml::table &, uint16_t n) |
| Performs frequency analysis on a text file. More... | |
| int | main (int argc, char *argv[]) |
| int print_usage | ( | const std::string & | prog | ) |
Prints help for this executable.
| prog | The name of the current executable |
| std::string no_ext | ( | const std::string & | file | ) |
| file | The filename to modify |
| void write_file | ( | Stream & | stream, |
| const std::string & | in_name, | ||
| const std::string & | out_name | ||
| ) |
| stream | Token stream to read from |
| in_name | Input filename |
| out_name | Output filename |
| void stem | ( | const std::string & | file, |
| const cpptoml::table & | |||
| ) |
Performs stemming on a text file.
| file | The input file |
| config | Configuration settings |
| void stop | ( | const std::string & | file, |
| const cpptoml::table & | config | ||
| ) |
Performs stopword removal on a text file.
| file | The input file |
| config | Configuration settings |
| void pos | ( | const std::string & | file, |
| const cpptoml::table & | config, | ||
| bool | replace | ||
| ) |
Performs part-of-speech tagging on a text file.
| file | The input file |
| config | Configuration settings |
| replace | Whether or not to replace words with their POS tags |
| void freq | ( | const std::string & | file, |
| const cpptoml::table & | , | ||
| uint16_t | n | ||
| ) |
Performs frequency analysis on a text file.
| file | The input file |
| config | Configuration settings |
| n | The n-gram value to use in tokenization |
1.8.9.1