ModErn Text Analysis
META Enumerates Textual Applications
Functions
profile.cpp File Reference
#include <iostream>
#include <vector>
#include <unordered_set>
#include <string>
#include "cpptoml.h"
#include "util/shim.h"
#include "util/filesystem.h"
#include "analyzers/analyzer.h"
#include "analyzers/tokenizers/icu_tokenizer.h"
#include "analyzers/filters/all.h"
#include "analyzers/ngram/ngram_word_analyzer.h"
#include "corpus/document.h"
#include "parser/sr_parser.h"
#include "sequence/perceptron.h"
#include "sequence/io/ptb_parser.h"
#include "sequence/sequence.h"

Functions

int print_usage (const std::string &prog)
 Prints help for this executable. More...
 
std::string no_ext (const std::string &file)
 
template<class Stream >
void write_file (Stream &stream, const std::string &in_name, const std::string &out_name)
 
void stem (const std::string &file, const cpptoml::table &)
 Performs stemming on a text file. More...
 
void stop (const std::string &file, const cpptoml::table &config)
 Performs stopword removal on a text file. More...
 
void pos (const std::string &file, const cpptoml::table &config, bool replace)
 Performs part-of-speech tagging on a text file. More...
 
void parse (const std::string &file, const cpptoml::table &config)
 Parses all sentences in a text file.
 
void freq (const std::string &file, const cpptoml::table &, uint16_t n)
 Performs frequency analysis on a text file. More...
 
int main (int argc, char *argv[])
 

Detailed Description

Author
Sean Massung

Function Documentation

int print_usage ( const std::string &  prog)

Prints help for this executable.

Parameters
prog: The name of the current executable
Returns
the exit code for this program
std::string no_ext ( const std::string &  file)
Parameters
file: The filename to modify
Returns
the base filename without an extension
template<class Stream >
void write_file ( Stream &  stream,
const std::string &  in_name,
const std::string &  out_name 
)
Parameters
stream: Token stream to read from
in_name: Input filename
out_name: Output filename
void stem ( const std::string &  file,
const cpptoml::table &   
)

Performs stemming on a text file.

Parameters
file: The input file
config: Configuration settings (this parameter is unnamed in the signature)
void stop ( const std::string &  file,
const cpptoml::table &  config 
)

Performs stopword removal on a text file.

Parameters
file: The input file
config: Configuration settings
void pos ( const std::string &  file,
const cpptoml::table &  config,
bool  replace 
)

Performs part-of-speech tagging on a text file.

Parameters
file: The input file
config: Configuration settings
replace: Whether or not to replace words with their POS tags
void freq ( const std::string &  file,
const cpptoml::table &  ,
uint16_t  n 
)

Performs frequency analysis on a text file.

Parameters
file: The input file
config: Configuration settings (this parameter is unnamed in the signature)
n: The n-gram value to use in tokenization