Implements stochastic gradient descent for learning binary linear classifiers.
More...
|
| sgd (const std::string &prefix, std::shared_ptr< index::forward_index > idx, class_label positive, class_label negative, std::unique_ptr< loss::loss_function > loss, double alpha=default_alpha, double gamma=default_gamma, double bias=default_bias, double lambda=default_lambda, size_t max_iter=default_max_iter) |
|
double | predict (doc_id d_id) const |
| Returns the dot product with the current weight vector. More...
|
|
void | train (const std::vector< doc_id > &docs) override |
| Creates a classification model based on training documents. More...
|
|
void | reset () override |
| Clears any learning data associated with this classifier.
|
|
| binary_classifier (std::shared_ptr< index::forward_index > idx, class_label positive, class_label negative) |
| Creates a new binary classifier using the given index to retrieve documents, treating anything with the given positive label as a positive example and everything else as a negative example. More...
|
|
class_label | classify (doc_id d_id) final |
| Classifies a document into a specific group, as determined by training data. More...
|
|
const class_label & | positive_label () const |
|
const class_label & | negative_label () const |
|
| classifier (std::shared_ptr< index::forward_index > idx) |
|
virtual confusion_matrix | test (const std::vector< doc_id > &docs) |
| Classifies a collection of documents into specific groups, as determined by training data; this function will make repeated calls to classify(). More...
|
|
virtual confusion_matrix | cross_validate (const std::vector< doc_id > &input_docs, size_t k, bool even_split=false, int seed=1) |
| Performs k-fold cross-validation on a set of documents. More...
|
|
|
static const constexpr double | default_alpha = 0.001 |
| The default \(\alpha\) parameter.
|
|
static const constexpr double | default_gamma = 1e-6 |
| The default \(\gamma\) parameter.
|
|
static const constexpr double | default_bias = 1 |
| The default \(b\) parameter.
|
|
static const constexpr double | default_lambda = 0.0001 |
| The default \(\lambda\) parameter.
|
|
static const constexpr size_t | default_max_iter = 50 |
| The default number of allowed iterations.
|
|
static const std::string | id = "sgd" |
| The identifier for this classifier.
|
|
|
util::disk_vector< double > | weights_ |
| The weights vector.
|
|
double | coeff_ {1.0} |
| The scalar coefficient for the weights vector.
|
|
const double | alpha_ |
| \(\alpha\), the learning rate.
|
|
const double | gamma_ |
| \(\gamma\), the error threshold.
|
|
double | bias_ |
| \(b\), the bias.
|
|
const double | bias_weight_ |
| The weight of the bias term for each document (defaults to 1).
|
|
const double | lambda_ |
| \(\lambda\), the regularization constant.
|
|
const size_t | max_iter_ |
| The maximum number of iterations for training.
|
|
std::unique_ptr< loss::loss_function > | loss_ |
| The loss function to be used for the update.
|
|
Implements stochastic gradient descent for learning binary linear classifiers.
These may be extended to multiclass classification using the one_vs_all or all_vs_all adapters.