Evaluates lists of ranked documents returned from a search engine; can give stats per-query (e.g. precision) or over a series of queries (e.g. MAP).
More...
#include <ir_eval.h>
|
using | result_type = std::vector< std::pair< doc_id, double >> |
|
|
| ir_eval (const std::string &config_file) |
|
double | precision (const result_type &results, query_id q_id, uint64_t num_docs=std::numeric_limits< uint64_t >::max()) const |
|
double | recall (const result_type &results, query_id q_id, uint64_t num_docs=std::numeric_limits< uint64_t >::max()) const |
|
double | f1 (const result_type &results, query_id q_id, uint64_t num_docs=std::numeric_limits< uint64_t >::max(), double beta=1.0) const |
|
double | ndcg (const result_type &results, query_id q_id, uint64_t num_docs=std::numeric_limits< uint64_t >::max()) const |
|
double | avg_p (const result_type &results, query_id q_id, uint64_t num_docs=std::numeric_limits< uint64_t >::max()) |
| Computes the average precision for a query.
|
|
double | map () const |
|
double | gmap () const |
|
void | print_stats (const result_type &results, query_id q_id, std::ostream &out=std::cout) |
|
void | reset_stats () |
| Clears saved scores for MAP and gMAP.
|
|
|
std::unordered_map< query_id, std::unordered_map< doc_id, uint8_t > > | qrels_ |
| query_id -> (doc_id -> relevance) mapping. If the doc_id isn't in the map, it is non-relevant.
|
|
std::vector< double > | scores_ |
| Collection of scores used to calculate MAP and gMAP.
|
|
Evaluates lists of ranked documents returned from a search engine; can give stats per-query (e.g.
precision) or over a series of queries (e.g. MAP).
meta::index::ir_eval::ir_eval |
( |
const std::string & |
config_file | ) |
|
- Parameters
-
config_file | Path to cpptoml configuration file |
double meta::index::ir_eval::precision |
( |
const result_type & |
results, |
|
|
query_id |
q_id, |
|
|
uint64_t |
num_docs = std::numeric_limits<uint64_t>::max() |
|
) |
| const |
- Parameters
-
results | The ranked list of results |
q_id | The query that was run to produce these results |
num_docs | For p@num_docs scores |
- Returns
- the precision: \( \frac{\# relevant~retrieved~docs}{\# retrieved~docs} \)
double meta::index::ir_eval::recall |
( |
const result_type & |
results, |
|
|
query_id |
q_id, |
|
|
uint64_t |
num_docs = std::numeric_limits<uint64_t>::max() |
|
) |
| const |
- Parameters
-
results | The ranked list of results |
q_id | The query that was run to produce these results |
num_docs | For r@num_docs scores |
- Returns
- the recall: \( \frac{\# relevant~retrieved~docs}{\# relevant~docs} \)
double meta::index::ir_eval::f1 |
( |
const result_type & |
results, |
|
|
query_id |
q_id, |
|
|
uint64_t |
num_docs = std::numeric_limits<uint64_t>::max() , |
|
|
double |
beta = 1.0 |
|
) |
| const |
- Parameters
-
results | The ranked list of results |
q_id | The query that was run to produce these results |
num_docs | For f1@num_docs scores |
beta | Attach beta times as much importance to recall compared to precision (default 1.0, or equal) |
- Returns
- the F1 score: \( \frac{(1+\beta^2)(P\cdot R)}{(\beta^2\cdot P )+R} \)
double meta::index::ir_eval::ndcg |
( |
const result_type & |
results, |
|
|
query_id |
q_id, |
|
|
uint64_t |
num_docs = std::numeric_limits<uint64_t>::max() |
|
) |
| const |
- Returns
- the Normalized Discounted Cumulative Gain for a query. \( DCG_p = \sum_{i=1}^p \frac{2^{rel_i}-1}{\log(i+1)}, p = num\_docs \) and \( nDCG_p = \frac{DCG_p}{IDCG_p} \), where IDCG is the optimal DCG score for a given query.
- Parameters
-
results | The ranked list of results |
q_id | The query that was run to produce these results |
num_docs | For ndcg@num_docs scores (the p in the formula above) |
double meta::index::ir_eval::map |
( |
| ) |
const |
- Returns
- the Mean Average Precision for a set of queries. Note that avg_p() must be called in order for the individual query scores to be calculated and saved. \( MAP = \frac{1}{n}\sum_{i=1}^n avg\_p(i)\)
double meta::index::ir_eval::gmap |
( |
| ) |
const |
- Returns
- the Geometric Mean Average Precision for a set of queries. Note that avg_p() must be called in order for the individual query scores to be calculated and saved. Also note that the product is computed in log space to minimize any floating point errors. \( gMAP = \exp\left(\frac{1}{n}\sum_{i=1}^n \log avg\_p(i)\right) \)
void meta::index::ir_eval::print_stats |
( |
const result_type & |
results, |
|
|
query_id |
q_id, |
|
|
std::ostream & |
out = std::cout |
|
) |
| |
- Parameters
-
results | The ranked list of results |
q_id | The query that was run to produce these results |
out | The stream to print to |
void meta::index::ir_eval::init_index |
( |
const std::string & |
path | ) |
|
|
private |
- Parameters
-
path | The path to the relevance judgements |
double meta::index::ir_eval::relevant_retrieved |
( |
const result_type & |
results, |
|
|
query_id |
q_id, |
|
|
uint64_t |
num_docs |
|
) |
| const |
|
private |
- Parameters
-
results | The ranked list of results |
q_id | The query that was run to produce these results |
num_docs | For @num_docs scores (e.g. p@num_docs) |
- Returns
- the number of relevant results that were retrieved
The documentation for this class was generated from the following files:
- /home/chase/projects/meta/include/index/eval/ir_eval.h
- /home/chase/projects/meta/src/index/eval/ir_eval.cpp