10 #ifndef META_POSTINGS_DATA_
11 #define META_POSTINGS_DATA_
28 template <
class,
class>
31 template <
class PrimaryKey,
class SecondaryKey>
32 io::compressed_file_reader&
operator>>(io::compressed_file_reader&,
33 postings_data<PrimaryKey,
44 template <
class PrimaryKey,
class SecondaryKey>
/// Member type aliases exposing the template parameters and the shape of the
/// stored count entries.
/// Alias for the PrimaryKey template parameter.
48 using primary_key_type = PrimaryKey;
/// Alias for the SecondaryKey template parameter.
49 using secondary_key_type = SecondaryKey;
/// A single (SecondaryKey, double) entry; the double is the count value.
50 using pair_t = std::pair<SecondaryKey, double>;
/// A sequence of pair_t entries; this is the type returned by counts().
51 using count_t = std::vector<pair_t>;
58 (std::is_integral<PrimaryKey>::value
59 || std::is_base_of<util::numeric, PrimaryKey>::value
60 || std::is_same<PrimaryKey, std::string>::value)
62 (std::is_integral<SecondaryKey>::value
63 || std::is_base_of<util::numeric, SecondaryKey>::value),
64 "primary and secondary keys in postings data must be numeric types");
/// Compile-time guard: fails the build unless uint64_t and double have the
/// same size. Per the assertion message, postings_data uses reinterpret_cast
/// between the two, so a size mismatch must be rejected up front.
70 static_assert(
sizeof(uint64_t) ==
sizeof(
double),
71 "sizeof(uint64_t) must equal sizeof(double) since "
72 "reinterpret_cast is used in postings_data");
/// Declaration only; the generated index lists the definition at
/// postings_data.tcc:61.
/// @param s_id the secondary key to look up
/// @return a double for s_id -- presumably the count stored for that key;
/// confirm against the definition. Does not modify this object (const).
103 double count(SecondaryKey s_id)
const;
/// Declaration only; the generated index lists the definition at
/// postings_data.tcc:68.
/// @return a const reference to a count_t -- presumably the stored
/// (secondary key, count) pairs; confirm against the definition. Does not
/// modify this object (const).
109 const count_t&
counts()
const;
132 uint32_t num_pairs = in.
next();
133 for (uint32_t i = 0; i < num_pairs; ++i)
135 SecondaryKey s_id = SecondaryKey{in.
next()};
137 pd.counts_.emplace_back(s_id, static_cast<double>(count));
161 if (pd.counts_.empty())
165 uint32_t size = pd.counts_.size();
167 for (
auto& p : pd.counts_)
201 out <<
' ' << (c.first + 1) <<
':' << c.second;
/// Delimiter used when writing to compressed files. Its value is the largest
/// value representable in a uint64_t.
233 const static uint64_t
delimiter_ = std::numeric_limits<uint64_t>::max();
242 template <
class PrimaryKey,
class SecondaryKey>
247 pd.p_id_ = in.
next();
248 stream_helper(in, pd);
263 stream_helper(in, pd);
/// Equality operator for two postings_data objects instantiated with the same
/// PrimaryKey and SecondaryKey types. Only the declaration is visible here;
/// which members take part in the comparison is decided by the definition
/// (not shown) -- confirm there before relying on it.
/// @param lhs left-hand operand
/// @param rhs right-hand operand
/// @return the boolean result of the comparison
273 template <
class PrimaryKey,
class SecondaryKey>
274 bool operator==(
const postings_data<PrimaryKey, SecondaryKey>& lhs,
275 const postings_data<PrimaryKey, SecondaryKey>& rhs);
281 template <
class PrimaryKey,
class SecondaryKey>
285 struct hash<
meta::index::postings_data<PrimaryKey, SecondaryKey>>
bool operator<(const postings_data &other) const
Definition: postings_data.tcc:88
void merge_with(postings_data &other)
Definition: postings_data.tcc:22
PrimaryKey p_id_
Primary id this postings_data represents.
Definition: postings_data.h:227
double count(SecondaryKey s_id) const
Definition: postings_data.tcc:61
const count_t & counts() const
Definition: postings_data.tcc:68
friend void stream_helper(io::compressed_file_reader &in, postings_data< PrimaryKey, SecondaryKey > &pd)
Helper function used by istream operator.
Definition: postings_data.h:128
uint64_t bytes_used() const
Definition: postings_data.tcc:205
void read_compressed(io::compressed_file_reader &reader)
Reads compressed postings_data into this object.
Definition: postings_data.tcc:155
static const uint64_t delimiter_
delimiter used when writing to compressed files
Definition: postings_data.h:233
uint64_t inverse_frequency() const
void set_counts(const count_t &counts)
Definition: postings_data.tcc:74
size_t operator()(const pdata_t &pd) const
Definition: postings_data.h:292
void set_primary_key(PrimaryKey new_key)
Definition: postings_data.tcc:82
void write_libsvm(std::ofstream &out) const
Definition: postings_data.h:197
void write_compressed(io::compressed_file_writer &writer) const
Writes this postings_data to a compressed file.
Definition: postings_data.tcc:109
friend io::compressed_file_writer & operator<<(io::compressed_file_writer &out, const postings_data< PrimaryKey, SecondaryKey > &pd)
Writes semi-compressed postings data to a compressed file.
Definition: postings_data.h:157
A class to represent the per-PrimaryKey data in an index's postings file.
Definition: forward_index.h:30
util::sparse_vector< SecondaryKey, double > counts_
The (secondary_key_type, count) pairs.
Definition: postings_data.h:230
void increase_count(SecondaryKey s_id, double amount)
Definition: postings_data.tcc:55
postings_data()=default
PrimaryKeys may only be integral types, types derived from util::numeric, or strings; SecondaryKeys may only be integral types or types derived from util::numeric.
PrimaryKey primary_key() const
Definition: postings_data.tcc:102