2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
25#ifndef ARACHNE_PHEIDIPPIDES_HPP
26#define ARACHNE_PHEIDIPPIDES_HPP
29#include <nlohmann/json.hpp>
30#include <unordered_set>
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
86 const std::unordered_set<std::string>& batch,
91
92
93
96
97
98
99
100
101
102
103
104
105
106
108 std::span<
const std::string> ids, std::string_view separator =
"|"
Accumulates entity IDs into per-kind batches and organizes groups.
std::unordered_map< std::string, int > candidates
std::array< std::unordered_set< std::string >, batched_kind_count > extra_batches
bool touch_entity(const std::string &id_with_prefix) noexcept
Increment the touch counter for a single full ID (prefix REQUIRED).
static std::string entity_root(const std::string &id)
Extract the lexeme root from a full ID string.
std::string current_group
int touch_ids(std::span< const int > ids, corespace::entity_kind kind)
Batch variant of touch for numeric IDs.
static bool parse_id(const std::string &entity, size_t &pos, int &id)
Parse a full ID string and extract the numeric portion.
bool new_group(std::string name="")
Create or select a group and make it current.
size_t add_entity(const std::string &id_with_prefix, bool force=false, std::string name="")
Enqueue a full (prefixed) ID string and add it to a group.
std::unordered_map< std::string, std::unordered_set< std::string > > groups
std::chrono::milliseconds staleness_threshold
bool enqueue(std::string_view id, corespace::entity_kind kind, bool interactive) const
Decide whether an entity should be enqueued for fetching.
const size_t batch_threshold
Typical unauthenticated entity-per-request cap.
const int candidates_threshold
Intentional high bar for curiosity-driven candidates.
static std::string normalize(int id, corespace::entity_kind kind)
Normalize a numeric ID with the given kind to a prefixed string.
static bool ask_update(std::string_view id, corespace::entity_kind kind, std::chrono::milliseconds age)
Placeholder for interactive staleness confirmation.
void select_group(std::string name)
Select an existing group or create it on demand.
std::array< std::unordered_set< std::string >, batched_kind_count > main_batches
int queue_size(corespace::entity_kind kind) const noexcept
Get the number of queued (pending) entities tracked in the main batch containers.
static corespace::entity_kind identify(const std::string &entity) noexcept
Determine the kind of a full ID string.
bool flush(corespace::entity_kind kind=corespace::entity_kind::any)
Flush (send) up to batch_threshold entities of a specific kind.
size_t add_ids(std::span< const int > ids, corespace::entity_kind kind, std::string name="")
Enqueue numeric IDs with a given kind and add them to a group.
Batch courier for Wikidata/Commons: collects IDs, issues HTTP requests, and returns a merged JSON payload.
corespace::http_client client
Reused HTTP client (not thread-safe across threads).
static std::string join_str(std::span< const std::string > ids, std::string_view separator="|")
Join a span of strings with a separator (no encoding or validation).
const corespace::network_metrics & metrics_info() const
Access aggregated network metrics of the underlying client.
nlohmann::json fetch_json(const std::unordered_set< std::string > &batch, corespace::entity_kind kind=corespace::entity_kind::any)
Fetch metadata for a set of entity IDs and return a merged JSON object.
corespace::options opt
Request shaping parameters (chunking, fields, base params).
std::unique_ptr< CURLU, decltype(&curl_url_cleanup)> curl_url_ptr
Unique pointer type for CURLU with proper deleter.
static constexpr std::string prefixes
constexpr std::size_t batched_kind_count
Number of batchable kinds (Q, P, L, M, E, form, sense).
entity_kind
Wikidata entity kind.
@ any
API selector (e.g., flush(any)); not directly batchable.
@ lexeme
IDs prefixed with 'L'.
@ form
Lexeme form IDs such as "L<lexeme>-F<form>".
@ unknown
Unrecognized/invalid identifier.
@ sense
Lexeme sense IDs such as "L<lexeme>-S<sense>".
std::string random_hex(const std::size_t n)
Return exactly n random hexadecimal characters (lowercase).
Configuration for fetching entities via MediaWiki/Wikibase API.