2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
25#ifndef ARACHNE_HTTP_CLIENT_HPP
26#define ARACHNE_HTTP_CLIENT_HPP
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
53
54
55
56
57
58
59
60
61
62
63
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
92 get(std::string_view url,
const parameter_list& params = {},
93 std::string_view override = {});
95 std::string_view url,
const parameter_list& form,
96 const parameter_list& query = {}, std::string_view override = {}
99 std::string_view url, std::string_view body,
100 std::string_view content_type,
const parameter_list& query = {},
101 std::string_view override = {}
105
106
107
114
115
116
117
118
119
120
121
122
124 build_url(std::string_view url,
const parameter_list& params);
126
127
128
129
130
131
132
133
134
135
136
137
139 CURLU* url_handle, std::chrono::milliseconds& elapsed,
140 std::string_view override = {}
143 CURLU* url_handle, std::chrono::milliseconds& elapsed,
144 std::string_view content_type, std::string_view body,
145 std::string_view override
150
151
152
153
154
155
156
159
160
161
162
163
164
165
166
168 const http_response& response, std::chrono::milliseconds elapsed
171
172
173
174
177
178
179
180
181
182
183
184
185
186
187
188
192
193
194
195
196
197
198
199
200
203
204
205
206
207
208
209
213
214
215
216
217
218
219
221 write_callback(
const char* ptr, size_t size, size_t n,
void* data);
Accumulates entity IDs into per-kind batches and organizes groups.
std::unordered_map< std::string, int > candidates
std::array< std::unordered_set< std::string >, batched_kind_count > extra_batches
bool touch_entity(const std::string &id_with_prefix) noexcept
Increment the touch counter for a single full ID (prefix REQUIRED).
static std::string entity_root(const std::string &id)
Extract the lexeme root from a full ID string.
std::string current_group
int touch_ids(std::span< const int > ids, corespace::entity_kind kind)
Batch variant of touch for numeric IDs.
static bool parse_id(const std::string &entity, size_t &pos, int &id)
Parse a full ID string and extract the numeric portion.
bool new_group(std::string name="")
Create or select a group and make it current.
size_t add_entity(const std::string &id_with_prefix, bool force=false, std::string name="")
Enqueue a full (prefixed) ID string and add it to a group.
std::unordered_map< std::string, std::unordered_set< std::string > > groups
std::chrono::milliseconds staleness_threshold
bool enqueue(std::string_view id, corespace::entity_kind kind, bool interactive) const
Decide whether an entity should be enqueued for fetching.
const size_t batch_threshold
Typical unauthenticated entity-per-request cap.
const int candidates_threshold
Intentional high bar for curiosity-driven candidates.
static std::string normalize(int id, corespace::entity_kind kind)
Normalize a numeric ID with the given kind to a prefixed string.
static bool ask_update(std::string_view id, corespace::entity_kind kind, std::chrono::milliseconds age)
Placeholder for interactive staleness confirmation.
void select_group(std::string name)
Select an existing group or create it on demand.
std::array< std::unordered_set< std::string >, batched_kind_count > main_batches
int queue_size(corespace::entity_kind kind) const noexcept
Get the number of queued (pending) entities tracked in the main batch containers.
static corespace::entity_kind identify(const std::string &entity) noexcept
Determine the kind of a full ID string.
bool flush(corespace::entity_kind kind=corespace::entity_kind::any)
Flush (send) up to batch_threshold entities of a specific kind.
size_t add_ids(std::span< const int > ids, corespace::entity_kind kind, std::string name="")
Enqueue numeric IDs with a given kind and add them to a group.
Batch courier for Wikidata/Commons: collects IDs, issues HTTP requests, and returns a merged JSON pay...
corespace::http_client client
Reused HTTP client (not thread-safe across threads).
static std::string join_str(std::span< const std::string > ids, std::string_view separator="|")
Join a span of strings with a separator (no encoding or validation).
const corespace::network_metrics & metrics_info() const
Access aggregated network metrics of the underlying client.
nlohmann::json fetch_json(const std::unordered_set< std::string > &batch, corespace::entity_kind kind=corespace::entity_kind::any)
Fetch metadata for a set of entity IDs and return a merged JSON object.
corespace::options opt
Request shaping parameters (chunking, fields, base params).
static bool status_retry(const http_response &response, bool net_ok)
Retry predicate for transient outcomes.
http_response post_form(std::string_view url, const parameter_list &form, const parameter_list &query={}, std::string_view override={})
std::unique_ptr< curl_slist, decltype(&curl_slist_free_all)> header_list
Owned request header list.
http_response request_post(CURLU *url_handle, std::chrono::milliseconds &elapsed, std::string_view content_type, std::string_view body, std::string_view override) const
void update_headers(http_response &response) const
Refresh the header multimap from the last transfer.
http_client()
Construct a client and initialize libcurl.
const network_metrics & metrics_info() const
Access aggregated network metrics.
network_metrics metrics
Aggregated metrics (atomic counters).
long long next_delay(int attempt) const
Compute the next backoff delay for attempt (1-based).
const network_options opt
Fixed options installed at construction.
http_response request_get(CURLU *url_handle, std::chrono::milliseconds &elapsed, std::string_view override={}) const
Execute a single HTTP GET using the prepared URL handle.
static curl_url_ptr build_url(std::string_view url, const parameter_list &params)
Construct a CURLU handle from url and append params.
static bool status_good(const http_response &response)
Success predicate: transport OK and HTTP 2xx.
http_response post_raw(std::string_view url, std::string_view body, std::string_view content_type, const parameter_list &query={}, std::string_view override={})
void apply_server_retry_hint(long long &sleep_ms) const
Apply server-provided retry hint if present.
std::unique_ptr< CURLU, decltype(&curl_url_cleanup)> curl_url_ptr
Unique pointer type for CURLU with proper deleter.
std::string build_form_body(const parameter_list &form) const
void update_metrics(const http_response &response, std::chrono::milliseconds elapsed)
Update counters and histograms after an attempt.
std::unique_ptr< CURL, decltype(&curl_easy_cleanup)> curl
Reused easy handle (not thread-safe).
static size_t write_callback(const char *ptr, size_t size, size_t n, void *data)
libcurl write callback: append chunk to response body.
http_response get(std::string_view url, const parameter_list &params={}, std::string_view override={})
Perform an HTTP GET to url with optional query params.
static constexpr std::string prefixes
constexpr std::size_t batched_kind_count
Number of batchable kinds (Q, P, L, M, E, form, sense).
entity_kind
Wikidata entity kind.
@ any
API selector (e.g., flush(any)); not directly batchable.
@ lexeme
IDs prefixed with 'L'.
@ form
Lexeme form IDs such as "L<lexeme>-F<form>".
@ unknown
Unrecognized/invalid identifier.
@ sense
Lexeme sense IDs such as "L<lexeme>-S<sense>".
std::string random_hex(const std::size_t n)
Return exactly n random hexadecimal characters (lowercase).
Result object for an HTTP transfer.
Fixed runtime options for the HTTP client.
Configuration for fetching entities via MediaWiki/Wikibase API.