2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
25 #ifndef ARACHNE_RNG_HPP
26 #define ARACHNE_RNG_HPP
33
34
35
36
37
38
39
40 std::mt19937_64&
rng();
43
44
45
46
47
48
Accumulates entity IDs into per-kind batches and organizes groups.
std::unordered_map< std::string, int > candidates
std::array< std::unordered_set< std::string >, batched_kind_count > extra_batches
bool touch_entity(const std::string &id_with_prefix) noexcept
Increment the touch counter for a single full ID (prefix REQUIRED).
static std::string entity_root(const std::string &id)
Extract the lexeme root from a full ID string.
std::string current_group
int touch_ids(std::span< const int > ids, corespace::entity_kind kind)
Batch variant of touch for numeric IDs.
static bool parse_id(const std::string &entity, size_t &pos, int &id)
Parse a full ID string and extract the numeric portion.
bool new_group(std::string name="")
Create or select a group and make it current.
size_t add_entity(const std::string &id_with_prefix, bool force=false, std::string name="")
Enqueue a full (prefixed) ID string and add it to a group.
std::unordered_map< std::string, std::unordered_set< std::string > > groups
std::chrono::milliseconds staleness_threshold
bool enqueue(std::string_view id, corespace::entity_kind kind, bool interactive) const
Decide whether an entity should be enqueued for fetching.
const size_t batch_threshold
Typical unauthenticated entity-per-request cap.
const int candidates_threshold
Intentional high bar for curiosity-driven candidates.
static std::string normalize(int id, corespace::entity_kind kind)
Normalize a numeric ID with the given kind to a prefixed string.
static bool ask_update(std::string_view id, corespace::entity_kind kind, std::chrono::milliseconds age)
Placeholder for interactive staleness confirmation.
void select_group(std::string name)
Select an existing group or create it on demand.
std::array< std::unordered_set< std::string >, batched_kind_count > main_batches
int queue_size(corespace::entity_kind kind) const noexcept
Get the number of queued (pending) entities tracked in the main batch containers.
static corespace::entity_kind identify(const std::string &entity) noexcept
Determine the kind of a full ID string.
bool flush(corespace::entity_kind kind=corespace::entity_kind::any)
Flush (send) up to batch_threshold entities of a specific kind.
size_t add_ids(std::span< const int > ids, corespace::entity_kind kind, std::string name="")
Enqueue numeric IDs with a given kind and add them to a group.
Batch courier for Wikidata/Commons: collects IDs, issues HTTP requests, and returns a merged JSON payload.
corespace::call_preview preview(const corespace::sparql_request &request) const
Produce a call preview describing the HTTP request that would be made.
corespace::http_client client
Reused HTTP client (not safe to share across threads).
nlohmann::json wdqs(std::string query)
Convenience wrapper to run a raw SPARQL query string.
corespace::call_preview build_call_preview(const corespace::sparql_request &request) const
corespace::wdqs_options wdqs_opt
nlohmann::json sparql(const corespace::sparql_request &request)
Execute a SPARQL query according to the provided request.
static std::string join_str(std::span< const std::string > ids, std::string_view separator="|")
Join a span of strings with a separator (no encoding or validation).
const corespace::network_metrics & metrics_info() const
Access aggregated network metrics of the underlying client.
nlohmann::json fetch_json(const std::unordered_set< std::string > &batch, corespace::entity_kind kind=corespace::entity_kind::any)
Fetch metadata for a set of entity IDs and return a merged JSON object.
corespace::options opt
Request shaping parameters (chunking, fields, base params).
static bool status_retry(const http_response &response, bool net_ok)
Retry predicate for transient outcomes.
std::unique_ptr< curl_slist, decltype(&curl_slist_free_all)> header_list
Owned request header list.
void update_headers(http_response &response) const
Refresh the header multimap from the last transfer.
http_response request_get(CURLU *url_handle, std::chrono::milliseconds &elapsed, std::string_view accept={}, int timeout_sec=-1) const
Execute a single HTTP GET using the prepared URL handle.
http_client()
Construct a client and initialize libcurl.
const network_metrics & metrics_info() const
Access aggregated network metrics.
network_metrics metrics
Aggregated metrics (atomic counters).
http_response post_raw(std::string_view url, std::string_view body, std::string_view content_type, const parameter_list &query={}, std::string_view accept={}, int timeout_sec=-1)
Perform an HTTP POST with a raw body.
long long next_delay(int attempt) const
Compute the next backoff delay for attempt (1-based).
const network_options opt
Fixed options installed at construction.
static curl_url_ptr build_url(std::string_view url, const parameter_list &params)
Construct a CURLU handle from url and append params.
http_response get(std::string_view url, const parameter_list &params={}, std::string_view accept={}, int timeout_sec=-1)
Perform an HTTP GET to url with optional query params.
http_response request_post(CURLU *url_handle, std::chrono::milliseconds &elapsed, std::string_view content_type, std::string_view body, std::string_view accept={}, int timeout_sec=-1) const
Execute a single HTTP POST with given body and content type.
static bool status_good(const http_response &response)
Success predicate: transport OK and HTTP 2xx.
http_response post_form(std::string_view url, const parameter_list &form, const parameter_list &query={}, std::string_view accept={}, int timeout_sec=-1)
Perform an HTTP POST with form-encoded body.
void apply_server_retry_hint(long long &sleep_ms) const
Apply server-provided retry hint if present.
std::unique_ptr< CURLU, decltype(&curl_url_cleanup)> curl_url_ptr
Unique pointer type for CURLU with proper deleter.
std::string build_form_body(const parameter_list &form) const
void update_metrics(const http_response &response, std::chrono::milliseconds elapsed)
Update counters and histograms after an attempt.
std::unique_ptr< CURL, decltype(&curl_easy_cleanup)> curl
Reused easy handle (not thread-safe).
static size_t write_callback(const char *ptr, size_t size, size_t n, void *data)
libcurl write callback: append chunk to response body.
static constexpr std::string prefixes
constexpr std::size_t batched_kind_count
Number of batchable kinds (Q, P, L, M, E, form, sense).
std::mt19937_64 & rng()
Shared PRNG seeded on first use.
std::string resolve_accept(const sparql_request &request, const service_profile &profile, const std::string_view override_accept)
Resolves the Accept header value for a SPARQL request.
http_method choose_http_method(const sparql_request &request, const std::size_t threshold)
Chooses the appropriate HTTP method for a SPARQL request.
std::pair< std::string, bool > resolve_body_strategy(const sparql_request &request)
Determines the body content and strategy for a SPARQL request.
http_method
HTTP method to use for a request.
void sort_parameters(parameter_list &params)
Sorts the parameter list in-place by key.
entity_kind
Wikidata entity kind.
@ any
API selector (e.g., flush(any)); not directly batchable.
@ property
IDs prefixed with 'P'.
@ lexeme
IDs prefixed with 'L'.
@ form
Lexeme form IDs such as "L<lexeme>-F<form>".
@ item
IDs prefixed with 'Q'.
@ mediainfo
IDs prefixed with 'M'.
@ entity_schema
IDs prefixed with 'E'.
@ unknown
Unrecognized/invalid identifier.
@ sense
Lexeme sense IDs such as "L<lexeme>-S<sense>".
service_kind
Identifies supported SPARQL services.
const service_profile & get_service_profile(const service_kind kind)
Retrieve the service profile for a given service kind.
void append_common_params(const service_kind kind, const http_method method, parameter_list &params)
Appends common parameters required for a service and HTTP method.
std::pair< std::string, std::string > parameter
Single query parameter: key=value (pre-encoding is handled by libcurl).
std::string random_hex(const std::size_t n)
Return exactly n random hexadecimal characters (lowercase).
http_method_hint
Hint for selecting the HTTP method for a request.
parameter_list form_params
int timeout_sec
Per-request timeout in seconds (-1 for default).
std::string url
Full request URL (excluding query parameters).
bool has_param(std::string_view key) const
Check whether a query parameter with the key `key` exists.
parameter_list query_params
std::string get_param(std::string_view key) const
Retrieve the first value for query parameter key.
http_method method
HTTP method to use for the request (GET, POST, etc.).
std::string accept
Accept header value indicating expected response format.
std::string content_type
Content-Type header value for the request body.
Result object for an HTTP transfer.
std::string error_message
Non-empty on libcurl error.
std::string text
Response body accumulated across callbacks.
header_map header
Response headers from the final attempt.
size_t status_code
HTTP status code (e.g., 200, 404).
CURLcode error_code
libcurl transport/result code.
std::atomic< long long > network_ms
Total time spent in libcurl (ms).
network_metrics()
Zero-initialize per-status counters.
std::atomic< unsigned > requests
Finished attempts (success or failure).
std::atomic< long long > sleep_ms
Total backoff duration slept (ms).
std::atomic< unsigned > retries
Number of retry cycles triggered.
std::atomic< size_t > bytes_received
Sum of response body sizes (bytes).
std::array< std::atomic< unsigned >, 600 > statuses
Per-code histogram for HTTP 0..599.
Fixed runtime options for the HTTP client.
int retry_base_ms
Base for exponential backoff (ms).
int connect_ms
Connect timeout (ms).
int timeout_ms
Total request timeout (ms).
std::string accept
Default Accept header.
std::string user_agent
Default User-Agent.
int max_retries
Max retry attempts after the first try.
long long retry_max_ms
Max per-attempt backoff (ms).
Configuration for fetching entities via MediaWiki/Wikibase API.
std::vector< std::string > props
std::vector< std::string > prop
Static configuration values describing a remote service.
std::vector< std::string > rate_hints
std::string default_accept
static constexpr size_t service_default
Options specific to WDQS usage and heuristics.
std::string accept_override
std::size_t length_threshold