2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
29static constexpr std::string
prefixes =
"QPLME";
35 }
while (groups.contains(name));
37 auto [it, inserted] = groups.try_emplace(name);
48 throw std::invalid_argument(
"unknown kind of numeric IDs");
51 size_t last_size = groups[current_group].size();
52 for (
const int id : ids) {
53 std::string id_with_prefix = normalize(id, kind);
54 last_size = add_entity(id_with_prefix,
false, current_group);
64 throw std::invalid_argument(
"unknown kind of numeric IDs");
67 for (
const int id : ids) {
68 std::string id_with_prefix = normalize(id, kind);
69 added += touch_entity(id_with_prefix);
78 throw std::invalid_argument(
"invalid or unknown entity kind");
83 if (id.size() < 2 || id.front() !=
'L') {
84 throw std::invalid_argument(
85 "bad root-lexeme prefix of the entity: " + id
89 if (size_t pos = 1; !parse_id(id, pos, val)) {
90 throw std::invalid_argument(
91 "bad numeric identifier of the entity: " + id
94 return "L" + std::to_string(val);
100 const auto& batch = main_batches[
static_cast<size_t>(kind)];
101 const size_t size = batch.size();
102 auto data = phe_client.fetch_json(batch, kind);
104 return size > batch.size();
110 for (
const auto& batch : main_batches) {
113 return static_cast<
int>(sum);
115 const auto idx =
static_cast<std::size_t>(kind);
116 if (idx >= main_batches.size()) {
119 return static_cast<
int>(main_batches[idx].size());
123 if (entity.size() < 2) {
127 size_t kind =
prefixes.find(entity[pos++]);
129 if (kind == std::string::npos || !parse_id(entity, pos, id)) {
132 if (pos == entity.size()) {
136 || pos >= entity.size() || entity[pos++] !=
'-'
137 || pos >= entity.size()) {
140 const char tag = entity[pos++];
141 if (tag !=
'F' && tag !=
'S' || !parse_id(entity, pos, id)
142 || pos != entity.size()) {
149bool arachne::parse_id(
const std::string& entity, size_t& pos,
int& id) {
153 id = std::stoi(entity.substr(pos), &len);
157 if (id < 0 || len == 0 || std::to_string(id).size() != len) {
167 throw std::invalid_argument(
"normalize: id must be non-negative");
171 throw std::invalid_argument(
172 "normalize: kind must be a concrete, known entity kind"
175 auto idx =
static_cast<std::size_t>(kind);
181 return prefixes[idx] + std::to_string(id);
203 const bool interactive
206 auto [exist, last] = std::pair<
bool,
long long>(
false, -1);
207 if (!exist || last < 0) {
210 const auto now_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
211 std::chrono::system_clock::now().time_since_epoch()
214 const auto age = std::chrono::milliseconds { now_ms - last };
215 if (age > staleness_threshold) {
219 return ask_update(id, kind, age);
225 candidates[id_with_prefix]++;
226 if (candidates[id_with_prefix] >= candidates_threshold) {
229 extra_batches[
static_cast<size_t>(kind)].insert(canonical);
236 const std::string& id_with_prefix,
const bool force, std::string name
240 auto& group = groups[current_group];
241 group.insert(id_with_prefix);
243 || enqueue(canonical, kind, ui == corespace::interface::command_line)) {
244 auto& pool = main_batches[
static_cast<size_t>(kind)];
245 pool.insert(canonical);
246 if (pool.size() >= batch_threshold) {
Accumulates entity IDs into per-kind batches and organizes groups.
bool touch_entity(const std::string &id_with_prefix) noexcept
Increment the touch counter for a single full ID (prefix REQUIRED).
static std::string entity_root(const std::string &id)
Extract the lexeme root from a full ID string.
std::string current_group
int touch_ids(std::span< const int > ids, corespace::entity_kind kind)
Batch variant of touch for numeric IDs.
bool new_group(std::string name="")
Create or select a group and make it current.
size_t add_entity(const std::string &id_with_prefix, bool force=false, std::string name="")
Enqueue a full (prefixed) ID string and add it to a group.
bool enqueue(std::string_view id, corespace::entity_kind kind, bool interactive) const
Decide whether an entity should be enqueued for fetching.
static std::string normalize(int id, corespace::entity_kind kind)
Normalize a numeric ID with the given kind to a prefixed string.
void select_group(std::string name)
Select an existing group or create it on demand.
int queue_size(corespace::entity_kind kind) const noexcept
Get the number of queued (pending) entities tracked in the main batch containers.
static corespace::entity_kind identify(const std::string &entity) noexcept
Determine the kind of a full ID string.
bool flush(corespace::entity_kind kind=corespace::entity_kind::any)
Flush (send) up to batch_threshold entities of a specific kind.
size_t add_ids(std::span< const int > ids, corespace::entity_kind kind, std::string name="")
Enqueue numeric IDs with a given kind and add them to a group.
static constexpr std::string prefixes
entity_kind
Wikidata entity kind.
@ any
API selector (e.g., flush(any)); not directly batchable.
@ lexeme
IDs prefixed with 'L'.
@ form
Lexeme form IDs such as "L<lexeme>-F<form>".
@ unknown
Unrecognized/invalid identifier.
@ sense
Lexeme sense IDs such as "L<lexeme>-S<sense>".
std::string random_hex(const std::size_t n)
Return exactly n random hexadecimal characters (lowercase).