Arachne 1.0
Arachne - the perpetual stitcher of Wikidata entities.
Loading...
Searching...
No Matches
pheidippides.cpp
Go to the documentation of this file.
1/*
2 * The MIT License (MIT)
3 *
4 * Copyright (c) 2025 Yaroslav Riabtsev <yaroslav.riabtsev@rwth-aachen.de>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25#include "pheidippides.hpp"
26#include "arachne.hpp"
27
28namespace arachnespace {
30 const std::unordered_set<std::string>& batch,
31 const corespace::entity_kind kind
32) {
33 if (batch.empty()) {
34 return nlohmann::json::object();
35 }
36 std::string url
38 ? "https://www.wikidata.org/w/api.php"
39 : "https://commons.wikimedia.org/w/api.php");
40 std::string props
41 = (kind != corespace::entity_kind::entity_schema ? join_str(opt.props)
42 : join_str(opt.prop));
43
44 corespace::parameter_list base_params { opt.params };
46 base_params.emplace_back("action", "query");
47 } else {
48 base_params.emplace_back("action", "wbgetentities");
49 }
50
51 std::string prefix {};
53 prefix = "EntitySchema:";
54 }
55 nlohmann::json combined = nlohmann::json::object();
56 for (auto&& chunk : batch | std::views::chunk(opt.batch_threshold)) {
57 std::vector<std::string> chunk_vec;
58 for (const auto& id : chunk) {
59 if (arachne::identify(id) != kind) {
60 continue;
61 }
62 chunk_vec.emplace_back(prefix + id);
63 }
64 corespace::parameter_list params { base_params };
65 auto entities = join_str(chunk_vec);
66
67 if (kind == corespace::entity_kind::entity_schema) {
68 params.emplace_back("titles", entities);
69 params.emplace_back("prop", props);
70 } else {
71 params.emplace_back("ids", entities);
72 params.emplace_back("props", props);
73 }
74 auto r = client.get(url, params);
75 auto data = nlohmann::json::parse(r.text, nullptr, true);
76 if (!data.is_object()) {
77 continue;
78 }
79 combined.merge_patch(data);
80 }
81 return combined;
82}
83
84const corespace::network_metrics& pheidippides::metrics_info() const {
85 return client.metrics_info();
86}
87
88std::string pheidippides::join_str(
89 std::span<const std::string> ids, const std::string_view separator
90) {
91 if (ids.empty()) {
92 return {};
93 }
94 auto it = ids.begin();
95 std::string result = *it;
96 for (++it; it != ids.end(); ++it) {
99 }
100 return result;
101}
102}
Batch courier for Wikidata/Commons: collects IDs, issues HTTP requests, and returns a merged JSON payload.
const corespace::network_metrics & metrics_info() const
Access aggregated network metrics of the underlying client.
nlohmann::json fetch_json(const std::unordered_set< std::string > &batch, corespace::entity_kind kind=corespace::entity_kind::any)
Fetch metadata for a set of entity IDs and return a merged JSON object.
entity_kind
Wikidata entity kind.
Definition utils.hpp:46
@ mediainfo
IDs prefixed with 'M'.
Definition utils.hpp:50
@ entity_schema
IDs prefixed with 'E'.
Definition utils.hpp:51