Arachne 1.0
Arachne - the perpetual stitcher of Wikidata entities.
Loading...
Searching...
No Matches
http_client.cpp
Go to the documentation of this file.
1/*
2 * The MIT License (MIT)
3 *
4 * Copyright (c) 2025 Yaroslav Riabtsev <yaroslav.riabtsev@rwth-aachen.de>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25#include "http_client.hpp"
26#include "rng.hpp"
27
28#include <mutex>
29#include <thread>
30
31namespace corespace {
32namespace {
33 std::once_flag global_curl;
34
35 void curl_inited() {
36 if (curl_global_init(CURL_GLOBAL_DEFAULT) != 0) {
37 throw std::runtime_error("curl_global_init failed");
38 }
39 }
40}
41
43 std::call_once(global_curl, curl_inited);
44
45 curl.reset(curl_easy_init());
46 if (!curl) {
47 throw std::runtime_error("curl_easy_init failed");
48 }
49
50 const std::string accept_header = "Accept: " + opt.accept;
51 curl_slist* headers = curl_slist_append(nullptr, accept_header.c_str());
52 if (!headers) {
53 throw std::runtime_error("failed to allocate curl headers");
54 }
55 header_list.reset(headers);
56
57 curl_easy_setopt(curl.get(), CURLOPT_USERAGENT, opt.user_agent.c_str());
58 curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, header_list.get());
59 curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
60 curl_easy_setopt(curl.get(), CURLOPT_ACCEPT_ENCODING, "");
61 curl_easy_setopt(curl.get(), CURLOPT_NOSIGNAL, 1L);
62 curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT_MS, opt.timeout_ms);
63 curl_easy_setopt(curl.get(), CURLOPT_CONNECTTIMEOUT_MS, opt.connect_ms);
64}
65
66const network_metrics& http_client::metrics_info() const { return metrics; }
67
69 const std::string_view url, const parameter_list& params,
70 const std::string_view override
71) {
72 std::lock_guard lk(mu);
73 const auto url_handle = build_url(url, params);
74 for (int attempt = 1;; ++attempt) {
75 std::chrono::milliseconds elapsed { 0l };
76 http_response response
77 = request_get(url_handle.get(), elapsed, override);
78
79 update_metrics(response, elapsed);
80
81 const bool net_ok = (response.error_code == CURLE_OK);
82 if (status_good(response)) {
83 return response;
84 }
85 if (attempt <= opt.max_retries && status_retry(response, net_ok)) {
86 ++metrics.retries;
87 long long sleep_ms = next_delay(attempt);
89 metrics.sleep_ms += sleep_ms;
90 std::this_thread::sleep_for(std::chrono::milliseconds(sleep_ms));
91 continue;
92 }
93
94 if (!net_ok) {
95 throw std::runtime_error("curl error: " + response.error_message);
96 }
97 throw std::runtime_error(
98 "http error: " + std::to_string(response.status_code)
99 );
100 }
101}
102
104 const std::string_view url, const parameter_list& form,
105 const parameter_list& query, const std::string_view override
106) {
107 std::lock_guard lk(mu);
108 const auto url_handle = build_url(url, query);
109 const std::string body = build_form_body(form);
110 for (int attempt = 1;; ++attempt) {
111 std::chrono::milliseconds elapsed { 0l };
112 http_response response = request_post(
113 url_handle.get(), elapsed, "application/x-www-form-urlencoded",
114 body, override
115 );
116 update_metrics(response, elapsed);
117 const bool net_ok = (response.error_code == CURLE_OK);
118 if (status_good(response)) {
119 return response;
120 }
121 if (attempt <= opt.max_retries && status_retry(response, net_ok)) {
122 ++metrics.retries;
123 long long sleep_ms = next_delay(attempt);
125 metrics.sleep_ms += sleep_ms;
126 std::this_thread::sleep_for(std::chrono::milliseconds(sleep_ms));
127 continue;
128 }
129 if (!net_ok) {
130 throw std::runtime_error("curl error: " + response.error_message);
131 }
132 throw std::runtime_error(
133 "http error: " + std::to_string(response.status_code)
134 );
135 }
136}
137
139 const std::string_view url, const std::string_view body,
140 const std::string_view content_type, const parameter_list& query,
141 const std::string_view override
142) {
143 std::lock_guard lk(mu);
144 const auto url_handle = build_url(url, query);
145 const std::string body_copy(body);
146 for (int attempt = 1;; ++attempt) {
147 std::chrono::milliseconds elapsed { 0l };
148 http_response response = request_post(
149 url_handle.get(), elapsed, content_type, body_copy, override
150 );
151 update_metrics(response, elapsed);
152 const bool net_ok = (response.error_code == CURLE_OK);
153 if (status_good(response)) {
154 return response;
155 }
156 if (attempt <= opt.max_retries && status_retry(response, net_ok)) {
157 ++metrics.retries;
158 long long sleep_ms = next_delay(attempt);
160 metrics.sleep_ms += sleep_ms;
161 std::this_thread::sleep_for(std::chrono::milliseconds(sleep_ms));
162 continue;
163 }
164 if (!net_ok) {
165 throw std::runtime_error("curl error: " + response.error_message);
166 }
167 throw std::runtime_error(
168 "http error: " + std::to_string(response.status_code)
169 );
170 }
171}
172
173http_client::curl_url_ptr http_client::build_url(
174 const std::string_view url, const parameter_list& params
175) {
176 curl_url_ptr url_handle(curl_url(), &curl_url_cleanup);
177 if (!url_handle) {
178 throw std::runtime_error("curl_url failed");
179 }
180
181 const std::string url_copy(url);
182 if (curl_url_set(url_handle.get(), CURLUPART_URL, url_copy.c_str(), 0)
183 != CURLUE_OK) {
184 throw std::runtime_error("failed to set request url");
185 }
186
187 for (const auto& [key, value] : params) {
188 std::string parameter = key + "=" + std::string(value);
189 if (curl_url_set(
190 url_handle.get(), CURLUPART_QUERY, parameter.c_str(),
191 CURLU_APPENDQUERY | CURLU_URLENCODE
192 )
193 != CURLUE_OK) {
194 throw std::runtime_error("failed to append query parameter");
195 }
196 }
197
198 return url_handle;
199}
200
202 CURLU* const url_handle, std::chrono::milliseconds& elapsed,
203 const std::string_view override
204) const {
205 using namespace std::chrono;
206
207 http_response response;
208
209 curl_slist* tmp_headers = nullptr;
210 if (!override.empty()) {
211 const std::string h = "Accept: " + std::string(override);
212 tmp_headers = curl_slist_append(nullptr, h.c_str());
213 curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, tmp_headers);
214 } else {
215 curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, header_list.get());
216 }
217
218 curl_easy_setopt(curl.get(), CURLOPT_HTTPGET, 1L);
219 curl_easy_setopt(curl.get(), CURLOPT_CURLU, url_handle);
220 curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, write_callback);
221 curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &response.text);
222
223 const auto t0 = steady_clock::now();
224 response.error_code = curl_easy_perform(curl.get());
225 curl_easy_setopt(curl.get(), CURLOPT_CURLU, nullptr);
226 const auto t1 = steady_clock::now();
227 elapsed = duration_cast<milliseconds>(t1 - t0);
228
229 curl_easy_getinfo(
230 curl.get(), CURLINFO_RESPONSE_CODE, &response.status_code
231 );
232 update_headers(response);
233
234 if (response.error_code != CURLE_OK) {
235 response.error_message = curl_easy_strerror(response.error_code);
236 }
237
238 if (tmp_headers) {
239 curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, header_list.get());
240 curl_slist_free_all(tmp_headers);
241 }
242
243 return response;
244}
245
247 CURLU* url_handle, std::chrono::milliseconds& elapsed,
248 const std::string_view content_type, const std::string_view body,
249 const std::string_view override
250) const {
251 using namespace std::chrono;
252 http_response response;
253 curl_slist* tmp_headers = nullptr;
254
255 const std::string ct = "Content-Type: " + std::string(content_type);
256 tmp_headers = curl_slist_append(tmp_headers, ct.c_str());
257 const std::string acc = "Accept: "
258 + std::string(override.empty() ? opt.accept : std::string(override));
259 tmp_headers = curl_slist_append(tmp_headers, acc.c_str());
260 curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, tmp_headers);
261
262 curl_easy_setopt(curl.get(), CURLOPT_CURLU, url_handle);
263 curl_easy_setopt(curl.get(), CURLOPT_POST, 1L);
264 curl_easy_setopt(curl.get(), CURLOPT_POSTFIELDS, body.data());
265 curl_easy_setopt(
266 curl.get(), CURLOPT_POSTFIELDSIZE, static_cast<long>(body.size())
267 );
268 curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, write_callback);
269 curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &response.text);
270 const auto t0 = steady_clock::now();
271 response.error_code = curl_easy_perform(curl.get());
272 curl_easy_setopt(curl.get(), CURLOPT_POSTFIELDS, nullptr);
273 curl_easy_setopt(curl.get(), CURLOPT_POSTFIELDSIZE, 0L);
274 curl_easy_setopt(curl.get(), CURLOPT_POST, 0L);
275 curl_easy_setopt(curl.get(), CURLOPT_CURLU, nullptr);
276 const auto t1 = steady_clock::now();
277 elapsed = duration_cast<milliseconds>(t1 - t0);
278 curl_easy_getinfo(
279 curl.get(), CURLINFO_RESPONSE_CODE, &response.status_code
280 );
281 update_headers(response);
282 if (response.error_code != CURLE_OK) {
283 response.error_message = curl_easy_strerror(response.error_code);
284 }
285 curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, header_list.get());
286 if (tmp_headers) {
287 curl_slist_free_all(tmp_headers);
288 }
289 return response;
290}
291
292std::string http_client::build_form_body(const parameter_list& form) const {
293 std::string body;
294 bool first = true;
295 for (const auto& [key, value] : form) {
296 char* ekey = curl_easy_escape(
297 curl.get(), key.c_str(), static_cast<int>(key.size())
298 );
299 char* evalue = curl_easy_escape(
300 curl.get(), value.data(), static_cast<int>(value.size())
301 );
302 if (!first) {
303 body.push_back('&');
304 }
305 body.append(ekey ? ekey : "");
306 body.push_back('=');
307 body.append(evalue ? evalue : "");
308 if (ekey) {
309 curl_free(ekey);
310 }
311 if (evalue) {
312 curl_free(evalue);
313 }
314 first = false;
315 }
316 return body;
317}
318
320 response.header.clear();
321 for (curl_header* header = nullptr;;) {
322 header = curl_easy_nextheader(curl.get(), CURLH_HEADER, 0, header);
323 if (!header) {
324 break;
325 }
326 response.header.emplace(header->name, header->value);
327 }
328}
329
331 const http_response& response, const std::chrono::milliseconds elapsed
332) {
333 ++metrics.requests;
334 metrics.network_ms += elapsed.count();
335
336 if (response.status_code < metrics.statuses.size()) {
337 ++metrics.statuses[response.status_code];
338 }
339 metrics.bytes_received += response.text.size();
340}
341
342bool http_client::status_good(const http_response& response) {
343 return response.error_code == CURLE_OK && response.status_code >= 200
344 && response.status_code < 300;
345}
346
348 const http_response& response, const bool net_ok
349) {
350 return !net_ok || response.status_code == 429 || response.status_code == 408
351 || (response.status_code >= 500 && response.status_code < 600);
352}
353
354long long http_client::next_delay(const int attempt) const {
355 const long long base = opt.retry_base_ms * (1 << (attempt - 1));
356 std::uniform_int_distribution<long long> d(0, base);
357 return std::min(base + d(rng()), opt.retry_max_ms);
358}
359
360void http_client::apply_server_retry_hint(long long& sleep_ms) const {
361 curl_off_t retry_after = -1;
362 if (curl_easy_getinfo(curl.get(), CURLINFO_RETRY_AFTER, &retry_after)
363 == CURLE_OK
364 && retry_after >= 0) {
365 const long long server_hint_ms
366 = std::chrono::duration_cast<std::chrono::milliseconds>(
367 std::chrono::seconds(retry_after)
368 )
369 .count();
370 sleep_ms = std::max(sleep_ms, server_hint_ms);
371 }
372}
373
374size_t http_client::write_callback(
375 const char* ptr, const size_t size, const size_t n, void* data
376) {
377 const size_t total = size * n;
378 auto* text = static_cast<std::string*>(data);
379 text->append(ptr, total);
380 return total;
381}
382}
static bool status_retry(const http_response &response, bool net_ok)
Retry predicate for transient outcomes.
http_response post_form(std::string_view url, const parameter_list &form, const parameter_list &query={}, std::string_view override={})
http_response request_post(CURLU *url_handle, std::chrono::milliseconds &elapsed, std::string_view content_type, std::string_view body, std::string_view override) const
void update_headers(http_response &response) const
Refresh the header multimap from the last transfer.
http_client()
Construct a client and initialize libcurl.
const network_metrics & metrics_info() const
Access aggregated network metrics.
long long next_delay(int attempt) const
Compute the next backoff delay for attempt (1-based).
http_response request_get(CURLU *url_handle, std::chrono::milliseconds &elapsed, std::string_view override={}) const
Execute a single HTTP GET using the prepared URL handle.
static bool status_good(const http_response &response)
Success predicate: transport OK and HTTP 2xx.
http_response post_raw(std::string_view url, std::string_view body, std::string_view content_type, const parameter_list &query={}, std::string_view override={})
void apply_server_retry_hint(long long &sleep_ms) const
Apply server-provided retry hint if present.
std::unique_ptr< CURLU, decltype(&curl_url_cleanup)> curl_url_ptr
Unique pointer type for CURLU with proper deleter.
std::string build_form_body(const parameter_list &form) const
void update_metrics(const http_response &response, std::chrono::milliseconds elapsed)
Update counters and histograms after an attempt.
http_response get(std::string_view url, const parameter_list &params={}, std::string_view override={})
Perform an HTTP GET to url with optional query params.
Result object for an HTTP transfer.
Definition utils.hpp:145
std::string error_message
Non-empty on libcurl error.
Definition utils.hpp:153
CURLcode error_code
libcurl transport/result code.
Definition utils.hpp:152