Arachne 1.0
Arachne - the perpetual stitcher of Wikidata entities.
Loading...
Searching...
No Matches
http_client.cpp
Go to the documentation of this file.
1/*
2 * The MIT License (MIT)
3 *
4 * Copyright (c) 2025 Yaroslav Riabtsev <yaroslav.riabtsev@rwth-aachen.de>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25#include "http_client.hpp"
26#include "rng.hpp"
27
28#include <mutex>
29#include <thread>
30
31namespace corespace {
32namespace {
33 std::once_flag global_curl;
34
35 /**
36 * @brief Initialize libcurl globally once per process.
37 *
38 * Called via std::call_once to perform curl_global_init and throw
39 * std::runtime_error on failure.
40 */
41 void curl_inited() {
42 if (curl_global_init(CURL_GLOBAL_DEFAULT) != 0) {
43 throw std::runtime_error("curl_global_init failed");
44 }
45 }
46}
47
49 std::call_once(global_curl, curl_inited);
50
51 curl.reset(curl_easy_init());
52 if (!curl) {
53 throw std::runtime_error("curl_easy_init failed");
54 }
55
56 const std::string accept_header = "Accept: " + opt.accept;
57 curl_slist* headers = curl_slist_append(nullptr, accept_header.c_str());
58 if (!headers) {
59 throw std::runtime_error("failed to allocate curl headers");
60 }
61 header_list.reset(headers);
62
63 curl_easy_setopt(curl.get(), CURLOPT_USERAGENT, opt.user_agent.c_str());
64 curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, header_list.get());
65 curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
66 curl_easy_setopt(curl.get(), CURLOPT_ACCEPT_ENCODING, "");
67 curl_easy_setopt(curl.get(), CURLOPT_NOSIGNAL, 1L);
68 curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT_MS, opt.timeout_ms);
69 curl_easy_setopt(curl.get(), CURLOPT_CONNECTTIMEOUT_MS, opt.connect_ms);
70}
71
72const network_metrics& http_client::metrics_info() const { return metrics; }
73
75 const std::string_view url, const parameter_list& params,
76 const std::string_view accept, const int timeout_sec
77) {
78 std::lock_guard lk(mu);
79 const auto url_handle = build_url(url, params);
80 for (int attempt = 1;; ++attempt) {
81 std::chrono::milliseconds elapsed { 0l };
82 http_response response
83 = request_get(url_handle.get(), elapsed, accept, timeout_sec);
84
85 update_metrics(response, elapsed);
86
87 const bool net_ok = (response.error_code == CURLE_OK);
88 if (status_good(response)) {
89 return response;
90 }
91 if (attempt <= opt.max_retries && status_retry(response, net_ok)) {
92 ++metrics.retries;
93 long long sleep_ms = next_delay(attempt);
95 metrics.sleep_ms += sleep_ms;
96 std::this_thread::sleep_for(std::chrono::milliseconds(sleep_ms));
97 continue;
98 }
99
100 if (!net_ok) {
101 throw std::runtime_error("curl error: " + response.error_message);
102 }
103 throw std::runtime_error(
104 "http error: " + std::to_string(response.status_code)
105 );
106 }
107}
108
110 const std::string_view url, const parameter_list& form,
111 const parameter_list& query, const std::string_view accept,
112 const int timeout_sec
113) {
114 std::lock_guard lk(mu);
115 const auto url_handle = build_url(url, query);
116 const std::string body = build_form_body(form);
117 for (int attempt = 1;; ++attempt) {
118 std::chrono::milliseconds elapsed { 0l };
119 http_response response = request_post(
120 url_handle.get(), elapsed, "application/x-www-form-urlencoded",
121 body, accept, timeout_sec
122 );
123 update_metrics(response, elapsed);
124 const bool net_ok = (response.error_code == CURLE_OK);
125 if (status_good(response)) {
126 return response;
127 }
128 if (attempt <= opt.max_retries && status_retry(response, net_ok)) {
129 ++metrics.retries;
130 long long sleep_ms = next_delay(attempt);
132 metrics.sleep_ms += sleep_ms;
133 std::this_thread::sleep_for(std::chrono::milliseconds(sleep_ms));
134 continue;
135 }
136 if (!net_ok) {
137 throw std::runtime_error("curl error: " + response.error_message);
138 }
139 throw std::runtime_error(
140 "http error: " + std::to_string(response.status_code)
141 );
142 }
143}
144
146 const std::string_view url, const std::string_view body,
147 const std::string_view content_type, const parameter_list& query,
148 const std::string_view accept, const int timeout_sec
149) {
150 std::lock_guard lk(mu);
151 const auto url_handle = build_url(url, query);
152 const std::string body_copy(body);
153 for (int attempt = 1;; ++attempt) {
154 std::chrono::milliseconds elapsed { 0l };
155 http_response response = request_post(
156 url_handle.get(), elapsed, content_type, body_copy, accept,
157 timeout_sec
158 );
159 update_metrics(response, elapsed);
160 const bool net_ok = (response.error_code == CURLE_OK);
161 if (status_good(response)) {
162 return response;
163 }
164 if (attempt <= opt.max_retries && status_retry(response, net_ok)) {
165 ++metrics.retries;
166 long long sleep_ms = next_delay(attempt);
168 metrics.sleep_ms += sleep_ms;
169 std::this_thread::sleep_for(std::chrono::milliseconds(sleep_ms));
170 continue;
171 }
172 if (!net_ok) {
173 throw std::runtime_error("curl error: " + response.error_message);
174 }
175 throw std::runtime_error(
176 "http error: " + std::to_string(response.status_code)
177 );
178 }
179}
180
181http_client::curl_url_ptr http_client::build_url(
182 const std::string_view url, const parameter_list& params
183) {
184 curl_url_ptr url_handle(curl_url(), &curl_url_cleanup);
185 if (!url_handle) {
186 throw std::runtime_error("curl_url failed");
187 }
188
189 const std::string url_copy(url);
190 if (curl_url_set(url_handle.get(), CURLUPART_URL, url_copy.c_str(), 0)
191 != CURLUE_OK) {
192 throw std::runtime_error("failed to set request url");
193 }
194
195 for (const auto& [key, value] : params) {
196 std::string parameter = key + "=" + std::string(value);
197 if (curl_url_set(
198 url_handle.get(), CURLUPART_QUERY, parameter.c_str(),
199 CURLU_APPENDQUERY | CURLU_URLENCODE
200 )
201 != CURLUE_OK) {
202 throw std::runtime_error("failed to append query parameter");
203 }
204 }
205
206 return url_handle;
207}
208
210 CURLU* const url_handle, std::chrono::milliseconds& elapsed,
211 const std::string_view accept, const int timeout_sec
212) const {
213 using namespace std::chrono;
214
215 http_response response;
216
217 curl_slist* tmp_headers = nullptr;
218 if (!accept.empty()) {
219 const std::string h = "Accept: " + std::string(accept);
220 tmp_headers = curl_slist_append(nullptr, h.c_str());
221 curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, tmp_headers);
222 } else {
223 curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, header_list.get());
224 }
225
226 curl_easy_setopt(curl.get(), CURLOPT_HTTPGET, 1L);
227 const long timeout_ms = (timeout_sec >= 0)
228 ? std::max(0L, timeout_sec * 1000L)
229 : opt.timeout_ms;
230 curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT_MS, timeout_ms);
231 curl_easy_setopt(curl.get(), CURLOPT_CURLU, url_handle);
232 curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, write_callback);
233 curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &response.text);
234
235 const auto t0 = steady_clock::now();
236 response.error_code = curl_easy_perform(curl.get());
237 curl_easy_setopt(curl.get(), CURLOPT_CURLU, nullptr);
238 const auto t1 = steady_clock::now();
239 elapsed = duration_cast<milliseconds>(t1 - t0);
240
241 curl_easy_getinfo(
242 curl.get(), CURLINFO_RESPONSE_CODE, &response.status_code
243 );
244 update_headers(response);
245
246 if (response.error_code != CURLE_OK) {
247 response.error_message = curl_easy_strerror(response.error_code);
248 }
249
250 if (tmp_headers) {
251 curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, header_list.get());
252 curl_slist_free_all(tmp_headers);
253 }
254
255 return response;
256}
257
259 CURLU* url_handle, std::chrono::milliseconds& elapsed,
260 const std::string_view content_type, const std::string_view body,
261 const std::string_view accept, const int timeout_sec
262) const {
263 using namespace std::chrono;
264 http_response response;
265 curl_slist* tmp_headers = nullptr;
266
267 const std::string ct = "Content-Type: " + std::string(content_type);
268 tmp_headers = curl_slist_append(tmp_headers, ct.c_str());
269 const std::string acc = "Accept: "
270 + std::string(accept.empty() ? opt.accept : std::string(accept));
271 tmp_headers = curl_slist_append(tmp_headers, acc.c_str());
272 curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, tmp_headers);
273
274 curl_easy_setopt(curl.get(), CURLOPT_CURLU, url_handle);
275 const long timeout_ms = (timeout_sec >= 0)
276 ? std::max(0L, timeout_sec * 1000L)
277 : opt.timeout_ms;
278 curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT_MS, timeout_ms);
279 curl_easy_setopt(curl.get(), CURLOPT_POST, 1L);
280 curl_easy_setopt(curl.get(), CURLOPT_POSTFIELDS, body.data());
281 curl_easy_setopt(
282 curl.get(), CURLOPT_POSTFIELDSIZE, static_cast<long>(body.size())
283 );
284 curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, write_callback);
285 curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &response.text);
286 const auto t0 = steady_clock::now();
287 response.error_code = curl_easy_perform(curl.get());
288 curl_easy_setopt(curl.get(), CURLOPT_POSTFIELDS, nullptr);
289 curl_easy_setopt(curl.get(), CURLOPT_POSTFIELDSIZE, 0L);
290 curl_easy_setopt(curl.get(), CURLOPT_POST, 0L);
291 curl_easy_setopt(curl.get(), CURLOPT_CURLU, nullptr);
292 const auto t1 = steady_clock::now();
293 elapsed = duration_cast<milliseconds>(t1 - t0);
294 curl_easy_getinfo(
295 curl.get(), CURLINFO_RESPONSE_CODE, &response.status_code
296 );
297 update_headers(response);
298 if (response.error_code != CURLE_OK) {
299 response.error_message = curl_easy_strerror(response.error_code);
300 }
301 curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, header_list.get());
302 if (tmp_headers) {
303 curl_slist_free_all(tmp_headers);
304 }
305 return response;
306}
307
308std::string http_client::build_form_body(const parameter_list& form) const {
309 std::string body;
310 bool first = true;
311 for (const auto& [key, value] : form) {
312 char* ekey = curl_easy_escape(
313 curl.get(), key.c_str(), static_cast<int>(key.size())
314 );
315 char* evalue = curl_easy_escape(
316 curl.get(), value.data(), static_cast<int>(value.size())
317 );
318 if (!first) {
319 body.push_back('&');
320 }
321 body.append(ekey ? ekey : "");
322 body.push_back('=');
323 body.append(evalue ? evalue : "");
324 if (ekey) {
325 curl_free(ekey);
326 }
327 if (evalue) {
328 curl_free(evalue);
329 }
330 first = false;
331 }
332 return body;
333}
334
336 response.header.clear();
337 for (curl_header* header = nullptr;;) {
338 header = curl_easy_nextheader(curl.get(), CURLH_HEADER, 0, header);
339 if (!header) {
340 break;
341 }
342 response.header.emplace(header->name, header->value);
343 }
344}
345
347 const http_response& response, const std::chrono::milliseconds elapsed
348) {
349 ++metrics.requests;
350 metrics.network_ms += elapsed.count();
351
352 if (response.status_code < metrics.statuses.size()) {
353 ++metrics.statuses[response.status_code];
354 }
355 metrics.bytes_received += response.text.size();
356}
357
358bool http_client::status_good(const http_response& response) {
359 return response.error_code == CURLE_OK && response.status_code >= 200
360 && response.status_code < 300;
361}
362
364 const http_response& response, const bool net_ok
365) {
366 return !net_ok || response.status_code == 429 || response.status_code == 408
367 || (response.status_code >= 500 && response.status_code < 600);
368}
369
370long long http_client::next_delay(const int attempt) const {
371 const long long base = opt.retry_base_ms * (1 << (attempt - 1));
372 std::uniform_int_distribution<long long> d(0, base);
373 return std::min(base + d(rng()), opt.retry_max_ms);
374}
375
376void http_client::apply_server_retry_hint(long long& sleep_ms) const {
377 curl_off_t retry_after = -1;
378 if (curl_easy_getinfo(curl.get(), CURLINFO_RETRY_AFTER, &retry_after)
379 == CURLE_OK
380 && retry_after >= 0) {
381 const long long server_hint_ms
382 = std::chrono::duration_cast<std::chrono::milliseconds>(
383 std::chrono::seconds(retry_after)
384 )
385 .count();
386 sleep_ms = std::max(sleep_ms, server_hint_ms);
387 }
388}
389
390size_t http_client::write_callback(
391 const char* ptr, const size_t size, const size_t n, void* data
392) {
393 const size_t total = size * n;
394 auto* text = static_cast<std::string*>(data);
395 text->append(ptr, total);
396 return total;
397}
398}
static bool status_retry(const http_response &response, bool net_ok)
Retry predicate for transient outcomes.
void update_headers(http_response &response) const
Refresh the header multimap from the last transfer.
http_response request_get(CURLU *url_handle, std::chrono::milliseconds &elapsed, std::string_view accept={}, int timeout_sec=-1) const
Execute a single HTTP GET using the prepared URL handle.
http_client()
Construct a client and initialize libcurl.
const network_metrics & metrics_info() const
Access aggregated network metrics.
http_response post_raw(std::string_view url, std::string_view body, std::string_view content_type, const parameter_list &query={}, std::string_view accept={}, int timeout_sec=-1)
Perform an HTTP POST with a raw body.
long long next_delay(int attempt) const
Compute the next backoff delay for attempt (1-based).
http_response get(std::string_view url, const parameter_list &params={}, std::string_view accept={}, int timeout_sec=-1)
Perform an HTTP GET to url with optional query params.
http_response request_post(CURLU *url_handle, std::chrono::milliseconds &elapsed, std::string_view content_type, std::string_view body, std::string_view accept={}, int timeout_sec=-1) const
Execute a single HTTP POST with given body and content type.
static bool status_good(const http_response &response)
Success predicate: transport OK and HTTP 2xx.
http_response post_form(std::string_view url, const parameter_list &form, const parameter_list &query={}, std::string_view accept={}, int timeout_sec=-1)
Perform an HTTP POST with form-encoded body.
void apply_server_retry_hint(long long &sleep_ms) const
Apply server-provided retry hint if present.
std::unique_ptr< CURLU, decltype(&curl_url_cleanup)> curl_url_ptr
Unique pointer type for CURLU with proper deleter.
std::string build_form_body(const parameter_list &form) const
void update_metrics(const http_response &response, std::chrono::milliseconds elapsed)
Update counters and histograms after an attempt.
void curl_inited()
Initialize libcurl globally once per process.
Result object for an HTTP transfer.
Definition utils.hpp:157
std::string error_message
Non-empty on libcurl error.
Definition utils.hpp:165
CURLcode error_code
libcurl transport/result code.
Definition utils.hpp:164