vector/sources/util/
http_client.rs

1//! Common logic for sources that are HTTP clients.
2//!
3//! Specific HTTP client sources will:
4//!   - Call build_url() to build the URL(s) to call.
5//!   - Implement a specific context struct which:
6//!       - Contains the data that source needs in order to process the HTTP responses into internal_events
//!       - Implements the HttpClientContext trait
8//!   - Call call() supplying the generic inputs for calling and the source-specific
9//!     context.
10
11// Okta source only imports defaults but doesn't use the rest of the client
12#![cfg_attr(feature = "sources-okta", allow(dead_code))]
13
14use std::{collections::HashMap, future::ready, time::Duration};
15
16use bytes::Bytes;
17use futures_util::{FutureExt, StreamExt, TryFutureExt, stream};
18use http::{Uri, response::Parts};
19use hyper::{Body, Request};
20use tokio_stream::wrappers::IntervalStream;
21use vector_lib::{
22    EstimatedJsonEncodedSizeOf, config::proxy::ProxyConfig, event::Event, json_size::JsonSize,
23    shutdown::ShutdownSignal,
24};
25
26use crate::{
27    SourceSender,
28    http::{Auth, HttpClient, QueryParameterValue, QueryParameters},
29    internal_events::{
30        EndpointBytesReceived, HttpClientEventsReceived, HttpClientHttpError,
31        HttpClientHttpResponseError, StreamClosedError,
32    },
33    sources::util::http::HttpMethod,
34    tls::TlsSettings,
35};
36
37/// Contains the inputs generic to any http client.
38pub(crate) struct GenericHttpClientInputs {
39    /// Array of URLs to call.
40    pub urls: Vec<Uri>,
41    /// Interval between calls.
42    pub interval: Duration,
43    /// Timeout for the HTTP request.
44    pub timeout: Duration,
45    /// Map of Header+Value to apply to HTTP request.
46    pub headers: HashMap<String, Vec<String>>,
47    /// Content type of the HTTP request, determined by the source.
48    pub content_type: String,
49    pub auth: Option<Auth>,
50    pub tls: TlsSettings,
51    pub proxy: ProxyConfig,
52    pub shutdown: ShutdownSignal,
53}
54
/// Returns the interval between calls to the HTTP endpoint used when none is configured
/// (15 seconds).
pub(crate) const fn default_interval() -> Duration {
    const DEFAULT_INTERVAL_SECS: u64 = 15;
    Duration::from_secs(DEFAULT_INTERVAL_SECS)
}
59
/// Returns the HTTP request timeout used when none is configured (5 seconds).
pub(crate) const fn default_timeout() -> Duration {
    const DEFAULT_TIMEOUT_SECS: u64 = 5;
    Duration::from_secs(DEFAULT_TIMEOUT_SECS)
}
64
65/// Builds the context, allowing the source-specific implementation to leverage data from the
66/// config and the current HTTP request.
67pub(crate) trait HttpClientBuilder {
68    type Context: HttpClientContext;
69
70    /// Called before the HTTP request is made to build out the context.
71    fn build(&self, url: &Uri) -> Self::Context;
72}
73
74/// Methods that allow context-specific behavior during the scraping procedure.
75pub(crate) trait HttpClientContext {
76    /// Called after the HTTP request succeeds and returns the decoded/parsed Event array.
77    fn on_response(&mut self, url: &Uri, header: &Parts, body: &Bytes) -> Option<Vec<Event>>;
78
79    /// (Optional) Called if the HTTP response is not 200 ('OK').
80    fn on_http_response_error(&self, _uri: &Uri, _header: &Parts) {}
81
82    /// (Optional) Process the base URL before each request.
83    /// Allows for dynamic query parameters that update at runtime.
84    /// Returns a new URL if parameters need to be updated, or None to use the original URL.
85    fn process_url(&self, _url: &Uri) -> Option<Uri> {
86        None
87    }
88
89    // This function can be defined to enrich events with additional HTTP
90    // metadata. This function should be used rather than internal enrichment so
91    // that accurate byte count metrics can be emitted.
92    fn enrich_events(&mut self, _events: &mut Vec<Event>) {}
93}
94
95/// Builds a url for the HTTP requests.
96pub(crate) fn build_url(uri: &Uri, query: &QueryParameters) -> Uri {
97    let mut serializer = url::form_urlencoded::Serializer::new(String::new());
98    if let Some(query) = uri.query() {
99        serializer.extend_pairs(url::form_urlencoded::parse(query.as_bytes()));
100    };
101    for (k, query_value) in query {
102        match query_value {
103            QueryParameterValue::SingleParam(param) => {
104                serializer.append_pair(k, param.value());
105            }
106            QueryParameterValue::MultiParams(params) => {
107                for v in params {
108                    serializer.append_pair(k, v.value());
109                }
110            }
111        };
112    }
113    let mut builder = Uri::builder();
114    if let Some(scheme) = uri.scheme() {
115        builder = builder.scheme(scheme.clone());
116    };
117    if let Some(authority) = uri.authority() {
118        builder = builder.authority(authority.clone());
119    };
120    builder = builder.path_and_query(match serializer.finish() {
121        query if !query.is_empty() => format!("{}?{}", uri.path(), query),
122        _ => uri.path().to_string(),
123    });
124    builder
125        .build()
126        .expect("Failed to build URI from parsed arguments")
127}
128
129/// Warns if the scrape timeout is greater than the scrape interval.
130pub(crate) fn warn_if_interval_too_low(timeout: Duration, interval: Duration) {
131    if timeout > interval {
132        warn!(
133            interval_secs = %interval.as_secs_f64(),
134            timeout_secs = %timeout.as_secs_f64(),
135            message = "Having a scrape timeout that exceeds the scrape interval can lead to excessive resource consumption.",
136        );
137    }
138}
139
140/// Calls one or more urls at an interval.
141///   - The HTTP request is built per the options in provided generic inputs.
142///   - The HTTP response is decoded/parsed into events by the specific context.
143///   - The events are then sent to the output stream.
144pub(crate) async fn call<
145    B: HttpClientBuilder<Context = C> + Send + Clone,
146    C: HttpClientContext + Send,
147>(
148    inputs: GenericHttpClientInputs,
149    context_builder: B,
150    mut out: SourceSender,
151    http_method: HttpMethod,
152) -> Result<(), ()> {
153    // Building the HttpClient should not fail as it is just setting up the client with the
154    // proxy and tls settings.
155    let client =
156        HttpClient::new(inputs.tls.clone(), &inputs.proxy).expect("Building HTTP client failed");
157    let mut stream = IntervalStream::new(tokio::time::interval(inputs.interval))
158        .take_until(inputs.shutdown)
159        .map(move |_| stream::iter(inputs.urls.clone()))
160        .flatten()
161        .map(move |base_url| {
162            let client = client.clone();
163            let endpoint = base_url.to_string();
164
165            let context_builder = context_builder.clone();
166            let mut context = context_builder.build(&base_url);
167
168            // Check if we need to process the URL dynamically (for updating VRL expressions)
169            let url = context.process_url(&base_url).unwrap_or(base_url);
170
171            let mut builder = match http_method {
172                HttpMethod::Head => Request::head(&url),
173                HttpMethod::Get => Request::get(&url),
174                HttpMethod::Post => Request::post(&url),
175                HttpMethod::Put => Request::put(&url),
176                HttpMethod::Patch => Request::patch(&url),
177                HttpMethod::Delete => Request::delete(&url),
178                HttpMethod::Options => Request::options(&url),
179            };
180
181            // add user specified headers
182            for (header, values) in &inputs.headers {
183                for value in values {
184                    builder = builder.header(header, value);
185                }
186            }
187
188            // set ACCEPT header if not user specified
189            if !inputs.headers.contains_key(http::header::ACCEPT.as_str()) {
190                builder = builder.header(http::header::ACCEPT, &inputs.content_type);
191            }
192
193            // building an empty request should be infallible
194            let mut request = builder.body(Body::empty()).expect("error creating request");
195
196            if let Some(auth) = &inputs.auth {
197                auth.apply(&mut request);
198            }
199
200            tokio::time::timeout(inputs.timeout, client.send(request))
201                .then(move |result| async move {
202                    match result {
203                        Ok(Ok(response)) => Ok(response),
204                        Ok(Err(error)) => Err(error.into()),
205                        Err(_) => Err(format!(
206                            "Timeout error: request exceeded {}s",
207                            inputs.timeout.as_secs_f64()
208                        )
209                        .into()),
210                    }
211                })
212                .and_then(|response| async move {
213                    let (header, body) = response.into_parts();
214                    let body = hyper::body::to_bytes(body).await?;
215                    emit!(EndpointBytesReceived {
216                        byte_size: body.len(),
217                        protocol: "http",
218                        endpoint: endpoint.as_str(),
219                    });
220                    Ok((header, body))
221                })
222                .into_stream()
223                .filter_map(move |response| {
224                    ready(match response {
225                        Ok((header, body)) if header.status == hyper::StatusCode::OK => {
226                            context.on_response(&url, &header, &body).map(|mut events| {
227                                let byte_size = if events.is_empty() {
228                                    // We need to explicitly set the byte size
229                                    // to 0 since
230                                    // `estimated_json_encoded_size_of` returns
231                                    // at least 1 for an empty collection. For
232                                    // the purposes of the
233                                    // HttpClientEventsReceived event, we should
234                                    // emit 0 when there aren't any usable
235                                    // metrics.
236                                    JsonSize::zero()
237                                } else {
238                                    events.estimated_json_encoded_size_of()
239                                };
240
241                                emit!(HttpClientEventsReceived {
242                                    byte_size,
243                                    count: events.len(),
244                                    url: url.to_string()
245                                });
246
247                                // We'll enrich after receiving the events so
248                                // that the byte sizes are accurate.
249                                context.enrich_events(&mut events);
250
251                                stream::iter(events)
252                            })
253                        }
254                        Ok((header, _)) => {
255                            context.on_http_response_error(&url, &header);
256                            emit!(HttpClientHttpResponseError {
257                                code: header.status,
258                                url: url.to_string(),
259                            });
260                            None
261                        }
262                        Err(error) => {
263                            emit!(HttpClientHttpError {
264                                error,
265                                url: url.to_string()
266                            });
267                            None
268                        }
269                    })
270                })
271                .flatten()
272                .boxed()
273        })
274        .flatten_unordered(None)
275        .boxed();
276
277    match out.send_event_stream(&mut stream).await {
278        Ok(()) => {
279            debug!("Finished sending.");
280            Ok(())
281        }
282        Err(_) => {
283            let (count, _) = stream.size_hint();
284            emit!(StreamClosedError { count });
285            Err(())
286        }
287    }
288}