vrl/stdlib/
parse_url.rs

1use crate::compiler::prelude::*;
2use std::collections::BTreeMap;
3use std::sync::LazyLock;
4use url::Url;
5
6static DEFAULT_DEFAULT_KNOWN_PORTS: LazyLock<Value> = LazyLock::new(|| Value::Boolean(false));
7
8static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
9    vec![
10        Parameter::required("value", kind::BYTES, "The text of the URL."),
11        Parameter::optional(
12            "default_known_ports",
13            kind::BOOLEAN,
14            "If true and the port number is not specified in the input URL
15string (or matches the default port for the scheme), it is
16populated from well-known ports for the following schemes:
17`http`, `https`, `ws`, `wss`, and `ftp`.",
18        )
19        .default(&DEFAULT_DEFAULT_KNOWN_PORTS),
20    ]
21});
22
/// Unit type that implements `Function` for the `parse_url` function.
#[derive(Debug, Clone, Copy)]
pub struct ParseUrl;
25
26impl Function for ParseUrl {
27    fn identifier(&self) -> &'static str {
28        "parse_url"
29    }
30
31    fn usage(&self) -> &'static str {
32        "Parses the `value` in [URL](https://en.wikipedia.org/wiki/URL) format."
33    }
34
35    fn category(&self) -> &'static str {
36        Category::Parse.as_ref()
37    }
38
39    fn internal_failure_reasons(&self) -> &'static [&'static str] {
40        &["`value` is not a properly formatted URL."]
41    }
42
43    fn return_kind(&self) -> u16 {
44        kind::OBJECT
45    }
46
47    fn parameters(&self) -> &'static [Parameter] {
48        PARAMETERS.as_slice()
49    }
50
51    fn examples(&self) -> &'static [Example] {
52        &[
53            example! {
54                title: "Parse URL",
55                source: r#"parse_url!("ftp://foo:bar@example.com:4343/foobar?hello=world#123")"#,
56                result: Ok(indoc! {r#"
57                {
58                    "fragment": "123",
59                    "host": "example.com",
60                    "password": "bar",
61                    "path": "/foobar",
62                    "port": 4343,
63                    "query": {
64                        "hello": "world"
65                    },
66                    "scheme": "ftp",
67                    "username": "foo"
68                }
69            "#}),
70            },
71            example! {
72                title: "Parse URL with default port",
73                source: r#"parse_url!("https://example.com", default_known_ports: true)"#,
74                result: Ok(indoc! {r#"
75                {
76                    "fragment": null,
77                    "host": "example.com",
78                    "password": "",
79                    "path": "/",
80                    "port": 443,
81                    "query": {},
82                    "scheme": "https",
83                    "username": ""
84                }
85            "#}),
86            },
87        ]
88    }
89
90    fn compile(
91        &self,
92        _state: &state::TypeState,
93        _ctx: &mut FunctionCompileContext,
94        arguments: ArgumentList,
95    ) -> Compiled {
96        let value = arguments.required("value");
97        let default_known_ports = arguments.optional("default_known_ports");
98
99        Ok(ParseUrlFn {
100            value,
101            default_known_ports,
102        }
103        .as_expr())
104    }
105}
106
107#[derive(Debug, Clone)]
108struct ParseUrlFn {
109    value: Box<dyn Expression>,
110    default_known_ports: Option<Box<dyn Expression>>,
111}
112
113impl FunctionExpression for ParseUrlFn {
114    fn resolve(&self, ctx: &mut Context) -> Resolved {
115        let value = self.value.resolve(ctx)?;
116        let string = value.try_bytes_utf8_lossy()?;
117
118        let default_known_ports = self
119            .default_known_ports
120            .map_resolve_with_default(ctx, || DEFAULT_DEFAULT_KNOWN_PORTS.clone())?
121            .try_boolean()?;
122
123        Url::parse(&string)
124            .map_err(|e| format!("unable to parse url: {e}").into())
125            .map(|url| url_to_value(&url, default_known_ports))
126    }
127
128    fn type_def(&self, _: &state::TypeState) -> TypeDef {
129        TypeDef::object(inner_kind()).fallible()
130    }
131}
132
133fn url_to_value(url: &Url, default_known_ports: bool) -> Value {
134    let mut map = BTreeMap::<&str, Value>::new();
135
136    map.insert("scheme", url.scheme().to_owned().into());
137    map.insert("username", url.username().to_owned().into());
138    map.insert(
139        "password",
140        url.password()
141            .map(ToOwned::to_owned)
142            .unwrap_or_default()
143            .into(),
144    );
145    map.insert("path", url.path().to_owned().into());
146    map.insert("host", url.host_str().map(ToOwned::to_owned).into());
147
148    let port = if default_known_ports {
149        url.port_or_known_default()
150    } else {
151        url.port()
152    };
153    map.insert("port", port.into());
154    map.insert("fragment", url.fragment().map(ToOwned::to_owned).into());
155    map.insert(
156        "query",
157        url.query_pairs()
158            .into_owned()
159            .map(|(k, v)| (k.into(), v.into()))
160            .collect::<ObjectMap>()
161            .into(),
162    );
163
164    map.into_iter()
165        .map(|(k, v)| (k.to_owned(), v))
166        .collect::<Value>()
167}
168
169fn inner_kind() -> BTreeMap<Field, Kind> {
170    BTreeMap::from([
171        ("scheme".into(), Kind::bytes()),
172        ("username".into(), Kind::bytes()),
173        ("password".into(), Kind::bytes()),
174        ("path".into(), Kind::bytes().or_null()),
175        ("host".into(), Kind::bytes()),
176        ("port".into(), Kind::integer().or_null()),
177        ("fragment".into(), Kind::bytes().or_null()),
178        (
179            "query".into(),
180            Kind::object(Collection::from_unknown(Kind::bytes())),
181        ),
182    ])
183}
184
#[cfg(test)]
mod tests {
    use super::*;
    use crate::value;

    // Expected objects list their keys in URL-component order; key order has
    // no effect on the comparison.
    test_function![
        parse_url => ParseUrl;

        // No explicit port and no `default_known_ports`: the port stays null.
        https {
            args: func_args![value: value!("https://vector.dev")],
            want: Ok(value!({
                scheme: "https",
                username: "",
                password: "",
                host: "vector.dev",
                port: (),
                path: "/",
                query: {},
                fragment: (),
            })),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // An explicit port equal to the scheme default is reported as null.
        default_port_specified {
            args: func_args![value: value!("https://vector.dev:443")],
            want: Ok(value!({
                scheme: "https",
                username: "",
                password: "",
                host: "vector.dev",
                port: (),
                path: "/",
                query: {},
                fragment: (),
            })),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // With `default_known_ports` the well-known port is filled in.
        default_port {
            args: func_args![value: value!("https://vector.dev"), default_known_ports: true],
            want: Ok(value!({
                scheme: "https",
                username: "",
                password: "",
                host: "vector.dev",
                port: 443_i64,
                path: "/",
                query: {},
                fragment: (),
            })),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // A non-ASCII host is returned in its punycode form.
        punycode {
            args: func_args![value: value!("https://www.café.com")],
            want: Ok(value!({
                scheme: "https",
                username: "",
                password: "",
                host: "www.xn--caf-dma.com",
                port: (),
                path: "/",
                query: {},
                fragment: (),
            })),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // Host case differences do not change the punycode result.
        punycode_mixed_case {
            args: func_args![value: value!("https://www.CAFé.com")],
            want: Ok(value!({
                scheme: "https",
                username: "",
                password: "",
                host: "www.xn--caf-dma.com",
                port: (),
                path: "/",
                query: {},
                fragment: (),
            })),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }
    ];
}