1use crate::compiler::prelude::*;
2use std::collections::BTreeMap;
3use std::sync::LazyLock;
4use url::Url;
5
6static DEFAULT_DEFAULT_KNOWN_PORTS: LazyLock<Value> = LazyLock::new(|| Value::Boolean(false));
7
8static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
9 vec![
10 Parameter::required("value", kind::BYTES, "The text of the URL."),
11 Parameter::optional(
12 "default_known_ports",
13 kind::BOOLEAN,
14 "If true and the port number is not specified in the input URL
15string (or matches the default port for the scheme), it is
16populated from well-known ports for the following schemes:
17`http`, `https`, `ws`, `wss`, and `ftp`.",
18 )
19 .default(&DEFAULT_DEFAULT_KNOWN_PORTS),
20 ]
21});
22
/// Marker type for the `parse_url` function; it carries no data.
#[derive(Debug, Clone, Copy)]
pub struct ParseUrl;
25
26impl Function for ParseUrl {
27 fn identifier(&self) -> &'static str {
28 "parse_url"
29 }
30
31 fn usage(&self) -> &'static str {
32 "Parses the `value` in [URL](https://en.wikipedia.org/wiki/URL) format."
33 }
34
35 fn category(&self) -> &'static str {
36 Category::Parse.as_ref()
37 }
38
39 fn internal_failure_reasons(&self) -> &'static [&'static str] {
40 &["`value` is not a properly formatted URL."]
41 }
42
43 fn return_kind(&self) -> u16 {
44 kind::OBJECT
45 }
46
47 fn parameters(&self) -> &'static [Parameter] {
48 PARAMETERS.as_slice()
49 }
50
51 fn examples(&self) -> &'static [Example] {
52 &[
53 example! {
54 title: "Parse URL",
55 source: r#"parse_url!("ftp://foo:bar@example.com:4343/foobar?hello=world#123")"#,
56 result: Ok(indoc! {r#"
57 {
58 "fragment": "123",
59 "host": "example.com",
60 "password": "bar",
61 "path": "/foobar",
62 "port": 4343,
63 "query": {
64 "hello": "world"
65 },
66 "scheme": "ftp",
67 "username": "foo"
68 }
69 "#}),
70 },
71 example! {
72 title: "Parse URL with default port",
73 source: r#"parse_url!("https://example.com", default_known_ports: true)"#,
74 result: Ok(indoc! {r#"
75 {
76 "fragment": null,
77 "host": "example.com",
78 "password": "",
79 "path": "/",
80 "port": 443,
81 "query": {},
82 "scheme": "https",
83 "username": ""
84 }
85 "#}),
86 },
87 ]
88 }
89
90 fn compile(
91 &self,
92 _state: &state::TypeState,
93 _ctx: &mut FunctionCompileContext,
94 arguments: ArgumentList,
95 ) -> Compiled {
96 let value = arguments.required("value");
97 let default_known_ports = arguments.optional("default_known_ports");
98
99 Ok(ParseUrlFn {
100 value,
101 default_known_ports,
102 }
103 .as_expr())
104 }
105}
106
107#[derive(Debug, Clone)]
108struct ParseUrlFn {
109 value: Box<dyn Expression>,
110 default_known_ports: Option<Box<dyn Expression>>,
111}
112
113impl FunctionExpression for ParseUrlFn {
114 fn resolve(&self, ctx: &mut Context) -> Resolved {
115 let value = self.value.resolve(ctx)?;
116 let string = value.try_bytes_utf8_lossy()?;
117
118 let default_known_ports = self
119 .default_known_ports
120 .map_resolve_with_default(ctx, || DEFAULT_DEFAULT_KNOWN_PORTS.clone())?
121 .try_boolean()?;
122
123 Url::parse(&string)
124 .map_err(|e| format!("unable to parse url: {e}").into())
125 .map(|url| url_to_value(&url, default_known_ports))
126 }
127
128 fn type_def(&self, _: &state::TypeState) -> TypeDef {
129 TypeDef::object(inner_kind()).fallible()
130 }
131}
132
133fn url_to_value(url: &Url, default_known_ports: bool) -> Value {
134 let mut map = BTreeMap::<&str, Value>::new();
135
136 map.insert("scheme", url.scheme().to_owned().into());
137 map.insert("username", url.username().to_owned().into());
138 map.insert(
139 "password",
140 url.password()
141 .map(ToOwned::to_owned)
142 .unwrap_or_default()
143 .into(),
144 );
145 map.insert("path", url.path().to_owned().into());
146 map.insert("host", url.host_str().map(ToOwned::to_owned).into());
147
148 let port = if default_known_ports {
149 url.port_or_known_default()
150 } else {
151 url.port()
152 };
153 map.insert("port", port.into());
154 map.insert("fragment", url.fragment().map(ToOwned::to_owned).into());
155 map.insert(
156 "query",
157 url.query_pairs()
158 .into_owned()
159 .map(|(k, v)| (k.into(), v.into()))
160 .collect::<ObjectMap>()
161 .into(),
162 );
163
164 map.into_iter()
165 .map(|(k, v)| (k.to_owned(), v))
166 .collect::<Value>()
167}
168
169fn inner_kind() -> BTreeMap<Field, Kind> {
170 BTreeMap::from([
171 ("scheme".into(), Kind::bytes()),
172 ("username".into(), Kind::bytes()),
173 ("password".into(), Kind::bytes()),
174 ("path".into(), Kind::bytes().or_null()),
175 ("host".into(), Kind::bytes()),
176 ("port".into(), Kind::integer().or_null()),
177 ("fragment".into(), Kind::bytes().or_null()),
178 (
179 "query".into(),
180 Kind::object(Collection::from_unknown(Kind::bytes())),
181 ),
182 ])
183}
184
// Table-driven tests for `parse_url`. Each case supplies the call arguments,
// the expected value and the expected type definition.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::value;

    test_function![
        parse_url => ParseUrl;

        // No explicit port and `default_known_ports` left unset: `port` is
        // expected to be `()` (null).
        https {
            args: func_args![value: value!("https://vector.dev")],
            want: Ok(value!({
                fragment: (),
                host: "vector.dev",
                password: "",
                path: "/",
                port: (),
                query: {},
                scheme: "https",
                username: "",
            })),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // An explicit `:443` on an `https` URL is still expected to yield a
        // `()` port when `default_known_ports` is unset.
        default_port_specified {
            args: func_args![value: value!("https://vector.dev:443")],
            want: Ok(value!({
                fragment: (),
                host: "vector.dev",
                password: "",
                path: "/",
                port: (),
                query: {},
                scheme: "https",
                username: "",
            })),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // With `default_known_ports: true` the missing port is filled in with
        // 443 for `https`.
        default_port {
            args: func_args![value: value!("https://vector.dev"), default_known_ports: true],
            want: Ok(value!({
                fragment: (),
                host: "vector.dev",
                password: "",
                path: "/",
                port: 443_i64,
                query: {},
                scheme: "https",
                username: "",
            })),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // A non-ASCII host is expected back in its `xn--` (punycode) form.
        punycode {
            args: func_args![value: value!("https://www.café.com")],
            want: Ok(value!({
                fragment: (),
                host: "www.xn--caf-dma.com",
                password: "",
                path: "/",
                port: (),
                query: {},
                scheme: "https",
                username: "",
            })),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // Upper-case letters in a non-ASCII host are expected to produce the
        // same lower-case punycode host as the previous case.
        punycode_mixed_case {
            args: func_args![value: value!("https://www.CAFé.com")],
            want: Ok(value!({
                fragment: (),
                host: "www.xn--caf-dma.com",
                password: "",
                path: "/",
                port: (),
                query: {},
                scheme: "https",
                username: "",
            })),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }
    ];
}