vrl/stdlib/
encode_percent.rs

1use crate::compiler::function::EnumVariant;
2use crate::compiler::prelude::*;
3use crate::value;
4use percent_encoding::{AsciiSet, utf8_percent_encode};
5use std::sync::LazyLock;
6
7static DEFAULT_ASCII_SET: LazyLock<Value> =
8    LazyLock::new(|| Value::Bytes(Bytes::from("NON_ALPHANUMERIC")));
9
10static ASCII_SET_ENUM: &[EnumVariant] = &[
11    EnumVariant {
12        value: "NON_ALPHANUMERIC",
13        description: "Encode any non-alphanumeric characters. This is the safest option.",
14    },
15    EnumVariant {
16        value: "CONTROLS",
17        description: "Encode only [control characters](https://infra.spec.whatwg.org/#c0-control).",
18    },
19    EnumVariant {
20        value: "FRAGMENT",
21        description: "Encode only [fragment characters](https://url.spec.whatwg.org/#fragment-percent-encode-set)",
22    },
23    EnumVariant {
24        value: "QUERY",
25        description: "Encode only [query characters](https://url.spec.whatwg.org/#query-percent-encode-set)",
26    },
27    EnumVariant {
28        value: "SPECIAL",
29        description: "Encode only [special characters](https://url.spec.whatwg.org/#special-percent-encode-set)",
30    },
31    EnumVariant {
32        value: "PATH",
33        description: "Encode only [path characters](https://url.spec.whatwg.org/#path-percent-encode-set)",
34    },
35    EnumVariant {
36        value: "USERINFO",
37        description: "Encode only [userinfo characters](https://url.spec.whatwg.org/#userinfo-percent-encode-set)",
38    },
39    EnumVariant {
40        value: "COMPONENT",
41        description: "Encode only [component characters](https://url.spec.whatwg.org/#component-percent-encode-set)",
42    },
43    EnumVariant {
44        value: "WWW_FORM_URLENCODED",
45        description: "Encode only [`application/x-www-form-urlencoded`](https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set)",
46    },
47];
48
49static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
50    vec![
51        Parameter::required("value", kind::BYTES, "The string to encode."),
52        Parameter::optional(
53            "ascii_set",
54            kind::BYTES,
55            "The ASCII set to use when encoding the data.",
56        )
57        .default(&DEFAULT_ASCII_SET)
58        .enum_variants(ASCII_SET_ENUM),
59    ]
60});
61
62fn encode_percent(value: &Value, ascii_set: &Bytes) -> Resolved {
63    let string = value.try_bytes_utf8_lossy()?;
64    let ascii_set = match ascii_set.as_ref() {
65        b"NON_ALPHANUMERIC" => percent_encoding::NON_ALPHANUMERIC,
66        b"CONTROLS" => percent_encoding::CONTROLS,
67        b"FRAGMENT" => FRAGMENT,
68        b"QUERY" => QUERY,
69        b"SPECIAL" => SPECIAL,
70        b"PATH" => PATH,
71        b"USERINFO" => USERINFO,
72        b"COMPONENT" => COMPONENT,
73        b"WWW_FORM_URLENCODED" => WWW_FORM_URLENCODED,
74        _ => unreachable!("enum invariant"),
75    };
76
77    Ok(utf8_percent_encode(&string, ascii_set).to_string().into())
78}
79
80/// <https://url.spec.whatwg.org/#fragment-percent-encode-set>
81const FRAGMENT: &AsciiSet = &percent_encoding::CONTROLS
82    .add(b' ')
83    .add(b'"')
84    .add(b'<')
85    .add(b'>')
86    .add(b'`');
87
88/// <https://url.spec.whatwg.org/#query-percent-encode-set>
89const QUERY: &AsciiSet = &percent_encoding::CONTROLS
90    .add(b' ')
91    .add(b'"')
92    .add(b'#')
93    .add(b'<')
94    .add(b'>');
95
96/// <https://url.spec.whatwg.org/#special-percent-encode-set>
97const SPECIAL: &AsciiSet = &QUERY.add(b'\'');
98
99/// <https://url.spec.whatwg.org/#path-percent-encode-set>
100const PATH: &AsciiSet = &QUERY.add(b'?').add(b'`').add(b'{').add(b'}');
101
102/// <https://url.spec.whatwg.org/#userinfo-percent-encode-set>
103const USERINFO: &AsciiSet = &PATH
104    .add(b'/')
105    .add(b':')
106    .add(b';')
107    .add(b'=')
108    .add(b'@')
109    .add(b'[')
110    .add(b'\\')
111    .add(b']')
112    .add(b'^')
113    .add(b'|');
114
115/// <https://url.spec.whatwg.org/#component-percent-encode-set>
116const COMPONENT: &AsciiSet = &USERINFO.add(b'$').add(b'%').add(b'&').add(b'+').add(b',');
117
118/// <https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set>
119const WWW_FORM_URLENCODED: &AsciiSet =
120    &COMPONENT.add(b'!').add(b'\'').add(b'(').add(b')').add(b'~');
121
/// Stateless marker type for the `encode_percent` function.
#[derive(Debug, Clone, Copy)]
pub struct EncodePercent;
124
125fn ascii_sets() -> Vec<Value> {
126    vec![
127        value!("NON_ALPHANUMERIC"),
128        value!("CONTROLS"),
129        value!("FRAGMENT"),
130        value!("QUERY"),
131        value!("SPECIAL"),
132        value!("PATH"),
133        value!("USERINFO"),
134        value!("COMPONENT"),
135        value!("WWW_FORM_URLENCODED"),
136    ]
137}
138
139impl Function for EncodePercent {
140    fn identifier(&self) -> &'static str {
141        "encode_percent"
142    }
143
144    fn usage(&self) -> &'static str {
145        "Encodes a `value` with [percent encoding](https://url.spec.whatwg.org/#percent-encoded-bytes) to safely be used in URLs."
146    }
147
148    fn category(&self) -> &'static str {
149        Category::Codec.as_ref()
150    }
151
152    fn return_kind(&self) -> u16 {
153        kind::BYTES
154    }
155
156    fn parameters(&self) -> &'static [Parameter] {
157        PARAMETERS.as_slice()
158    }
159
160    fn compile(
161        &self,
162        state: &state::TypeState,
163        _ctx: &mut FunctionCompileContext,
164        arguments: ArgumentList,
165    ) -> Compiled {
166        let value = arguments.required("value");
167        let ascii_set = arguments
168            .optional_enum("ascii_set", &ascii_sets(), state)?
169            .unwrap_or_else(|| DEFAULT_ASCII_SET.clone())
170            .try_bytes()
171            .expect("ascii_set not bytes");
172
173        Ok(EncodePercentFn { value, ascii_set }.as_expr())
174    }
175
176    fn examples(&self) -> &'static [Example] {
177        &[
178            example! {
179                title: "Percent encode all non-alphanumeric characters (default)",
180                source: r#"encode_percent("foo bar?")"#,
181                result: Ok("foo%20bar%3F"),
182            },
183            example! {
184                title: "Percent encode only control characters",
185                source: r#"encode_percent("foo \tbar", ascii_set: "CONTROLS")"#,
186                result: Ok("foo %09bar"),
187            },
188            example! {
189                title: "Percent encode special characters",
190                source: r#"encode_percent("foo@bar?")"#,
191                result: Ok("foo%40bar%3F"),
192            },
193        ]
194    }
195}
196
197#[derive(Clone, Debug)]
198struct EncodePercentFn {
199    value: Box<dyn Expression>,
200    ascii_set: Bytes,
201}
202
203impl FunctionExpression for EncodePercentFn {
204    fn resolve(&self, ctx: &mut Context) -> Resolved {
205        let value = self.value.resolve(ctx)?;
206        encode_percent(&value, &self.ascii_set)
207    }
208
209    fn type_def(&self, _: &state::TypeState) -> TypeDef {
210        TypeDef::bytes()
211    }
212}
213
/// Unit tests covering the default set and every named `ascii_set`.
#[cfg(test)]
mod tests {
    use super::*;

    test_function![
        encode_percent => EncodePercent;

        default {
            args: func_args![value: "foo bar?"],
            want: Ok("foo%20bar%3F"),
            tdef: TypeDef::bytes().infallible(),
        }

        // The control character U+0014 is written as an explicit escape: a raw
        // control byte in the literal is invisible and easily lost, and without
        // it the input cannot produce the expected `%14`.
        controls {
            args: func_args![value: "foo \x14bar", ascii_set: "CONTROLS"],
            want: Ok("foo %14bar"),
            tdef: TypeDef::bytes().infallible(),
        }

        fragment {
            args: func_args![value: r#"foo <>" `bar"#, ascii_set: "FRAGMENT"],
            want: Ok("foo%20%3C%3E%22%20%60bar"),
            tdef: TypeDef::bytes().infallible(),
        }

        query {
            args: func_args![value: r#"foo #"<>bar"#, ascii_set: "QUERY"],
            want: Ok("foo%20%23%22%3C%3Ebar"),
            tdef: TypeDef::bytes().infallible(),
        }

        special {
            args: func_args![value: r#"foo #"<>'bar"#, ascii_set: "SPECIAL"],
            want: Ok("foo%20%23%22%3C%3E%27bar"),
            tdef: TypeDef::bytes().infallible(),
        }

        path {
            args: func_args![value: r#"foo #"<>?`{}bar"#, ascii_set: "PATH"],
            want: Ok("foo%20%23%22%3C%3E%3F%60%7B%7Dbar"),
            tdef: TypeDef::bytes().infallible(),
        }

        userinfo {
            args: func_args![value: r#"foo #"<>?`{}/:;=@[\]^|bar"#, ascii_set: "USERINFO"],
            want: Ok("foo%20%23%22%3C%3E%3F%60%7B%7D%2F%3A%3B%3D%40%5B%5C%5D%5E%7Cbar"),
            tdef: TypeDef::bytes().infallible(),
        }

        component {
            args: func_args![value: r#"foo #"<>?`{}/:;=@[\]^|$%&+,bar"#, ascii_set: "COMPONENT"],
            want: Ok("foo%20%23%22%3C%3E%3F%60%7B%7D%2F%3A%3B%3D%40%5B%5C%5D%5E%7C%24%25%26%2B%2Cbar"),
            tdef: TypeDef::bytes().infallible(),
        }

        www_form_urlencoded {
            args: func_args![value: r#"foo #"<>?`{}/:;=@[\]^|$%&+,!'()~bar"#, ascii_set: "WWW_FORM_URLENCODED"],
            want: Ok("foo%20%23%22%3C%3E%3F%60%7B%7D%2F%3A%3B%3D%40%5B%5C%5D%5E%7C%24%25%26%2B%2C%21%27%28%29%7Ebar"),
            tdef: TypeDef::bytes().infallible(),
        }
    ];
}