1use crate::compiler::function::EnumVariant;
2use crate::compiler::prelude::*;
3use crate::value;
4use percent_encoding::{AsciiSet, utf8_percent_encode};
5use std::sync::LazyLock;
6
7static DEFAULT_ASCII_SET: LazyLock<Value> =
8 LazyLock::new(|| Value::Bytes(Bytes::from("NON_ALPHANUMERIC")));
9
10static ASCII_SET_ENUM: &[EnumVariant] = &[
11 EnumVariant {
12 value: "NON_ALPHANUMERIC",
13 description: "Encode any non-alphanumeric characters. This is the safest option.",
14 },
15 EnumVariant {
16 value: "CONTROLS",
17 description: "Encode only [control characters](https://infra.spec.whatwg.org/#c0-control).",
18 },
19 EnumVariant {
20 value: "FRAGMENT",
21 description: "Encode only [fragment characters](https://url.spec.whatwg.org/#fragment-percent-encode-set)",
22 },
23 EnumVariant {
24 value: "QUERY",
25 description: "Encode only [query characters](https://url.spec.whatwg.org/#query-percent-encode-set)",
26 },
27 EnumVariant {
28 value: "SPECIAL",
29 description: "Encode only [special characters](https://url.spec.whatwg.org/#special-percent-encode-set)",
30 },
31 EnumVariant {
32 value: "PATH",
33 description: "Encode only [path characters](https://url.spec.whatwg.org/#path-percent-encode-set)",
34 },
35 EnumVariant {
36 value: "USERINFO",
37 description: "Encode only [userinfo characters](https://url.spec.whatwg.org/#userinfo-percent-encode-set)",
38 },
39 EnumVariant {
40 value: "COMPONENT",
41 description: "Encode only [component characters](https://url.spec.whatwg.org/#component-percent-encode-set)",
42 },
43 EnumVariant {
44 value: "WWW_FORM_URLENCODED",
45 description: "Encode only [`application/x-www-form-urlencoded`](https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set)",
46 },
47];
48
49static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
50 vec![
51 Parameter::required("value", kind::BYTES, "The string to encode."),
52 Parameter::optional(
53 "ascii_set",
54 kind::BYTES,
55 "The ASCII set to use when encoding the data.",
56 )
57 .default(&DEFAULT_ASCII_SET)
58 .enum_variants(ASCII_SET_ENUM),
59 ]
60});
61
62fn encode_percent(value: &Value, ascii_set: &Bytes) -> Resolved {
63 let string = value.try_bytes_utf8_lossy()?;
64 let ascii_set = match ascii_set.as_ref() {
65 b"NON_ALPHANUMERIC" => percent_encoding::NON_ALPHANUMERIC,
66 b"CONTROLS" => percent_encoding::CONTROLS,
67 b"FRAGMENT" => FRAGMENT,
68 b"QUERY" => QUERY,
69 b"SPECIAL" => SPECIAL,
70 b"PATH" => PATH,
71 b"USERINFO" => USERINFO,
72 b"COMPONENT" => COMPONENT,
73 b"WWW_FORM_URLENCODED" => WWW_FORM_URLENCODED,
74 _ => unreachable!("enum invariant"),
75 };
76
77 Ok(utf8_percent_encode(&string, ascii_set).to_string().into())
78}
79
80const FRAGMENT: &AsciiSet = &percent_encoding::CONTROLS
82 .add(b' ')
83 .add(b'"')
84 .add(b'<')
85 .add(b'>')
86 .add(b'`');
87
88const QUERY: &AsciiSet = &percent_encoding::CONTROLS
90 .add(b' ')
91 .add(b'"')
92 .add(b'#')
93 .add(b'<')
94 .add(b'>');
95
96const SPECIAL: &AsciiSet = &QUERY.add(b'\'');
98
99const PATH: &AsciiSet = &QUERY.add(b'?').add(b'`').add(b'{').add(b'}');
101
102const USERINFO: &AsciiSet = &PATH
104 .add(b'/')
105 .add(b':')
106 .add(b';')
107 .add(b'=')
108 .add(b'@')
109 .add(b'[')
110 .add(b'\\')
111 .add(b']')
112 .add(b'^')
113 .add(b'|');
114
115const COMPONENT: &AsciiSet = &USERINFO.add(b'$').add(b'%').add(b'&').add(b'+').add(b',');
117
118const WWW_FORM_URLENCODED: &AsciiSet =
120 &COMPONENT.add(b'!').add(b'\'').add(b'(').add(b')').add(b'~');
121
/// Marker type carrying the `Function` implementation for `encode_percent`.
#[derive(Debug, Clone, Copy)]
pub struct EncodePercent;
124
125fn ascii_sets() -> Vec<Value> {
126 vec![
127 value!("NON_ALPHANUMERIC"),
128 value!("CONTROLS"),
129 value!("FRAGMENT"),
130 value!("QUERY"),
131 value!("SPECIAL"),
132 value!("PATH"),
133 value!("USERINFO"),
134 value!("COMPONENT"),
135 value!("WWW_FORM_URLENCODED"),
136 ]
137}
138
139impl Function for EncodePercent {
140 fn identifier(&self) -> &'static str {
141 "encode_percent"
142 }
143
144 fn usage(&self) -> &'static str {
145 "Encodes a `value` with [percent encoding](https://url.spec.whatwg.org/#percent-encoded-bytes) to safely be used in URLs."
146 }
147
148 fn category(&self) -> &'static str {
149 Category::Codec.as_ref()
150 }
151
152 fn return_kind(&self) -> u16 {
153 kind::BYTES
154 }
155
156 fn parameters(&self) -> &'static [Parameter] {
157 PARAMETERS.as_slice()
158 }
159
160 fn compile(
161 &self,
162 state: &state::TypeState,
163 _ctx: &mut FunctionCompileContext,
164 arguments: ArgumentList,
165 ) -> Compiled {
166 let value = arguments.required("value");
167 let ascii_set = arguments
168 .optional_enum("ascii_set", &ascii_sets(), state)?
169 .unwrap_or_else(|| DEFAULT_ASCII_SET.clone())
170 .try_bytes()
171 .expect("ascii_set not bytes");
172
173 Ok(EncodePercentFn { value, ascii_set }.as_expr())
174 }
175
176 fn examples(&self) -> &'static [Example] {
177 &[
178 example! {
179 title: "Percent encode all non-alphanumeric characters (default)",
180 source: r#"encode_percent("foo bar?")"#,
181 result: Ok("foo%20bar%3F"),
182 },
183 example! {
184 title: "Percent encode only control characters",
185 source: r#"encode_percent("foo \tbar", ascii_set: "CONTROLS")"#,
186 result: Ok("foo %09bar"),
187 },
188 example! {
189 title: "Percent encode special characters",
190 source: r#"encode_percent("foo@bar?")"#,
191 result: Ok("foo%40bar%3F"),
192 },
193 ]
194 }
195}
196
197#[derive(Clone, Debug)]
198struct EncodePercentFn {
199 value: Box<dyn Expression>,
200 ascii_set: Bytes,
201}
202
203impl FunctionExpression for EncodePercentFn {
204 fn resolve(&self, ctx: &mut Context) -> Resolved {
205 let value = self.value.resolve(ctx)?;
206 encode_percent(&value, &self.ascii_set)
207 }
208
209 fn type_def(&self, _: &state::TypeState) -> TypeDef {
210 TypeDef::bytes()
211 }
212}
213
#[cfg(test)]
mod tests {
    use super::*;

    // One case per supported `ascii_set`. Each input contains exactly the
    // characters that the set adds on top of the previous one, so a missing
    // `.add(..)` in a set definition shows up as an unencoded character.
    test_function![
        encode_percent => EncodePercent;

        default {
            args: func_args![value: "foo bar?"],
            want: Ok("foo%20bar%3F"),
            tdef: TypeDef::bytes().infallible(),
        }

        controls {
            // U+0014 is written as an escape: a raw control character is
            // invisible in the source and easily dropped by editors and tools,
            // which would leave the input without anything to encode.
            args: func_args![value: "foo \u{14}bar", ascii_set: "CONTROLS"],
            want: Ok("foo %14bar"),
            tdef: TypeDef::bytes().infallible(),
        }

        fragment {
            args: func_args![value: r#"foo <>" `bar"#, ascii_set: "FRAGMENT"],
            want: Ok("foo%20%3C%3E%22%20%60bar"),
            tdef: TypeDef::bytes().infallible(),
        }

        query {
            args: func_args![value: r#"foo #"<>bar"#, ascii_set: "QUERY"],
            want: Ok("foo%20%23%22%3C%3Ebar"),
            tdef: TypeDef::bytes().infallible(),
        }

        special {
            args: func_args![value: r#"foo #"<>'bar"#, ascii_set: "SPECIAL"],
            want: Ok("foo%20%23%22%3C%3E%27bar"),
            tdef: TypeDef::bytes().infallible(),
        }

        path {
            args: func_args![value: r#"foo #"<>?`{}bar"#, ascii_set: "PATH"],
            want: Ok("foo%20%23%22%3C%3E%3F%60%7B%7Dbar"),
            tdef: TypeDef::bytes().infallible(),
        }

        userinfo {
            args: func_args![value: r#"foo #"<>?`{}/:;=@[\]^|bar"#, ascii_set: "USERINFO"],
            want: Ok("foo%20%23%22%3C%3E%3F%60%7B%7D%2F%3A%3B%3D%40%5B%5C%5D%5E%7Cbar"),
            tdef: TypeDef::bytes().infallible(),
        }

        component {
            args: func_args![value: r#"foo #"<>?`{}/:;=@[\]^|$%&+,bar"#, ascii_set: "COMPONENT"],
            want: Ok("foo%20%23%22%3C%3E%3F%60%7B%7D%2F%3A%3B%3D%40%5B%5C%5D%5E%7C%24%25%26%2B%2Cbar"),
            tdef: TypeDef::bytes().infallible(),
        }

        www_form_urlencoded {
            args: func_args![value: r#"foo #"<>?`{}/:;=@[\]^|$%&+,!'()~bar"#, ascii_set: "WWW_FORM_URLENCODED"],
            want: Ok("foo%20%23%22%3C%3E%3F%60%7B%7D%2F%3A%3B%3D%40%5B%5C%5D%5E%7C%24%25%26%2B%2C%21%27%28%29%7Ebar"),
            tdef: TypeDef::bytes().infallible(),
        }
    ];
}