vrl/stdlib/
encode_punycode.rs1use crate::compiler::prelude::*;
2use std::sync::LazyLock;
3
4static DEFAULT_VALIDATE: LazyLock<Value> = LazyLock::new(|| Value::Boolean(true));
5
/// Prefix that marks a domain label as punycode-encoded. Labels already
/// starting with it are not encoded again, and it is prepended to every label
/// this module encodes itself.
const PUNYCODE_PREFIX: &str = "xn--";
7
8static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
9 vec![
10 Parameter::required("value", kind::BYTES, "The string to encode."),
11 Parameter::optional(
12 "validate",
13 kind::BOOLEAN,
14 "Whether to validate the input string to check if it is a valid domain name.",
15 )
16 .default(&DEFAULT_VALIDATE),
17 ]
18});
19
/// Marker type that registers the `encode_punycode` function; it carries no
/// state of its own.
#[derive(Debug, Clone, Copy)]
pub struct EncodePunycode;
22
23impl Function for EncodePunycode {
24 fn identifier(&self) -> &'static str {
25 "encode_punycode"
26 }
27
28 fn usage(&self) -> &'static str {
29 "Encodes a `value` to [punycode](https://en.wikipedia.org/wiki/Punycode). Useful for internationalized domain names ([IDN](https://en.wikipedia.org/wiki/Internationalized_domain_name)). This function assumes that the value passed is meant to be used in IDN context and that it is either a domain name or a part of it."
30 }
31
32 fn category(&self) -> &'static str {
33 Category::Codec.as_ref()
34 }
35
36 fn internal_failure_reasons(&self) -> &'static [&'static str] {
37 &["`value` can not be encoded to `punycode`"]
38 }
39
40 fn return_kind(&self) -> u16 {
41 kind::BYTES
42 }
43
44 fn parameters(&self) -> &'static [Parameter] {
45 PARAMETERS.as_slice()
46 }
47
48 fn compile(
49 &self,
50 _state: &state::TypeState,
51 _ctx: &mut FunctionCompileContext,
52 arguments: ArgumentList,
53 ) -> Compiled {
54 let value = arguments.required("value");
55 let validate = arguments.optional("validate");
56
57 Ok(EncodePunycodeFn { value, validate }.as_expr())
58 }
59
60 fn examples(&self) -> &'static [Example] {
61 &[
62 example! {
63 title: "Encode an internationalized domain name",
64 source: r#"encode_punycode!("www.café.com")"#,
65 result: Ok("www.xn--caf-dma.com"),
66 },
67 example! {
68 title: "Encode an internationalized domain name with mixed case",
69 source: r#"encode_punycode!("www.CAFé.com")"#,
70 result: Ok("www.xn--caf-dma.com"),
71 },
72 example! {
73 title: "Encode an ASCII only string",
74 source: r#"encode_punycode!("www.cafe.com")"#,
75 result: Ok("www.cafe.com"),
76 },
77 example! {
78 title: "Ignore validation",
79 source: r#"encode_punycode!("xn--8hbb.xn--fiba.xn--8hbf.xn--eib.", validate: false)"#,
80 result: Ok("xn--8hbb.xn--fiba.xn--8hbf.xn--eib."),
81 },
82 ]
83 }
84}
85
86#[derive(Clone, Debug)]
87struct EncodePunycodeFn {
88 value: Box<dyn Expression>,
89 validate: Option<Box<dyn Expression>>,
90}
91
92impl FunctionExpression for EncodePunycodeFn {
93 fn resolve(&self, ctx: &mut Context) -> Resolved {
94 let value = self.value.resolve(ctx)?;
95 let string = value.try_bytes_utf8_lossy()?;
96
97 let validate = self
98 .validate
99 .map_resolve_with_default(ctx, || DEFAULT_VALIDATE.clone())?
100 .try_boolean()?;
101
102 if validate {
103 let encoded = idna::domain_to_ascii(&string)
104 .map_err(|_errors| "unable to encode to punycode".to_string())?;
105 Ok(encoded.into())
106 } else {
107 if string
108 .chars()
109 .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '.')
110 {
111 return Ok(string.into());
112 }
113
114 let encoded = string
115 .split('.')
116 .map(|part| {
117 if part.starts_with(PUNYCODE_PREFIX) || part.is_ascii() {
118 part.to_lowercase()
119 } else {
120 format!(
121 "{}{}",
122 PUNYCODE_PREFIX,
123 idna::punycode::encode_str(&part.to_lowercase())
124 .unwrap_or(part.to_lowercase())
125 )
126 }
127 })
128 .collect::<Vec<String>>()
129 .join(".");
130 Ok(encoded.into())
131 }
132 }
133
134 fn type_def(&self, _: &state::TypeState) -> TypeDef {
135 TypeDef::bytes().fallible()
136 }
137}
138
#[cfg(test)]
mod test {
    use crate::value;

    use super::*;

    test_function![
        encode_punycode => EncodePunycode;

        // ---- Default behaviour (`validate` omitted, i.e. validation on) ----

        // A non-ASCII label is encoded; ASCII labels pass through.
        idn_string {
            args: func_args![value: value!("www.café.com")],
            want: Ok(value!("www.xn--caf-dma.com")),
            tdef: TypeDef::bytes().fallible(),
        }

        // Uppercase letters are folded to lowercase before encoding.
        mixed_case {
            args: func_args![value: value!("www.CAFé.com")],
            want: Ok(value!("www.xn--caf-dma.com")),
            tdef: TypeDef::bytes().fallible(),
        }

        // Pure ASCII input comes back unchanged.
        ascii_string {
            args: func_args![value: value!("www.cafe.com")],
            want: Ok(value!("www.cafe.com")),
            tdef: TypeDef::bytes().fallible(),
        }

        // The remaining validated inputs are expected to be rejected.
        bidi_error {
            args: func_args![value: value!("xn--8hbb.xn--fiba.xn--8hbf.xn--eib.")],
            want: Err("unable to encode to punycode"),
            tdef: TypeDef::bytes().fallible(),
        }

        multiple_errors {
            args: func_args![value: value!("dns1.webproxy.idc.csesvcgateway.xn--line-svcgateway-jp-mvm-ri-d060072.\\-1roslin.canva.cn.")],
            want: Err("unable to encode to punycode"),
            tdef: TypeDef::bytes().fallible(),
        }

        bidi_error2 {
            args: func_args![value: value!("wwes.ir.abadgostaran.ir.taakads.ir.farhadrahimy.ir.regk.ir.2qok.com.خرید-پستی.com.maskancto.com.phpars.com.eshelstore.ir.techtextile.ir.mrafiei.ir.hamtamotor.com.surfiran.ir.negar3d.com.tjketab.ir.3d4dl.ir.cabindooshsahand.com.mashtikebab.sbs.")],
            want: Err("unable to encode to punycode"),
            tdef: TypeDef::bytes().fallible(),
        }

        // ---- `validate: false` ----

        // Same results as the validated variants for well-formed input.
        mixed_case_ignore_validation {
            args: func_args![value: value!("www.CAFé.com"), validate: false],
            want: Ok(value!("www.xn--caf-dma.com")),
            tdef: TypeDef::bytes().fallible(),
        }

        ascii_string_ignore_validation {
            args: func_args![value: value!("www.cafe.com"), validate: false],
            want: Ok(value!("www.cafe.com")),
            tdef: TypeDef::bytes().fallible(),
        }

        // Inputs rejected above are accepted here: labels that are ASCII or
        // already prefixed are kept, non-ASCII labels are encoded.
        bidi_error_ignore {
            args: func_args![value: value!("xn--8hbb.xn--fiba.xn--8hbf.xn--eib."), validate: false],
            want: Ok(value!("xn--8hbb.xn--fiba.xn--8hbf.xn--eib.")),
            tdef: TypeDef::bytes().fallible(),
        }

        bidi_error2_ignore {
            args: func_args![value: value!("wwes.ir.abadgostaran.ir.taakads.ir.farhadrahimy.ir.regk.ir.2qok.com.خرید-پستی.com.maskancto.com.phpars.com.eshelstore.ir.techtextile.ir.mrafiei.ir.hamtamotor.com.surfiran.ir.negar3d.com.tjketab.ir.3d4dl.ir.cabindooshsahand.com.mashtikebab.sbs."), validate: false],
            want: Ok(value!("wwes.ir.abadgostaran.ir.taakads.ir.farhadrahimy.ir.regk.ir.2qok.com.xn----5mckejo83c6tfa.com.maskancto.com.phpars.com.eshelstore.ir.techtextile.ir.mrafiei.ir.hamtamotor.com.surfiran.ir.negar3d.com.tjketab.ir.3d4dl.ir.cabindooshsahand.com.mashtikebab.sbs.")),
            tdef: TypeDef::bytes().fallible(),
        }

        multiple_errors_ignore {
            args: func_args![value: value!("dns1.webproxy.idc.csesvcgateway.xn--line-svcgateway-jp-mvm-ri-d060072.\\-1roslin.canva.cn."), validate: false],
            want: Ok(value!("dns1.webproxy.idc.csesvcgateway.xn--line-svcgateway-jp-mvm-ri-d060072.\\-1roslin.canva.cn.")),
            tdef: TypeDef::bytes().fallible(),
        }
    ];
}