vrl/stdlib/
encode_punycode.rs

1use crate::compiler::prelude::*;
2use std::sync::LazyLock;
3
4static DEFAULT_VALIDATE: LazyLock<Value> = LazyLock::new(|| Value::Boolean(true));
5
/// Prefix placed in front of a punycode-encoded label. In the non-validating
/// path, labels that already start with it are passed through (lowercased)
/// instead of being encoded again.
const PUNYCODE_PREFIX: &str = "xn--";
7
8static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
9    vec![
10        Parameter::required("value", kind::BYTES, "The string to encode."),
11        Parameter::optional(
12            "validate",
13            kind::BOOLEAN,
14            "Whether to validate the input string to check if it is a valid domain name.",
15        )
16        .default(&DEFAULT_VALIDATE),
17    ]
18});
19
/// Marker type for the `encode_punycode` function; carries no state.
#[derive(Debug, Clone, Copy)]
pub struct EncodePunycode;
22
23impl Function for EncodePunycode {
24    fn identifier(&self) -> &'static str {
25        "encode_punycode"
26    }
27
28    fn usage(&self) -> &'static str {
29        "Encodes a `value` to [punycode](https://en.wikipedia.org/wiki/Punycode). Useful for internationalized domain names ([IDN](https://en.wikipedia.org/wiki/Internationalized_domain_name)). This function assumes that the value passed is meant to be used in IDN context and that it is either a domain name or a part of it."
30    }
31
32    fn category(&self) -> &'static str {
33        Category::Codec.as_ref()
34    }
35
36    fn internal_failure_reasons(&self) -> &'static [&'static str] {
37        &["`value` can not be encoded to `punycode`"]
38    }
39
40    fn return_kind(&self) -> u16 {
41        kind::BYTES
42    }
43
44    fn parameters(&self) -> &'static [Parameter] {
45        PARAMETERS.as_slice()
46    }
47
48    fn compile(
49        &self,
50        _state: &state::TypeState,
51        _ctx: &mut FunctionCompileContext,
52        arguments: ArgumentList,
53    ) -> Compiled {
54        let value = arguments.required("value");
55        let validate = arguments.optional("validate");
56
57        Ok(EncodePunycodeFn { value, validate }.as_expr())
58    }
59
60    fn examples(&self) -> &'static [Example] {
61        &[
62            example! {
63                title: "Encode an internationalized domain name",
64                source: r#"encode_punycode!("www.café.com")"#,
65                result: Ok("www.xn--caf-dma.com"),
66            },
67            example! {
68                title: "Encode an internationalized domain name with mixed case",
69                source: r#"encode_punycode!("www.CAFé.com")"#,
70                result: Ok("www.xn--caf-dma.com"),
71            },
72            example! {
73                title: "Encode an ASCII only string",
74                source: r#"encode_punycode!("www.cafe.com")"#,
75                result: Ok("www.cafe.com"),
76            },
77            example! {
78                title: "Ignore validation",
79                source: r#"encode_punycode!("xn--8hbb.xn--fiba.xn--8hbf.xn--eib.", validate: false)"#,
80                result: Ok("xn--8hbb.xn--fiba.xn--8hbf.xn--eib."),
81            },
82        ]
83    }
84}
85
86#[derive(Clone, Debug)]
87struct EncodePunycodeFn {
88    value: Box<dyn Expression>,
89    validate: Option<Box<dyn Expression>>,
90}
91
92impl FunctionExpression for EncodePunycodeFn {
93    fn resolve(&self, ctx: &mut Context) -> Resolved {
94        let value = self.value.resolve(ctx)?;
95        let string = value.try_bytes_utf8_lossy()?;
96
97        let validate = self
98            .validate
99            .map_resolve_with_default(ctx, || DEFAULT_VALIDATE.clone())?
100            .try_boolean()?;
101
102        if validate {
103            let encoded = idna::domain_to_ascii(&string)
104                .map_err(|_errors| "unable to encode to punycode".to_string())?;
105            Ok(encoded.into())
106        } else {
107            if string
108                .chars()
109                .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '.')
110            {
111                return Ok(string.into());
112            }
113
114            let encoded = string
115                .split('.')
116                .map(|part| {
117                    if part.starts_with(PUNYCODE_PREFIX) || part.is_ascii() {
118                        part.to_lowercase()
119                    } else {
120                        format!(
121                            "{}{}",
122                            PUNYCODE_PREFIX,
123                            idna::punycode::encode_str(&part.to_lowercase())
124                                .unwrap_or(part.to_lowercase())
125                        )
126                    }
127                })
128                .collect::<Vec<String>>()
129                .join(".");
130            Ok(encoded.into())
131        }
132    }
133
134    fn type_def(&self, _: &state::TypeState) -> TypeDef {
135        TypeDef::bytes().fallible()
136    }
137}
138
// Table-driven tests for `encode_punycode`. Each case lists the call arguments,
// the expected result and the expected type definition. Cases come in pairs:
// one with validation left at its default (enabled) and one with
// `validate: false`.
#[cfg(test)]
mod test {
    use super::*;
    use crate::value;

    test_function![
        encode_punycode => EncodePunycode;

        // A non-ASCII label is encoded and prefixed with `xn--`.
        idn_string {
            args: func_args![value: value!("www.café.com")],
            want: Ok(value!("www.xn--caf-dma.com")),
            tdef: TypeDef::bytes().fallible(),
        }

        // Upper-case input yields the same lowercased result.
        mixed_case {
            args: func_args![value: value!("www.CAFé.com")],
            want: Ok(value!("www.xn--caf-dma.com")),
            tdef: TypeDef::bytes().fallible(),
        }

        // Without validation, labels are still lowercased before encoding.
        mixed_case_ignore_validation {
            args: func_args![value: value!("www.CAFé.com"), validate: false],
            want: Ok(value!("www.xn--caf-dma.com")),
            tdef: TypeDef::bytes().fallible(),
        }

        // Plain lowercase ASCII passes through unchanged.
        ascii_string {
            args: func_args![value: value!("www.cafe.com")],
            want: Ok(value!("www.cafe.com")),
            tdef: TypeDef::bytes().fallible(),
        }

        // Same input without validation: also unchanged.
        ascii_string_ignore_validation {
            args: func_args![value: value!("www.cafe.com"), validate: false],
            want: Ok(value!("www.cafe.com")),
            tdef: TypeDef::bytes().fallible(),
        }

        // Already-encoded labels that validation rejects.
        bidi_error {
            args: func_args![value: value!("xn--8hbb.xn--fiba.xn--8hbf.xn--eib.")],
            want: Err("unable to encode to punycode"),
            tdef: TypeDef::bytes().fallible(),
        }

        // Validation failures are reported with the single generic message.
        multiple_errors {
            args: func_args![value: value!("dns1.webproxy.idc.csesvcgateway.xn--line-svcgateway-jp-mvm-ri-d060072.\\-1roslin.canva.cn.")],
            want: Err("unable to encode to punycode"),
            tdef: TypeDef::bytes().fallible(),
        }

        // Long multi-label input with one non-ASCII label; rejected when validating.
        bidi_error2 {
            args: func_args![value: value!("wwes.ir.abadgostaran.ir.taakads.ir.farhadrahimy.ir.regk.ir.2qok.com.خرید-پستی.com.maskancto.com.phpars.com.eshelstore.ir.techtextile.ir.mrafiei.ir.hamtamotor.com.surfiran.ir.negar3d.com.tjketab.ir.3d4dl.ir.cabindooshsahand.com.mashtikebab.sbs.")],
            want: Err("unable to encode to punycode"),
            tdef: TypeDef::bytes().fallible(),
        }

        // With validation off, `xn--` labels are returned as they are.
        bidi_error_ignore {
            args: func_args![value: value!("xn--8hbb.xn--fiba.xn--8hbf.xn--eib."), validate: false],
            want: Ok(value!("xn--8hbb.xn--fiba.xn--8hbf.xn--eib.")),
            tdef: TypeDef::bytes().fallible(),
        }

        // With validation off, only the non-ASCII label is encoded; the rest is untouched.
        bidi_error2_ignore {
            args: func_args![value: value!("wwes.ir.abadgostaran.ir.taakads.ir.farhadrahimy.ir.regk.ir.2qok.com.خرید-پستی.com.maskancto.com.phpars.com.eshelstore.ir.techtextile.ir.mrafiei.ir.hamtamotor.com.surfiran.ir.negar3d.com.tjketab.ir.3d4dl.ir.cabindooshsahand.com.mashtikebab.sbs."), validate: false],
            want: Ok(value!("wwes.ir.abadgostaran.ir.taakads.ir.farhadrahimy.ir.regk.ir.2qok.com.xn----5mckejo83c6tfa.com.maskancto.com.phpars.com.eshelstore.ir.techtextile.ir.mrafiei.ir.hamtamotor.com.surfiran.ir.negar3d.com.tjketab.ir.3d4dl.ir.cabindooshsahand.com.mashtikebab.sbs.")),
            tdef: TypeDef::bytes().fallible(),
        }

        // With validation off, ASCII input that fails validation is returned unchanged.
        multiple_errors_ignore {
            args: func_args![value: value!("dns1.webproxy.idc.csesvcgateway.xn--line-svcgateway-jp-mvm-ri-d060072.\\-1roslin.canva.cn."), validate: false],
            want: Ok(value!("dns1.webproxy.idc.csesvcgateway.xn--line-svcgateway-jp-mvm-ri-d060072.\\-1roslin.canva.cn.")),
            tdef: TypeDef::bytes().fallible(),
        }
    ];
}