vrl/stdlib/
decode_punycode.rs

1use crate::compiler::prelude::*;
2use std::sync::LazyLock;
3
4static DEFAULT_VALIDATE: LazyLock<Value> = LazyLock::new(|| Value::Boolean(true));
5
6const PUNYCODE_PREFIX: &str = "xn--";
7
8static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
9    vec![
10        Parameter::required("value", kind::BYTES, "The string to decode."),
11        Parameter::optional(
12            "validate",
13            kind::BOOLEAN,
14            "If enabled, checks if the input string is a valid domain name.",
15        )
16        .default(&DEFAULT_VALIDATE),
17    ]
18});
19
/// Unit type that carries the [`Function`] implementation for
/// `decode_punycode`.
#[derive(Debug, Clone, Copy)]
pub struct DecodePunycode;
22
23impl Function for DecodePunycode {
24    fn identifier(&self) -> &'static str {
25        "decode_punycode"
26    }
27
28    fn usage(&self) -> &'static str {
29        "Decodes a [punycode](https://en.wikipedia.org/wiki/Punycode) encoded `value`, such as an internationalized domain name ([IDN](https://en.wikipedia.org/wiki/Internationalized_domain_name)). This function assumes that the value passed is meant to be used in IDN context and that it is either a domain name or a part of it."
30    }
31
32    fn category(&self) -> &'static str {
33        Category::Codec.as_ref()
34    }
35
36    fn internal_failure_reasons(&self) -> &'static [&'static str] {
37        &["`value` is not valid `punycode`"]
38    }
39
40    fn return_kind(&self) -> u16 {
41        kind::BYTES
42    }
43
44    fn parameters(&self) -> &'static [Parameter] {
45        PARAMETERS.as_slice()
46    }
47
48    fn compile(
49        &self,
50        _state: &state::TypeState,
51        _ctx: &mut FunctionCompileContext,
52        arguments: ArgumentList,
53    ) -> Compiled {
54        let value = arguments.required("value");
55        let validate = arguments.optional("validate");
56
57        Ok(DecodePunycodeFn { value, validate }.as_expr())
58    }
59
60    fn examples(&self) -> &'static [Example] {
61        &[
62            example! {
63                title: "Decode a punycode encoded internationalized domain name",
64                source: r#"decode_punycode!("www.xn--caf-dma.com")"#,
65                result: Ok("www.café.com"),
66            },
67            example! {
68                title: "Decode an ASCII only string",
69                source: r#"decode_punycode!("www.cafe.com")"#,
70                result: Ok("www.cafe.com"),
71            },
72            example! {
73                title: "Ignore validation",
74                source: r#"decode_punycode!("xn--8hbb.xn--fiba.xn--8hbf.xn--eib.", validate: false)"#,
75                result: Ok("١٠.٦٦.٣٠.٥."),
76            },
77        ]
78    }
79}
80
81#[derive(Clone, Debug)]
82struct DecodePunycodeFn {
83    value: Box<dyn Expression>,
84    validate: Option<Box<dyn Expression>>,
85}
86
87impl FunctionExpression for DecodePunycodeFn {
88    fn resolve(&self, ctx: &mut Context) -> Resolved {
89        let value = self.value.resolve(ctx)?;
90        let string = value.try_bytes_utf8_lossy()?;
91
92        if !string.contains(PUNYCODE_PREFIX) {
93            return Ok(string.into());
94        }
95
96        let validate = self
97            .validate
98            .map_resolve_with_default(ctx, || DEFAULT_VALIDATE.clone())?
99            .try_boolean()?;
100
101        if validate {
102            let (decoded, result) = idna::domain_to_unicode(&string);
103
104            result.map_err(|_errors| "unable to decode punycode".to_string())?;
105            Ok(decoded.into())
106        } else {
107            let decoded = string
108                .split('.')
109                .map(|part| {
110                    if let Some(stripped) = part.strip_prefix(PUNYCODE_PREFIX) {
111                        idna::punycode::decode_to_string(stripped).unwrap_or(part.to_string())
112                    } else {
113                        part.to_string()
114                    }
115                })
116                .collect::<Vec<String>>()
117                .join(".");
118            Ok(decoded.into())
119        }
120    }
121
122    fn type_def(&self, _: &state::TypeState) -> TypeDef {
123        TypeDef::bytes().fallible()
124    }
125}
126
#[cfg(test)]
mod test {
    use super::*;
    use crate::value;

    test_function![
        decode_punycode => DecodePunycode;

        // Input without any `xn--` label is expected back unchanged.
        ascii_string {
            args: func_args![value: value!("www.cafe.com")],
            want: Ok(value!("www.cafe.com")),
            tdef: TypeDef::bytes().fallible(),
        }

        // A single punycode label, decoded with the default `validate: true`.
        demo_string {
            args: func_args![value: value!("www.xn--caf-dma.com")],
            want: Ok(value!("www.café.com")),
            tdef: TypeDef::bytes().fallible(),
        }

        // Rejected under the default validation...
        bidi_error {
            args: func_args![value: value!("xn--8hbb.xn--fiba.xn--8hbf.xn--eib.")],
            want: Err("unable to decode punycode"),
            tdef: TypeDef::bytes().fallible(),
        }

        // ...but decoded label by label once validation is switched off.
        bidi_error_ignore {
            args: func_args![value: value!("xn--8hbb.xn--fiba.xn--8hbf.xn--eib."), validate: false],
            want: Ok(value!("١٠.٦٦.٣٠.٥.")),
            tdef: TypeDef::bytes().fallible(),
        }

        // Rejected under the default validation...
        multiple_errors {
            args: func_args![value: value!("dns1.webproxy.idc.csesvcgateway.xn--line-svcgateway-jp-mvm-ri-d060072.\\-1roslin.canva.cn.")],
            want: Err("unable to decode punycode"),
            tdef: TypeDef::bytes().fallible(),
        }

        // ...and expected back verbatim once validation is switched off.
        multiple_errors_ignore {
            args: func_args![value: value!("dns1.webproxy.idc.csesvcgateway.xn--line-svcgateway-jp-mvm-ri-d060072.\\-1roslin.canva.cn."), validate: false],
            want: Ok(value!("dns1.webproxy.idc.csesvcgateway.xn--line-svcgateway-jp-mvm-ri-d060072.\\-1roslin.canva.cn.")),
            tdef: TypeDef::bytes().fallible(),
        }
    ];
}