vrl/stdlib/
encode_charset.rs

1use crate::diagnostic::Label;
2use crate::prelude::*;
3use encoding_rs::Encoding;
4use nom::AsBytes;
5use std::str::from_utf8;
6
/// Marker type for the `encode_charset` function; it carries no state and
/// exists to host the `Function` implementation.
#[derive(Debug, Clone, Copy)]
pub struct EncodeCharset;
9
10impl Function for EncodeCharset {
11    fn identifier(&self) -> &'static str {
12        "encode_charset"
13    }
14
15    fn examples(&self) -> &'static [Example] {
16        &[
17            example! {
18                title: "Encode UTF8 string to EUC-KR",
19                source: r#"encode_base64(encode_charset!("안녕하세요", "euc-kr"))"#,
20                result: Ok("vsiz58fPvLy/5A=="),
21            },
22            example! {
23                title: "Encode UTF8 string to EUC-JP",
24                source: r#"encode_base64(encode_charset!("こんにちは", "euc-jp"))"#,
25                result: Ok(r"pLOk86TLpMGkzw=="),
26            },
27            example! {
28                title: "Encode UTF8 string to GB2312",
29                source: r#"encode_base64(encode_charset!("你好", "gb2312"))"#,
30                result: Ok(r"xOO6ww=="),
31            },
32        ]
33    }
34
35    fn summary(&self) -> &'static str {
36        "Encode UTF-8 to non UTF-8 charset"
37    }
38
39    fn usage(&self) -> &'static str {
40        indoc! {"
41            Encodes the `value` (a UTF8 string) to a non-UTF8 string using the specified
42            [character set](https://encoding.spec.whatwg.org/#names-and-labels).
43        "}
44    }
45
46    fn category(&self) -> &'static str {
47        Category::Codec.as_ref()
48    }
49
50    fn internal_failure_reasons(&self) -> &'static [&'static str] {
51        &[
52            "`to_charset` isn't a valid [character set](https://encoding.spec.whatwg.org/#names-and-labels).",
53        ]
54    }
55
56    fn return_kind(&self) -> u16 {
57        kind::BYTES
58    }
59
60    fn parameters(&self) -> &'static [Parameter] {
61        const PARAMETERS: &[Parameter] = &[
62            Parameter::required("value", kind::BYTES, "The UTF8 string to encode."),
63            Parameter::required(
64                "to_charset",
65                kind::BYTES,
66                "The [character set](https://encoding.spec.whatwg.org/#names-and-labels) to use when encoding the data.",
67            ),
68        ];
69        PARAMETERS
70    }
71
72    fn compile(
73        &self,
74        _state: &TypeState,
75        _ctx: &mut FunctionCompileContext,
76        arguments: ArgumentList,
77    ) -> Compiled {
78        let value = arguments.required("value");
79        let to_charset = arguments.required("to_charset");
80
81        Ok(DecodeCharsetFn { value, to_charset }.as_expr())
82    }
83}
84
85fn encode_charset(value: &str, to_charset: &[u8]) -> Resolved {
86    let encoder = Encoding::for_label(to_charset).ok_or_else(|| create_error(to_charset))?;
87
88    let (output, _, _) = encoder.encode(value);
89    Ok(Value::Bytes(output.as_bytes().to_vec().into()))
90}
91
92fn create_error(to_charset: &[u8]) -> ExpressionError {
93    ExpressionError::Error {
94        message: format!(
95            "Unknown charset: {}",
96            from_utf8(to_charset).unwrap_or("unknown")
97        ),
98        labels: vec![Label::primary("Unknown charset", Span::default())],
99        notes: vec![Note::SeeDocs(
100            "Encoding Living Standard".to_string(),
101            "https://encoding.spec.whatwg.org/".to_string(),
102        )],
103    }
104}
105
106#[derive(Debug, Clone)]
107struct DecodeCharsetFn {
108    value: Box<dyn Expression>,
109    to_charset: Box<dyn Expression>,
110}
111
112impl FunctionExpression for DecodeCharsetFn {
113    fn resolve(&self, ctx: &mut Context) -> Resolved {
114        let value = self.value.resolve(ctx)?.try_bytes()?;
115        let to_charset = self.to_charset.resolve(ctx)?.try_bytes()?;
116
117        encode_charset(from_utf8(value.as_bytes()).unwrap(), to_charset.as_bytes())
118    }
119
120    fn type_def(&self, _state: &TypeState) -> TypeDef {
121        TypeDef::bytes().fallible()
122    }
123}
124
#[cfg(test)]
mod test {
    use super::*;
    use crate::value;

    test_function![
        encode_charset => EncodeCharset;

        // Korean text encoded with the "euc-kr" label.
        encode_to_euc_kr {
            args: func_args![
                value: value!("안녕하세요"),
                to_charset: value!("euc-kr")
            ],
            want: Ok(value!(b"\xbe\xc8\xb3\xe7\xc7\xcf\xbc\xbc\xbf\xe4")),
            tdef: TypeDef::bytes().fallible(),
        }

        // Japanese text encoded with the "euc-jp" label.
        encode_to_euc_jp {
            args: func_args![
                value: value!("こんにちは"),
                to_charset: value!("euc-jp")
            ],
            want: Ok(value!(b"\xa4\xb3\xa4\xf3\xa4\xcb\xa4\xc1\xa4\xcf")),
            tdef: TypeDef::bytes().fallible(),
        }

        // Chinese text encoded with the "gb2312" label.
        encode_to_gb2312 {
            args: func_args![
                value: value!("你好"),
                to_charset: value!("gb2312")
            ],
            want: Ok(value!(b"\xc4\xe3\xba\xc3")),
            tdef: TypeDef::bytes().fallible(),
        }

        // A label that matches no charset must yield the "Unknown charset" error.
        unknown_charset {
            args: func_args![
                value: value!("안녕하세요"),
                to_charset: value!("euc--kr")
            ],
            want: Err("Unknown charset: euc--kr"),
            tdef: TypeDef::bytes().fallible(),
        }
    ];
}