vrl/stdlib/
decode_mime_q.rs

1use charset::Charset;
2use nom::{
3    IResult, Parser,
4    branch::alt,
5    bytes::complete::{tag, take_until, take_until1},
6    combinator::{map, map_opt, opt, success},
7    error::{ContextError, ParseError},
8    multi::fold_many1,
9    sequence::{delimited, pair, separated_pair},
10};
11
12use crate::compiler::prelude::*;
13use crate::value::Value;
14
/// Marker type for the `decode_mime_q` function; it carries no state of its
/// own and only serves as the receiver of the `Function` implementation.
#[derive(Debug, Clone, Copy)]
pub struct DecodeMimeQ;
17
18impl Function for DecodeMimeQ {
19    fn identifier(&self) -> &'static str {
20        "decode_mime_q"
21    }
22
23    fn usage(&self) -> &'static str {
24        "Replaces q-encoded or base64-encoded [encoded-word](https://datatracker.ietf.org/doc/html/rfc2047#section-2) substrings in the `value` with their original string."
25    }
26
27    fn category(&self) -> &'static str {
28        Category::Codec.as_ref()
29    }
30
31    fn internal_failure_reasons(&self) -> &'static [&'static str] {
32        &[
33            "`value` has invalid encoded [encoded-word](https://datatracker.ietf.org/doc/html/rfc2047#section-2) string.",
34        ]
35    }
36
37    fn return_kind(&self) -> u16 {
38        kind::BYTES
39    }
40
41    fn parameters(&self) -> &'static [Parameter] {
42        const PARAMETERS: &[Parameter] = &[Parameter::required(
43            "value",
44            kind::BYTES,
45            "The string with [encoded-words](https://datatracker.ietf.org/doc/html/rfc2047#section-2) to decode.",
46        )];
47        PARAMETERS
48    }
49
50    fn compile(
51        &self,
52        _state: &state::TypeState,
53        _ctx: &mut FunctionCompileContext,
54        arguments: ArgumentList,
55    ) -> Compiled {
56        let value = arguments.required("value");
57
58        Ok(DecodeMimeQFn { value }.as_expr())
59    }
60
61    fn examples(&self) -> &'static [Example] {
62        &[
63            example! {
64                title: "Decode single encoded-word",
65                source: r#"decode_mime_q!("=?utf-8?b?SGVsbG8sIFdvcmxkIQ==?=")"#,
66                result: Ok("Hello, World!"),
67            },
68            example! {
69                title: "Embedded",
70                source: r#"decode_mime_q!("From: =?utf-8?b?SGVsbG8sIFdvcmxkIQ==?= <=?utf-8?q?hello=5Fworld=40example=2ecom?=>")"#,
71                result: Ok("From: Hello, World! <hello_world@example.com>"),
72            },
73            example! {
74                title: "Without charset",
75                source: r#"decode_mime_q!("?b?SGVsbG8sIFdvcmxkIQ==")"#,
76                result: Ok("Hello, World!"),
77            },
78        ]
79    }
80}
81
82#[derive(Clone, Debug)]
83struct DecodeMimeQFn {
84    value: Box<dyn Expression>,
85}
86
87impl FunctionExpression for DecodeMimeQFn {
88    fn resolve(&self, ctx: &mut Context) -> Resolved {
89        let value = self.value.resolve(ctx)?;
90
91        decode_mime_q(&value)
92    }
93
94    fn type_def(&self, _: &state::TypeState) -> TypeDef {
95        TypeDef::bytes().fallible()
96    }
97}
98
99fn decode_mime_q(bytes: &Value) -> Resolved {
100    // Parse
101    let input = bytes.try_bytes_utf8_lossy()?;
102    let input: &str = &input;
103    let (remaining, decoded) = alt((
104        fold_many1(
105            parse_delimited_q,
106            || ExpressionResult::<String>::Ok(String::new()),
107            |result, (head, word)| {
108                let mut result = result?;
109
110                result.push_str(head);
111                result.push_str(&word.decode_word()?);
112
113                Ok(result)
114            },
115        ),
116        alt((
117            map_opt(parse_internal_q, |word| word.decode_word().map(Ok).ok()),
118            success(Ok(String::new())),
119        )),
120    ))
121    .parse(input)
122    .map_err(|e| match e {
123        nom::Err::Error(e) | nom::Err::Failure(e) => {
124            // Create a descriptive error message if possible.
125            nom_language::error::convert_error(input, e)
126        }
127        nom::Err::Incomplete(_) => e.to_string(),
128    })?;
129
130    let mut decoded = decoded?;
131
132    // Add remaining input to the decoded string.
133    decoded.push_str(remaining);
134
135    Ok(decoded.into())
136}
137
138/// Parses input into (head, (charset, encoding, encoded text))
139fn parse_delimited_q<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
140    input: &'a str,
141) -> IResult<&'a str, (&'a str, EncodedWord<'a>), E> {
142    pair(
143        take_until("=?"),
144        delimited(tag("=?"), parse_internal_q, tag("?=")),
145    )
146    .parse(input)
147}
148
149/// Parses inside of encoded word into (charset, encoding, encoded text)
150fn parse_internal_q<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
151    input: &'a str,
152) -> IResult<&'a str, EncodedWord<'a>, E> {
153    map(
154        separated_pair(
155            opt(take_until1("?")),
156            tag("?"),
157            separated_pair(
158                take_until("?"),
159                tag("?"),
160                alt((take_until("?="), |input| Ok(("", input)))),
161            ),
162        ),
163        |(charset, (encoding, input))| EncodedWord {
164            charset,
165            encoding,
166            input,
167        },
168    )
169    .parse(input)
170}
171
/// The three parts of an encoded word, borrowed from the parsed input.
struct EncodedWord<'src> {
    /// Charset label, or `None` when the word does not name one.
    charset: Option<&'src str>,
    /// Encoding marker exactly as written in the word (e.g. `Q` or `b`).
    encoding: &'src str,
    /// The still-encoded payload.
    input: &'src str,
}
177
178impl EncodedWord<'_> {
179    fn decode_word(&self) -> Result<String, ExpressionError> {
180        // Modified version from https://github.com/staktrace/mailparse/blob/a83d961fe53fd6504d75ee951a0e91dfea03c830/src/header.rs#L39
181
182        // Decode
183        let decoded = match self.encoding {
184            "B" | "b" => base64_simd::STANDARD
185                .decode_to_vec(self.input.as_bytes())
186                .map_err(|_| "Unable to decode base64 value")?,
187            "Q" | "q" => {
188                // The quoted_printable module does a trim_end on the input, so if
189                // that affects the output we should save and restore the trailing
190                // whitespace
191                let to_decode = self.input.replace('_', " ");
192                let trimmed = to_decode.trim_end();
193                let mut result =
194                    quoted_printable::decode(trimmed, quoted_printable::ParseMode::Robust);
195                if let Ok(ref mut d) = result
196                    && to_decode.len() != trimmed.len()
197                {
198                    d.extend_from_slice(&to_decode.as_bytes()[trimmed.len()..]);
199                }
200                result.map_err(|_| "Unable to decode quoted_printable value")?
201            }
202            _ => return Err(format!("Invalid encoding: {:?}", self.encoding).into()),
203        };
204
205        // Convert to UTF-8
206        let charset = self.charset.unwrap_or("utf-8");
207        let charset = Charset::for_label_no_replacement(charset.as_bytes())
208            .ok_or_else(|| format!("Unable to decode {charset:?} value"))?;
209        let (cow, _) = charset.decode_without_bom_handling(&decoded);
210        Ok(cow.into_owned())
211    }
212}
213
#[cfg(test)]
mod test {
    use nom_language::error::VerboseError;

    use crate::value;

    use super::*;

    /// An undelimited word with an explicit charset is split into its three
    /// parts and consumes the whole input.
    #[test]
    fn internal() {
        let (
            rest,
            EncodedWord {
                charset,
                encoding,
                input,
            },
        ) = parse_internal_q::<VerboseError<&str>>("utf-8?Q?hello=5Fworld=40example=2ecom")
            .unwrap();

        assert_eq!(rest, "");
        assert_eq!(charset, Some("utf-8"));
        assert_eq!(encoding, "Q");
        assert_eq!(input, "hello=5Fworld=40example=2ecom");
    }

    /// A leading `?` means the charset is absent.
    #[test]
    fn internal_no_charset() {
        let (
            rest,
            EncodedWord {
                charset,
                encoding,
                input,
            },
        ) = parse_internal_q::<VerboseError<&str>>("?Q?hello=5Fworld=40example=2ecom").unwrap();

        assert_eq!(rest, "");
        assert_eq!(charset, None);
        assert_eq!(encoding, "Q");
        assert_eq!(input, "hello=5Fworld=40example=2ecom");
    }

    test_function![
        decode_mime_q => DecodeMimeQ;

        // The payload bytes are interpreted using the named charset.
        non_utf8_charset {
            args: func_args![value: value!("Subject: =?iso-8859-1?Q?=A1Hola,_se=F1or!?=")],
            want: Ok(value!("Subject: ¡Hola, señor!")),
            tdef: TypeDef::bytes().fallible(),
        }

        // An empty encoding marker inside a delimited word is an error.
        missing_encoding {
            args: func_args![value: value!("Subject: =?iso-8859-1??=A1Hola,_se=F1or!?=")],
            want: Err("Invalid encoding: \"\""),
            tdef: TypeDef::bytes().fallible(),
        }

        // An unrecognised charset label inside a delimited word is an error.
        unknown_charset {
            args: func_args![value: value!("Subject: =?iso-9001?Q?hello=5Fworld=40example=2ecom?=")],
            want: Err("Unable to decode \"iso-9001\" value"),
            tdef: TypeDef::bytes().fallible(),
        }

        // Text without any encoded-word is returned unchanged.
        no_start {
            args: func_args![value: value!("Hello world.")],
            want: Ok(value!("Hello world.")),
            tdef: TypeDef::bytes().fallible(),
        }

        // Delimiters that do not enclose a well-formed word are left alone.
        not_encoded {
            args: func_args![value: value!("Is =? equal to ?= or not?")],
            want: Ok(value!("Is =? equal to ?= or not?")),
            tdef: TypeDef::bytes().fallible(),
        }

        // An opening delimiter without a closing one is left alone.
        partial {
            args: func_args![value: value!("Is =? equal or not?")],
            want: Ok(value!("Is =? equal or not?")),
            tdef: TypeDef::bytes().fallible(),
        }

        // Empty input yields empty output.
        empty {
            args: func_args![value: value!("")],
            want: Ok(value!("")),
            tdef: TypeDef::bytes().fallible(),
        }

    ];
}