1use charset::Charset;
2use nom::{
3 IResult, Parser,
4 branch::alt,
5 bytes::complete::{tag, take_until, take_until1},
6 combinator::{map, map_opt, opt, success},
7 error::{ContextError, ParseError},
8 multi::fold_many1,
9 sequence::{delimited, pair, separated_pair},
10};
11
12use crate::compiler::prelude::*;
13use crate::value::Value;
14
/// Function definition for `decode_mime_q`.
///
/// This is a stateless unit type; the per-call state lives in the expression
/// built by `compile`.
#[derive(Debug, Clone, Copy)]
pub struct DecodeMimeQ;
17
18impl Function for DecodeMimeQ {
19 fn identifier(&self) -> &'static str {
20 "decode_mime_q"
21 }
22
23 fn usage(&self) -> &'static str {
24 "Replaces q-encoded or base64-encoded [encoded-word](https://datatracker.ietf.org/doc/html/rfc2047#section-2) substrings in the `value` with their original string."
25 }
26
27 fn category(&self) -> &'static str {
28 Category::Codec.as_ref()
29 }
30
31 fn internal_failure_reasons(&self) -> &'static [&'static str] {
32 &[
33 "`value` has invalid encoded [encoded-word](https://datatracker.ietf.org/doc/html/rfc2047#section-2) string.",
34 ]
35 }
36
37 fn return_kind(&self) -> u16 {
38 kind::BYTES
39 }
40
41 fn parameters(&self) -> &'static [Parameter] {
42 const PARAMETERS: &[Parameter] = &[Parameter::required(
43 "value",
44 kind::BYTES,
45 "The string with [encoded-words](https://datatracker.ietf.org/doc/html/rfc2047#section-2) to decode.",
46 )];
47 PARAMETERS
48 }
49
50 fn compile(
51 &self,
52 _state: &state::TypeState,
53 _ctx: &mut FunctionCompileContext,
54 arguments: ArgumentList,
55 ) -> Compiled {
56 let value = arguments.required("value");
57
58 Ok(DecodeMimeQFn { value }.as_expr())
59 }
60
61 fn examples(&self) -> &'static [Example] {
62 &[
63 example! {
64 title: "Decode single encoded-word",
65 source: r#"decode_mime_q!("=?utf-8?b?SGVsbG8sIFdvcmxkIQ==?=")"#,
66 result: Ok("Hello, World!"),
67 },
68 example! {
69 title: "Embedded",
70 source: r#"decode_mime_q!("From: =?utf-8?b?SGVsbG8sIFdvcmxkIQ==?= <=?utf-8?q?hello=5Fworld=40example=2ecom?=>")"#,
71 result: Ok("From: Hello, World! <hello_world@example.com>"),
72 },
73 example! {
74 title: "Without charset",
75 source: r#"decode_mime_q!("?b?SGVsbG8sIFdvcmxkIQ==")"#,
76 result: Ok("Hello, World!"),
77 },
78 ]
79 }
80}
81
82#[derive(Clone, Debug)]
83struct DecodeMimeQFn {
84 value: Box<dyn Expression>,
85}
86
87impl FunctionExpression for DecodeMimeQFn {
88 fn resolve(&self, ctx: &mut Context) -> Resolved {
89 let value = self.value.resolve(ctx)?;
90
91 decode_mime_q(&value)
92 }
93
94 fn type_def(&self, _: &state::TypeState) -> TypeDef {
95 TypeDef::bytes().fallible()
96 }
97}
98
99fn decode_mime_q(bytes: &Value) -> Resolved {
100 let input = bytes.try_bytes_utf8_lossy()?;
102 let input: &str = &input;
103 let (remaining, decoded) = alt((
104 fold_many1(
105 parse_delimited_q,
106 || ExpressionResult::<String>::Ok(String::new()),
107 |result, (head, word)| {
108 let mut result = result?;
109
110 result.push_str(head);
111 result.push_str(&word.decode_word()?);
112
113 Ok(result)
114 },
115 ),
116 alt((
117 map_opt(parse_internal_q, |word| word.decode_word().map(Ok).ok()),
118 success(Ok(String::new())),
119 )),
120 ))
121 .parse(input)
122 .map_err(|e| match e {
123 nom::Err::Error(e) | nom::Err::Failure(e) => {
124 nom_language::error::convert_error(input, e)
126 }
127 nom::Err::Incomplete(_) => e.to_string(),
128 })?;
129
130 let mut decoded = decoded?;
131
132 decoded.push_str(remaining);
134
135 Ok(decoded.into())
136}
137
138fn parse_delimited_q<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
140 input: &'a str,
141) -> IResult<&'a str, (&'a str, EncodedWord<'a>), E> {
142 pair(
143 take_until("=?"),
144 delimited(tag("=?"), parse_internal_q, tag("?=")),
145 )
146 .parse(input)
147}
148
149fn parse_internal_q<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
151 input: &'a str,
152) -> IResult<&'a str, EncodedWord<'a>, E> {
153 map(
154 separated_pair(
155 opt(take_until1("?")),
156 tag("?"),
157 separated_pair(
158 take_until("?"),
159 tag("?"),
160 alt((take_until("?="), |input| Ok(("", input)))),
161 ),
162 ),
163 |(charset, (encoding, input))| EncodedWord {
164 charset,
165 encoding,
166 input,
167 },
168 )
169 .parse(input)
170}
171
/// The three components of an encoded-word, borrowed from the parsed input.
struct EncodedWord<'a> {
    /// Charset label, or `None` when the word did not specify one.
    charset: Option<&'a str>,
    /// Encoding marker as written in the input.
    encoding: &'a str,
    /// The still-encoded payload.
    input: &'a str,
}
177
178impl EncodedWord<'_> {
179 fn decode_word(&self) -> Result<String, ExpressionError> {
180 let decoded = match self.encoding {
184 "B" | "b" => base64_simd::STANDARD
185 .decode_to_vec(self.input.as_bytes())
186 .map_err(|_| "Unable to decode base64 value")?,
187 "Q" | "q" => {
188 let to_decode = self.input.replace('_', " ");
192 let trimmed = to_decode.trim_end();
193 let mut result =
194 quoted_printable::decode(trimmed, quoted_printable::ParseMode::Robust);
195 if let Ok(ref mut d) = result
196 && to_decode.len() != trimmed.len()
197 {
198 d.extend_from_slice(&to_decode.as_bytes()[trimmed.len()..]);
199 }
200 result.map_err(|_| "Unable to decode quoted_printable value")?
201 }
202 _ => return Err(format!("Invalid encoding: {:?}", self.encoding).into()),
203 };
204
205 let charset = self.charset.unwrap_or("utf-8");
207 let charset = Charset::for_label_no_replacement(charset.as_bytes())
208 .ok_or_else(|| format!("Unable to decode {charset:?} value"))?;
209 let (cow, _) = charset.decode_without_bom_handling(&decoded);
210 Ok(cow.into_owned())
211 }
212}
213
#[cfg(test)]
mod test {
    use nom_language::error::VerboseError;

    use crate::value;

    use super::*;

    /// A bare word with a charset is split into its three components.
    #[test]
    fn internal() {
        let (rest, parsed) =
            parse_internal_q::<VerboseError<&str>>("utf-8?Q?hello=5Fworld=40example=2ecom")
                .unwrap();
        assert_eq!(rest, "");
        assert_eq!(parsed.charset, Some("utf-8"));
        assert_eq!(parsed.encoding, "Q");
        assert_eq!(parsed.input, "hello=5Fworld=40example=2ecom");
    }

    /// An empty charset section is reported as `None`.
    #[test]
    fn internal_no_charset() {
        let (rest, parsed) =
            parse_internal_q::<VerboseError<&str>>("?Q?hello=5Fworld=40example=2ecom").unwrap();
        assert_eq!(rest, "");
        assert_eq!(parsed.charset, None);
        assert_eq!(parsed.encoding, "Q");
        assert_eq!(parsed.input, "hello=5Fworld=40example=2ecom");
    }

    test_function![
        decode_mime_q => DecodeMimeQ;

        // Payload bytes are interpreted in the declared (non-UTF-8) charset.
        non_utf8_charset {
            args: func_args![value: value!("Subject: =?iso-8859-1?Q?=A1Hola,_se=F1or!?=")],
            want: Ok(value!("Subject: ¡Hola, señor!")),
            tdef: TypeDef::bytes().fallible(),
        }

        // An empty encoding marker is rejected.
        missing_encoding {
            args: func_args![value: value!("Subject: =?iso-8859-1??=A1Hola,_se=F1or!?=")],
            want: Err("Invalid encoding: \"\""),
            tdef: TypeDef::bytes().fallible(),
        }

        // An unrecognised charset label is rejected.
        unknown_charset {
            args: func_args![value: value!("Subject: =?iso-9001?Q?hello=5Fworld=40example=2ecom?=")],
            want: Err("Unable to decode \"iso-9001\" value"),
            tdef: TypeDef::bytes().fallible(),
        }

        // Text without any encoded-word passes through unchanged.
        no_start {
            args: func_args![value: value!("Hello world.")],
            want: Ok(value!("Hello world.")),
            tdef: TypeDef::bytes().fallible(),
        }

        // Delimiters that do not enclose a valid word are left alone.
        not_encoded {
            args: func_args![value: value!("Is =? equal to ?= or not?")],
            want: Ok(value!("Is =? equal to ?= or not?")),
            tdef: TypeDef::bytes().fallible(),
        }

        // An opening delimiter without a closing one is left alone.
        partial {
            args: func_args![value: value!("Is =? equal or not?")],
            want: Ok(value!("Is =? equal or not?")),
            tdef: TypeDef::bytes().fallible(),
        }

        // Empty input yields empty output.
        empty {
            args: func_args![value: value!("")],
            want: Ok(value!("")),
            tdef: TypeDef::bytes().fallible(),
        }

    ];
}