vrl/stdlib/
parse_key_value.rs

1use crate::compiler::function::EnumVariant;
2use crate::compiler::prelude::*;
3use crate::value;
4use nom::{
5    self, IResult, Parser,
6    branch::alt,
7    bytes::complete::{escaped, tag, take, take_until},
8    character::complete::{char, satisfy, space0},
9    combinator::{eof, map, opt, peek, rest, verify},
10    error::{ContextError, ParseError},
11    multi::{many_m_n, many0, many1, separated_list1},
12    sequence::{delimited, preceded, terminated},
13};
14use nom_language::error::VerboseError;
15use std::{
16    borrow::Cow,
17    collections::{BTreeMap, btree_map::Entry},
18    iter::Peekable,
19    str::{Chars, FromStr},
20    sync::LazyLock,
21};
22
23static DEFAULT_KEY_VALUE_DELIMITER: LazyLock<Value> =
24    LazyLock::new(|| Value::Bytes(Bytes::from("=")));
25static DEFAULT_FIELD_DELIMITER: LazyLock<Value> = LazyLock::new(|| Value::Bytes(Bytes::from(" ")));
26static DEFAULT_WHITESPACE: LazyLock<Value> = LazyLock::new(|| Value::Bytes(Bytes::from("lenient")));
27static DEFAULT_ACCEPT_STANDALONE_KEY: LazyLock<Value> = LazyLock::new(|| Value::Boolean(true));
28
/// Documented variants of the `whitespace` parameter.
///
/// The `value` strings are the same ones `Whitespace::from_str` accepts.
static WHITESPACE_ENUM: &[EnumVariant] = &[
    EnumVariant {
        // This is the value held by `DEFAULT_WHITESPACE`.
        value: "lenient",
        description: "Ignore whitespace.",
    },
    EnumVariant {
        value: "strict",
        description: "Parse whitespace as normal character.",
    },
];
39
40static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
41    vec![
42        Parameter::required("value", kind::BYTES, "The string to parse."),
43        Parameter::optional("key_value_delimiter", kind::BYTES, "The string that separates the key from the value.")
44            .default(&DEFAULT_KEY_VALUE_DELIMITER),
45        Parameter::optional("field_delimiter", kind::BYTES, "The string that separates each key-value pair.")
46            .default(&DEFAULT_FIELD_DELIMITER),
47        Parameter::optional("whitespace", kind::BYTES, "Defines the acceptance of unnecessary whitespace surrounding the configured `key_value_delimiter`.")
48            .default(&DEFAULT_WHITESPACE)
49            .enum_variants(WHITESPACE_ENUM),
50        Parameter::optional("accept_standalone_key", kind::BOOLEAN, "Whether a standalone key should be accepted, the resulting object associates such keys with the boolean value `true`.")
51            .default(&DEFAULT_ACCEPT_STANDALONE_KEY),
52    ]
53});
54
55pub fn parse_key_value(
56    bytes: &Value,
57    key_value_delimiter: &Value,
58    field_delimiter: &Value,
59    standalone_key: Value,
60    whitespace: Whitespace,
61) -> Resolved {
62    let bytes = bytes.try_bytes_utf8_lossy()?;
63    let key_value_delimiter = key_value_delimiter.try_bytes_utf8_lossy()?;
64    let field_delimiter = field_delimiter.try_bytes_utf8_lossy()?;
65    let standalone_key = standalone_key.try_boolean()?;
66    let values = parse(
67        &bytes,
68        &key_value_delimiter,
69        &field_delimiter,
70        whitespace,
71        standalone_key,
72    )?;
73
74    // Construct Value::Object by grouping values with the same key into an array.
75    // This logic depends on values not being arrays which is true for this parser.
76    let mut map = BTreeMap::new();
77    for (key, value) in values {
78        match map.entry(key) {
79            Entry::Vacant(entry) => {
80                entry.insert(value);
81            }
82            Entry::Occupied(mut entry) => {
83                if let Value::Boolean(true) = value {
84                    // We are done
85                } else {
86                    let existing = entry.get_mut();
87                    match existing {
88                        // A key without value
89                        Value::Boolean(true) => *existing = value,
90                        Value::Array(array) => array.push(value),
91                        _ => {
92                            let values = vec![std::mem::replace(existing, Value::Null), value];
93                            *existing = Value::Array(values);
94                        }
95                    }
96                }
97            }
98        }
99    }
100    Ok(Value::Object(map))
101}
102
/// The `parse_key_value` function definition: metadata, documentation examples and
/// compilation into a [`ParseKeyValueFn`] expression.
#[derive(Clone, Copy, Debug)]
pub struct ParseKeyValue;

impl Function for ParseKeyValue {
    /// Name of the function.
    fn identifier(&self) -> &'static str {
        "parse_key_value"
    }

    /// Description text for the function.
    fn usage(&self) -> &'static str {
        indoc! {r#"
            Parses the `value` in key-value format. Also known as [logfmt](https://brandur.org/logfmt).

            * Keys and values can be wrapped with `"`.
            * `"` characters can be escaped using `\`.
        "#}
    }

    /// Reports the `Parse` category.
    fn category(&self) -> &'static str {
        Category::Parse.as_ref()
    }

    /// Documented reasons for which the function can fail.
    fn internal_failure_reasons(&self) -> &'static [&'static str] {
        &["`value` is not a properly formatted key-value string."]
    }

    /// The function returns an object.
    fn return_kind(&self) -> u16 {
        kind::OBJECT
    }

    /// Additional notes about the returned values.
    fn notices(&self) -> &'static [&'static str] {
        &[indoc! {"
            All values are returned as strings or as an array of strings for duplicate keys. We
            recommend manually coercing values to desired types as you see fit.
        "}]
    }

    /// Parameters accepted by the function (see `PARAMETERS`).
    fn parameters(&self) -> &'static [Parameter] {
        PARAMETERS.as_slice()
    }

    /// Example invocations together with their expected results.
    fn examples(&self) -> &'static [Example] {
        &[
            example! {
                title: "Parse simple key value pairs",
                source: r#"parse_key_value!("zork=zook zonk=nork")"#,
                result: Ok(r#"{"zork": "zook", "zonk": "nork"}"#),
            },
            example! {
                title: "Parse logfmt log",
                source: indoc! {r#"
                    parse_key_value!(
                        "@timestamp=\"Sun Jan 10 16:47:39 EST 2021\" level=info msg=\"Stopping all fetchers\" tag#production=stopping_fetchers id=ConsumerFetcherManager-1382721708341 module=kafka.consumer.ConsumerFetcherManager"
                    )
                "#},
                result: Ok(indoc! {r#"{
                    "@timestamp": "Sun Jan 10 16:47:39 EST 2021",
                    "level": "info",
                    "msg": "Stopping all fetchers",
                    "tag#production": "stopping_fetchers",
                    "id": "ConsumerFetcherManager-1382721708341",
                    "module": "kafka.consumer.ConsumerFetcherManager"
                }"#}),
            },
            example! {
                title: "Parse comma delimited log",
                source: indoc! {r#"
                    parse_key_value!(
                        "path:\"/cart_link\", host:store.app.com, fwd: \"102.30.171.16\", dyno: web.1, connect:0ms, service:87ms, status:304, bytes:632, protocol:https",
                        field_delimiter: ",",
                        key_value_delimiter: ":"
                    )
                "#},
                result: Ok(indoc! {r#"{
                    "path": "/cart_link",
                    "host": "store.app.com",
                    "fwd": "102.30.171.16",
                    "dyno": "web.1",
                    "connect": "0ms",
                    "service": "87ms",
                    "status": "304",
                    "bytes": "632",
                    "protocol": "https"
                }"#}),
            },
            example! {
                title: "Parse comma delimited log with standalone keys",
                source: indoc! {r#"
                    parse_key_value!(
                        "env:prod,service:backend,region:eu-east1,beta",
                        field_delimiter: ",",
                        key_value_delimiter: ":",
                    )
                "#},
                result: Ok(indoc! {r#"{
                    "env": "prod",
                    "service": "backend",
                    "region": "eu-east1",
                    "beta": true
                }"#}),
            },
            example! {
                title: "Parse duplicate keys",
                source: indoc! {r#"
                    parse_key_value!(
                        "at=info,method=GET,path=\"/index\",status=200,tags=dev,tags=dummy",
                        field_delimiter: ",",
                        key_value_delimiter: "=",
                    )
                "#},
                result: Ok(indoc! {r#"{
                    "at": "info",
                    "method": "GET",
                    "path": "/index",
                    "status": "200",
                    "tags": [
                        "dev",
                        "dummy"
                    ]
                }"#}),
            },
            example! {
                title: "Parse with strict whitespace",
                source: r#"parse_key_value!(s'app=my-app ip=1.2.3.4 user= msg=hello-world', whitespace: "strict")"#,
                result: Ok(
                    r#"{"app": "my-app", "ip": "1.2.3.4", "user": "", "msg": "hello-world"}"#,
                ),
            },
        ]
    }

    /// Builds the [`ParseKeyValueFn`] expression from the call arguments.
    ///
    /// `whitespace` is turned into a [`Whitespace`] variant here; the other optional
    /// arguments are stored as expressions and resolved in `ParseKeyValueFn::resolve`.
    fn compile(
        &self,
        state: &state::TypeState,
        _ctx: &mut FunctionCompileContext,
        arguments: ArgumentList,
    ) -> Compiled {
        let value = arguments.required("value");

        let key_value_delimiter = arguments.optional("key_value_delimiter");

        let field_delimiter = arguments.optional("field_delimiter");

        // Falls back to `DEFAULT_WHITESPACE` when the argument is absent. The two `expect`s
        // panic if the value is not bytes or is not one of the strings `Whitespace::from_str`
        // knows. NOTE(review): presumably `optional_enum` has already rejected anything
        // outside `Whitespace::all_value()` -- confirm against its implementation.
        let whitespace = arguments
            .optional_enum("whitespace", &Whitespace::all_value(), state)?
            .unwrap_or_else(|| DEFAULT_WHITESPACE.clone())
            .try_bytes_utf8_lossy()
            .map(|s| Whitespace::from_str(&s).expect("validated enum"))
            .expect("whitespace not bytes");

        let standalone_key = arguments.optional("accept_standalone_key");

        Ok(ParseKeyValueFn {
            value,
            key_value_delimiter,
            field_delimiter,
            whitespace,
            standalone_key,
        }
        .as_expr())
    }
}
264
265#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
266pub(crate) enum Whitespace {
267    Strict,
268    #[default]
269    Lenient,
270}
271
272impl Whitespace {
273    fn all_value() -> Vec<Value> {
274        use Whitespace::{Lenient, Strict};
275
276        vec![Strict, Lenient]
277            .into_iter()
278            .map(|u| u.as_str().into())
279            .collect::<Vec<_>>()
280    }
281
282    const fn as_str(self) -> &'static str {
283        use Whitespace::{Lenient, Strict};
284
285        match self {
286            Strict => "strict",
287            Lenient => "lenient",
288        }
289    }
290}
291
292impl FromStr for Whitespace {
293    type Err = &'static str;
294
295    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
296        use Whitespace::{Lenient, Strict};
297
298        match s {
299            "strict" => Ok(Strict),
300            "lenient" => Ok(Lenient),
301            _ => Err("unknown whitespace variant"),
302        }
303    }
304}
305
306#[derive(Clone, Debug)]
307pub(crate) struct ParseKeyValueFn {
308    pub(crate) value: Box<dyn Expression>,
309    pub(crate) key_value_delimiter: Option<Box<dyn Expression>>,
310    pub(crate) field_delimiter: Option<Box<dyn Expression>>,
311    pub(crate) whitespace: Whitespace,
312    pub(crate) standalone_key: Option<Box<dyn Expression>>,
313}
314
315impl FunctionExpression for ParseKeyValueFn {
316    fn resolve(&self, ctx: &mut Context) -> Resolved {
317        let bytes = self.value.resolve(ctx)?;
318        let key_value_delimiter = self
319            .key_value_delimiter
320            .map_resolve_with_default(ctx, || DEFAULT_KEY_VALUE_DELIMITER.clone())?;
321        let field_delimiter = self
322            .field_delimiter
323            .map_resolve_with_default(ctx, || DEFAULT_FIELD_DELIMITER.clone())?;
324        let standalone_key = self
325            .standalone_key
326            .map_resolve_with_default(ctx, || DEFAULT_ACCEPT_STANDALONE_KEY.clone())?;
327        let whitespace = self.whitespace;
328
329        parse_key_value(
330            &bytes,
331            &key_value_delimiter,
332            &field_delimiter,
333            standalone_key,
334            whitespace,
335        )
336    }
337
338    fn type_def(&self, _: &state::TypeState) -> TypeDef {
339        type_def()
340    }
341}
342
343fn parse<'a>(
344    input: &'a str,
345    key_value_delimiter: &'a str,
346    field_delimiter: &'a str,
347    whitespace: Whitespace,
348    standalone_key: bool,
349) -> ExpressionResult<Vec<(KeyString, Value)>> {
350    let (rest, result) = parse_line(
351        input,
352        key_value_delimiter,
353        field_delimiter,
354        whitespace,
355        standalone_key,
356    )
357    .map_err(|e| match e {
358        nom::Err::Error(e) | nom::Err::Failure(e) => {
359            // Create a descriptive error message if possible.
360            nom_language::error::convert_error(input, e)
361        }
362        nom::Err::Incomplete(_) => e.to_string(),
363    })?;
364
365    if rest.trim().is_empty() {
366        Ok(result)
367    } else {
368        Err("could not parse whole line successfully".into())
369    }
370}
371
372/// Parse the line as a separated list of key value pairs.
373fn parse_line<'a>(
374    input: &'a str,
375    key_value_delimiter: &'a str,
376    field_delimiter: &'a str,
377    whitespace: Whitespace,
378    standalone_key: bool,
379) -> IResult<&'a str, Vec<(KeyString, Value)>, VerboseError<&'a str>> {
380    separated_list1(
381        parse_field_delimiter(field_delimiter),
382        parse_key_value_(
383            key_value_delimiter,
384            field_delimiter,
385            whitespace,
386            standalone_key,
387        ),
388    )
389    .parse(input)
390}
391
392/// Parses the `field_delimiter` between the key/value pairs.
393/// If the `field_delimiter` is a space, we parse as many as we can,
394/// If it is not a space eat any whitespace before our `field_delimiter` as well as the `field_delimiter`.
395fn parse_field_delimiter<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
396    field_delimiter: &'a str,
397) -> impl Fn(&'a str) -> IResult<&'a str, &'a str, E> {
398    move |input| {
399        if field_delimiter == " " {
400            map(many1(tag(field_delimiter)), |_| " ").parse(input)
401        } else {
402            preceded(many0(tag(" ")), tag(field_delimiter)).parse(input)
403        }
404    }
405}
406
407/// Parse a single `key=value` tuple.
408/// Always accepts `key=`
409/// Accept standalone `key` if `standalone_key` is `true`
410fn parse_key_value_<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
411    key_value_delimiter: &'a str,
412    field_delimiter: &'a str,
413    whitespace: Whitespace,
414    standalone_key: bool,
415) -> impl Fn(&'a str) -> IResult<&'a str, (KeyString, Value), E> {
416    move |input| {
417        map(
418            |input| match whitespace {
419                Whitespace::Strict => (
420                    preceded(
421                        space0,
422                        parse_key(key_value_delimiter, field_delimiter, standalone_key),
423                    ),
424                    many_m_n(usize::from(!standalone_key), 1, tag(key_value_delimiter)),
425                    parse_value(field_delimiter),
426                )
427                    .parse(input),
428                Whitespace::Lenient => (
429                    preceded(
430                        space0,
431                        parse_key(key_value_delimiter, field_delimiter, standalone_key),
432                    ),
433                    many_m_n(
434                        usize::from(!standalone_key),
435                        1,
436                        delimited(space0, tag(key_value_delimiter), space0),
437                    ),
438                    parse_value(field_delimiter),
439                )
440                    .parse(input),
441            },
442            |(field, sep, value): (Cow<'_, str>, Vec<&str>, Value)| {
443                (
444                    field.to_string().into(),
445                    if sep.len() == 1 { value } else { value!(true) },
446                )
447            },
448        )
449        .parse(input)
450    }
451}
452
453fn escape_str(s: &str) -> Cow<'_, str> {
454    if s.contains('\\') {
455        let mut out = String::new();
456        let mut chars = s.chars().peekable();
457
458        while let Some(c) = chars.next() {
459            out.push(escape_char(c, &mut chars));
460        }
461        Cow::Owned(out)
462    } else {
463        Cow::Borrowed(s)
464    }
465}
466
/// Resolves a single, possibly escaped, character.
///
/// `c` is the character just taken from the stream and `rest` is what follows it. When `c`
/// is a backslash followed by `n`, `\` or `"`, that following character is consumed and the
/// escaped character is returned. In every other case `c` is returned and `rest` is left
/// untouched.
fn escape_char(c: char, rest: &mut Peekable<Chars>) -> char {
    if c != '\\' {
        return c;
    }

    let replacement = match rest.peek() {
        Some('n') => '\n',
        Some('\\') => '\\',
        Some('"') => '"',
        // Some(_): escape sequences not produced by encode_key_value keep their backslash.
        // None: a trailing backslash is odd, but it is passed through rather than rejected.
        Some(_) | None => return c,
    };

    // The escaped character has been translated; drop it from the stream.
    let _ = rest.next();
    replacement
}
490
491/// Parses a string delimited by the given character.
492/// Can be escaped using `\`.
493/// The terminator indicates the character that should follow the delimited field.
494/// This captures the situation where a field is not actually delimited but starts with
495/// some text that appears delimited:
496/// `field: "some kind" of value`
497/// We want to error in this situation rather than return a partially parsed field.
498/// An error means the parser will then attempt to parse this as an undelimited field.
499fn parse_delimited<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
500    delimiter: char,
501    field_terminator: &'a str,
502) -> impl Fn(&'a str) -> IResult<&'a str, Cow<'a, str>, E> {
503    move |input| {
504        terminated(
505            delimited(
506                char(delimiter),
507                map(
508                    opt(escaped(
509                        satisfy(|c| c != '\\' && c != delimiter),
510                        '\\',
511                        // match literally any character, there are no invalid escape sequences
512                        take(1usize),
513                    )),
514                    // process the escape sequences that we encode
515                    |inner| inner.map_or(Cow::Borrowed(""), escape_str),
516                ),
517                char(delimiter),
518            ),
519            peek(alt((
520                parse_field_delimiter(field_terminator),
521                preceded(space0, eof),
522            ))),
523        )
524        .parse(input)
525    }
526}
527
528/// An undelimited value is all the text until our `field_delimiter`, or if it is the last value in the line,
529/// just take the rest of the string.
530fn parse_undelimited<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
531    field_delimiter: &'a str,
532) -> impl Fn(&'a str) -> IResult<&'a str, Cow<'a, str>, E> {
533    move |input| {
534        map(alt((take_until(field_delimiter), rest)), |s: &'_ str| {
535            Cow::Borrowed(s.trim())
536        })
537        .parse(input)
538    }
539}
540
541/// Parses the value.
542/// The value has two parsing strategies.
543///
544/// 1. Parse as a delimited field - currently the delimiter is hardcoded to a `"`.
545/// 2. If it does not start with one of the trim values, it is not a delimited field and we parse up to
546///    the next `field_delimiter` or the eof.
547///
548fn parse_value<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
549    field_delimiter: &'a str,
550) -> impl Fn(&'a str) -> IResult<&'a str, Value, E> {
551    move |input| {
552        map(
553            alt((
554                parse_delimited('\'', field_delimiter),
555                parse_delimited('"', field_delimiter),
556                parse_undelimited(field_delimiter),
557            )),
558            Into::into,
559        )
560        .parse(input)
561    }
562}
563
564type ParseKeyIResult<'a, E> = IResult<&'a str, Cow<'a, str>, E>;
565
566/// Parses the key.
567/// Overall parsing strategies are the same as `parse_value`, but we don't need to convert the result to a `Value`.
568/// Standalone key are handled here so a quoted standalone key that contains a delimiter will be dealt with correctly.
569fn parse_key<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
570    key_value_delimiter: &'a str,
571    field_delimiter: &'a str,
572    standalone_key: bool,
573) -> Box<dyn Fn(&'a str) -> ParseKeyIResult<'a, E> + 'a> {
574    if standalone_key {
575        Box::new(move |input| {
576            verify(
577                alt((
578                    parse_delimited('\'', key_value_delimiter),
579                    parse_delimited('\'', field_delimiter),
580                    parse_delimited('"', key_value_delimiter),
581                    parse_delimited('"', field_delimiter),
582                    verify(parse_undelimited(key_value_delimiter), |s: &str| {
583                        !s.is_empty() && !s.contains(field_delimiter)
584                    }),
585                    parse_undelimited(field_delimiter),
586                )),
587                |key: &str| !key.is_empty(),
588            )
589            .parse(input)
590        })
591    } else {
592        Box::new(move |input| {
593            verify(
594                alt((
595                    parse_delimited('\'', key_value_delimiter),
596                    parse_delimited('"', key_value_delimiter),
597                    parse_undelimited(key_value_delimiter),
598                )),
599                |key: &str| !key.is_empty(),
600            )
601            .parse(input)
602        })
603    }
604}
605
606fn type_def() -> TypeDef {
607    TypeDef::object(Collection::from_unknown(
608        Kind::boolean() | Kind::bytes() | Kind::array(Collection::from_unknown(Kind::bytes())),
609    ))
610    .fallible()
611}
612
613#[cfg(test)]
614mod test {
615    use super::*;
616
617    #[test]
618    fn test_quote_and_escape_char() {
619        assert_eq!(
620            Ok(vec![("key".to_string().into(), r"a\a".into()),]),
621            parse(r#"key="a\a""#, "=", " ", Whitespace::Strict, true,)
622        );
623
624        assert_eq!(
625            Ok(vec![(r"a\ a".to_string().into(), "val".into()),]),
626            parse(r#""a\ a"=val"#, "=", " ", Whitespace::Strict, true,)
627        );
628    }
629
630    #[test]
631    fn test_parse() {
632        assert_eq!(
633            Ok(vec![
634                ("ook".to_string().into(), "pook".into()),
635                (
636                    "@timestamp".to_string().into(),
637                    "2020-12-31T12:43:22.2322232Z".into()
638                ),
639                ("key#hash".to_string().into(), "value".into()),
640                (
641                    "key=with=special=characters".to_string().into(),
642                    "value".into()
643                ),
644                ("key".to_string().into(), "with special=characters".into()),
645            ]),
646            parse(
647                r#"ook=pook @timestamp=2020-12-31T12:43:22.2322232Z key#hash=value "key=with=special=characters"=value key="with special=characters""#,
648                "=",
649                " ",
650                Whitespace::Lenient,
651                false,
652            )
653        );
654    }
655
656    #[test]
657    fn test_parse_key_value() {
658        assert_eq!(
659            Ok(("", ("ook".to_string().into(), "pook".into()))),
660            parse_key_value_::<VerboseError<&str>>("=", " ", Whitespace::Lenient, false)(
661                "ook=pook"
662            )
663        );
664
665        assert_eq!(
666            Ok(("", ("key".to_string().into(), "".into()))),
667            parse_key_value_::<VerboseError<&str>>("=", " ", Whitespace::Strict, false)("key=")
668        );
669
670        assert!(
671            parse_key_value_::<VerboseError<&str>>("=", " ", Whitespace::Strict, false)("=value")
672                .is_err()
673        );
674    }
675
676    #[test]
677    fn test_parse_key_values() {
678        assert_eq!(
679            Ok(vec![
680                ("ook".to_string().into(), "pook".into()),
681                ("onk".to_string().into(), "ponk".into())
682            ]),
683            parse("ook=pook onk=ponk", "=", " ", Whitespace::Lenient, false)
684        );
685    }
686
687    #[test]
688    fn test_parse_key_values_strict() {
689        assert_eq!(
690            Ok(vec![
691                ("ook".to_string().into(), "".into()),
692                ("onk".to_string().into(), "ponk".into())
693            ]),
694            parse("ook= onk=ponk", "=", " ", Whitespace::Strict, false)
695        );
696    }
697
698    #[test]
699    fn test_parse_standalone_key() {
700        assert_eq!(
701            Ok(vec![
702                ("foo".to_string().into(), "bar".into()),
703                ("foobar".to_string().into(), value!(true))
704            ]),
705            parse("foo:bar ,   foobar   ", ":", ",", Whitespace::Lenient, true)
706        );
707    }
708
709    #[test]
710    fn test_multiple_standalone_key() {
711        assert_eq!(
712            Ok(vec![
713                ("foo".to_string().into(), "bar".into()),
714                ("foobar".to_string().into(), value!(true)),
715                ("bar".to_string().into(), "baz".into()),
716                ("barfoo".to_string().into(), value!(true)),
717            ]),
718            parse(
719                "foo=bar foobar bar=baz barfoo",
720                "=",
721                " ",
722                Whitespace::Lenient,
723                true
724            )
725        );
726    }
727
728    #[test]
729    fn test_only_standalone_key() {
730        assert_eq!(
731            Ok(vec![
732                ("foo".to_string().into(), value!(true)),
733                ("bar".to_string().into(), value!(true)),
734                ("foobar".to_string().into(), value!(true)),
735                ("baz".to_string().into(), value!(true)),
736                ("barfoo".to_string().into(), value!(true)),
737            ]),
738            parse(
739                "foo bar foobar baz barfoo",
740                "=",
741                " ",
742                Whitespace::Lenient,
743                true
744            )
745        );
746    }
747
748    #[test]
749    fn test_parse_single_standalone_key() {
750        assert_eq!(
751            Ok(vec![("foobar".to_string().into(), value!(true))]),
752            parse("foobar", ":", ",", Whitespace::Lenient, true)
753        );
754    }
755
756    #[test]
757    fn test_parse_standalone_key_strict() {
758        assert_eq!(
759            Ok(vec![
760                ("foo".to_string().into(), "bar".into()),
761                ("foobar".to_string().into(), value!(true))
762            ]),
763            parse("foo:bar ,   foobar   ", ":", ",", Whitespace::Strict, true)
764        );
765    }
766
767    #[test]
768    fn test_parse_tab_delimiter() {
769        let res = parse_field_delimiter::<VerboseError<&str>>("\t")(" \tzonk");
770        assert_eq!(("zonk", "\t"), res.unwrap());
771    }
772
773    #[test]
774    fn test_parse_key() {
775        // delimited
776        assert_eq!(
777            Ok(("", Cow::Borrowed("noog"))),
778            parse_key::<VerboseError<&str>>("=", " ", false)(r#""noog""#)
779        );
780
781        // undelimited
782        assert_eq!(
783            Ok(("", Cow::Borrowed("noog"))),
784            parse_key::<VerboseError<&str>>("=", " ", false)("noog")
785        );
786
787        // delimited with escaped char (1)
788        assert_eq!(
789            Ok(("=baz", Cow::Borrowed(r#"foo " bar"#))),
790            parse_key::<VerboseError<&str>>("=", " ", false)(r#""foo \" bar"=baz"#)
791        );
792
793        // delimited with escaped char (2)
794        assert_eq!(
795            Ok(("=baz", Cow::Borrowed(r#"foo \ " \ bar"#))),
796            parse_key::<VerboseError<&str>>("=", " ", false)(r#""foo \\ \" \ bar"=baz"#)
797        );
798
799        // delimited with escaped char (3)
800        assert_eq!(
801            Ok(("=baz", Cow::Borrowed(r"foo \ bar"))),
802            parse_key::<VerboseError<&str>>("=", " ", false)(r#""foo \ bar"=baz"#)
803        );
804
805        // Standalone key
806        assert_eq!(
807            Ok((" bar=baz", Cow::Borrowed("foo"))),
808            parse_key::<VerboseError<&str>>("=", " ", true)("foo bar=baz")
809        );
810
811        // empty is invalid
812        assert!(parse_key::<VerboseError<&str>>("=", " ", true)("").is_err());
813        assert!(parse_key::<VerboseError<&str>>("=", " ", false)("").is_err());
814
815        // quoted but empty also invalid
816        assert!(parse_key::<VerboseError<&str>>("=", " ", true)(r#""""#).is_err());
817        assert!(parse_key::<VerboseError<&str>>("=", " ", false)(r#""""#).is_err());
818    }
819
820    #[test]
821    fn test_parse_value() {
822        // delimited
823        assert_eq!(
824            Ok(("", "noog".into())),
825            parse_value::<VerboseError<&str>>(" ")(r#""noog""#)
826        );
827
828        // undelimited
829        assert_eq!(
830            Ok(("", "noog".into())),
831            parse_value::<VerboseError<&str>>(" ")("noog")
832        );
833
834        // empty delimited
835        assert_eq!(
836            Ok(("", "".into())),
837            parse_value::<VerboseError<&str>>(" ")(r#""""#)
838        );
839
840        // empty undelimited
841        assert_eq!(
842            Ok(("", "".into())),
843            parse_value::<VerboseError<&str>>(" ")("")
844        );
845    }
846
847    #[test]
848    fn test_parse_delimited_with_single_quotes() {
849        assert_eq!(
850            Ok(("", Cow::Borrowed("test"))),
851            parse_delimited::<VerboseError<&str>>('\'', " ")("'test'")
852        );
853    }
854
855    #[test]
856    fn test_parse_key_values_with_single_quotes() {
857        assert_eq!(
858            Ok(vec![
859                ("key1".to_string().into(), "val1".into()),
860                ("key2".to_string().into(), "val2".into())
861            ]),
862            parse("key1=val1,key2='val2'", "=", ",", Whitespace::Strict, false)
863        );
864    }
865
866    #[test]
867    fn test_parse_key_values_with_single_quotes_and_nested_double_quotes() {
868        assert_eq!(
869            Ok(vec![
870                ("key1".to_string().into(), "val1".into()),
871                (
872                    "key2".to_string().into(),
873                    "some value with \"nested quotes\"".into()
874                )
875            ]),
876            parse(
877                r#"key1=val1,key2='some value with "nested quotes"'"#,
878                "=",
879                ",",
880                Whitespace::Strict,
881                false
882            )
883        );
884    }
885
886    #[test]
887    fn test_parse_delimited_with_internal_quotes() {
888        assert!(parse_delimited::<VerboseError<&str>>('"', "=")(r#""noog" nonk"#).is_err());
889    }
890
891    #[test]
892    fn test_parse_delimited_with_internal_delimiters() {
893        assert_eq!(
894            Ok(("", Cow::Borrowed("noog nonk"))),
895            parse_delimited::<VerboseError<&str>>('"', " ")(r#""noog nonk""#)
896        );
897    }
898
899    #[test]
900    fn test_parse_undelimited_with_quotes() {
901        assert_eq!(
902            Ok(("", Cow::Borrowed(r#""noog" nonk"#))),
903            parse_undelimited::<VerboseError<&str>>(":")(r#""noog" nonk"#)
904        );
905    }
906
907    test_function![
908        parse_key_value => ParseKeyValue;
909
910        default {
911            args: func_args! [
912                value: r#"at=info method=GET path=/ host=myapp.herokuapp.com request_id=8601b555-6a83-4c12-8269-97c8e32cdb22 fwd="204.204.204.204" dyno=web.1 connect=1ms service=18ms status=200 bytes=13 tls_version=tls1.1 protocol=http"#,
913            ],
914            want: Ok(value!({at: "info",
915                             method: "GET",
916                             path: "/",
917                             host: "myapp.herokuapp.com",
918                             request_id: "8601b555-6a83-4c12-8269-97c8e32cdb22",
919                             fwd: "204.204.204.204",
920                             dyno: "web.1",
921                             connect: "1ms",
922                             service: "18ms",
923                             status: "200",
924                             bytes: "13",
925                             tls_version: "tls1.1",
926                             protocol: "http"})),
927            tdef: type_def(),
928        }
929
930        logfmt {
931            args: func_args! [
932                value: r#"level=info msg="Stopping all fetchers" tag=stopping_fetchers id=ConsumerFetcherManager-1382721708341 module=kafka.consumer.ConsumerFetcherManager"#
933            ],
934            want: Ok(value!({level: "info",
935                             msg: "Stopping all fetchers",
936                             tag: "stopping_fetchers",
937                             id: "ConsumerFetcherManager-1382721708341",
938                             module: "kafka.consumer.ConsumerFetcherManager"})),
939            tdef: type_def(),
940        }
941
942        // From https://github.com/vectordotdev/vector/issues/5347
943        real_case {
944            args: func_args! [
945                value: r#"SerialNum=100018002000001906146520 GenTime="2019-10-24 14:25:03" SrcIP=10.10.254.2 DstIP=10.10.254.7 Protocol=UDP SrcPort=137 DstPort=137 PolicyID=3 Action=PERMIT Content="Session Backout""#
946            ],
947            want: Ok(value!({SerialNum: "100018002000001906146520",
948                             GenTime: "2019-10-24 14:25:03",
949                             SrcIP: "10.10.254.2",
950                             DstIP: "10.10.254.7",
951                             Protocol: "UDP",
952                             SrcPort: "137",
953                             DstPort: "137",
954                             PolicyID: "3",
955                             Action: "PERMIT",
956                             Content: "Session Backout"})),
957            tdef: type_def(),
958        }
959
960        strict {
961            args: func_args! [
962                value: "foo= bar= tar=data",
963                whitespace: "strict"
964            ],
965            want: Ok(value!({foo: "",
966                             bar: "",
967                             tar: "data"})),
968            tdef: type_def(),
969        }
970
971        spaces {
972            args: func_args! [
973                value: r#""zork one" : "zoog\"zink\"zork"        nonk          : nink"#,
974                key_value_delimiter: ":",
975            ],
976            want: Ok(value!({"zork one": r#"zoog"zink"zork"#,
977                             nonk: "nink"})),
978            tdef: type_def(),
979        }
980
981        delimited {
982            args: func_args! [
983                value: r#""zork one":"zoog\"zink\"zork", nonk:nink"#,
984                key_value_delimiter: ":",
985                field_delimiter: ",",
986            ],
987            want: Ok(value!({"zork one": r#"zoog"zink"zork"#,
988                             nonk: "nink"})),
989            tdef: type_def(),
990        }
991
992        delimited_with_spaces {
993            args: func_args! [
994                value: r#""zork one" : "zoog\"zink\"zork"  ,      nonk          : nink"#,
995                key_value_delimiter: ":",
996                field_delimiter: ",",
997            ],
998            want: Ok(value!({"zork one": r#"zoog"zink"zork"#,
999                             nonk: "nink"})),
1000            tdef: type_def(),
1001        }
1002
1003        multiple_chars {
1004            args: func_args! [
1005                value: r#""zork one" -- "zoog\"zink\"zork"  ||    nonk          -- nink"#,
1006                key_value_delimiter: "--",
1007                field_delimiter: "||",
1008            ],
1009            want: Ok(value!({"zork one": r#"zoog"zink"zork"#,
1010                             nonk: "nink"})),
1011            tdef: type_def(),
1012        }
1013
1014        error {
1015            args: func_args! [
1016                value: "I am not a valid line.",
1017                key_value_delimiter: "--",
1018                field_delimiter: "||",
1019                accept_standalone_key: false,
1020            ],
1021            want: Err("0: at line 1, in Tag:\nI am not a valid line.\n                      ^\n\n1: at line 1, in ManyMN:\nI am not a valid line.\n                      ^\n\n"),
1022            tdef: type_def(),
1023        }
1024
1025        // The following case demonstrates a scenario that could potentially be considered an
1026        // error, but isn't. It is possible that we are missing a separator here (between nink and
1027        // norgle), but it parses it successfully and just assumes all the text after the
1028        // key_value_delimiter is the value since there is no terminator to stop the parsing.
1029        missing_separator {
1030            args: func_args! [
1031                value: "zork: zoog, nonk: nink norgle: noog",
1032                key_value_delimiter: ":",
1033                field_delimiter: ",",
1034            ],
1035            want: Ok(value!({zork: "zoog",
1036                             nonk: "nink norgle: noog"})),
1037            tdef: type_def(),
1038        }
1039
1040        // If the value field is delimited and we miss the separator,
1041        // the following field is consumed by the current one.
1042        missing_separator_delimited {
1043            args: func_args! [
1044                value: r#"zork: zoog, nonk: "nink" norgle: noog"#,
1045                key_value_delimiter: ":",
1046                field_delimiter: ",",
1047            ],
1048            want: Ok(value!({zork: "zoog",
1049                             nonk: r#""nink" norgle: noog"#})),
1050            tdef: type_def(),
1051        }
1052
1053        multi_line_with_quotes {
1054            args: func_args! [
1055                value: "To: tom\ntest: \"tom\" test",
1056                key_value_delimiter: ":",
1057                field_delimiter: "\n",
1058            ],
1059            want: Ok(value!({"To": "tom",
1060                             "test": "\"tom\" test"})),
1061            tdef: type_def(),
1062        }
1063
1064        multi_line_with_quotes_spaces {
1065            args: func_args! [
1066                value: "To: tom\ntest: \"tom test\"  ",
1067                key_value_delimiter: ":",
1068                field_delimiter: "\n",
1069            ],
1070            want: Ok(value!({"To": "tom",
1071                             "test": "tom test"})),
1072            tdef: type_def(),
1073        }
1074
1075        duplicate_keys {
1076            args: func_args! [
1077                value: r#"Cc:"tom" Cc:"bob""#,
1078                key_value_delimiter: ":",
1079                field_delimiter: " ",
1080            ],
1081            want: Ok(value!({"Cc": ["tom", "bob"]})),
1082            tdef: type_def(),
1083        }
1084
1085        duplicate_keys_no_value {
1086            args: func_args! [
1087                value: r#"Cc Cc:"bob""#,
1088                key_value_delimiter: ":",
1089                field_delimiter: " ",
1090            ],
1091            want: Ok(value!({"Cc": "bob"})),
1092            tdef: type_def(),
1093        }
1094
1095        escaped_tab_escapes_in_quoted_value {
1096            args: func_args! [
1097                value: r#"level=info field="escaped tabs \t\t""#,
1098                key_value_delimiter: "=",
1099                field_delimiter: " ",
1100            ],
1101            want: Ok(value!({
1102                "field": "escaped tabs \\t\\t",
1103                "level": "info",
1104            })),
1105            tdef: type_def(),
1106        }
1107
1108        escaped_quote_escapes_in_quoted_value {
1109            args: func_args! [
1110                value: r#"level=info field="quote -> \" <-""#,
1111                key_value_delimiter: "=",
1112                field_delimiter: " ",
1113            ],
1114            want: Ok(value!({
1115                "field": "quote -> \" <-",
1116                "level": "info",
1117            })),
1118            tdef: type_def(),
1119        }
1120
1121        invalid_quotes_in_quoted_value {
1122            args: func_args! [
1123                value: r#"level=error field="no quote here """#,
1124                //                 this extra quote causes  ~
1125                //               the quoted parser to fail
1126                //                        and these quotes ~~
1127                //       cause the unquoted parser to fail
1128                //             when there is an empty key!
1129
1130                key_value_delimiter: "=",
1131                field_delimiter: " ",
1132            ],
1133            want: Err("could not parse whole line successfully"),
1134            tdef: type_def(),
1135        }
1136
1137        empty_keys_are_invalid {
1138            args: func_args! [
1139                value: "level=info =(key)",
1140                key_value_delimiter: "=",
1141                field_delimiter: " ",
1142            ],
1143            want: Ok(value!({
1144                "=(key)": true,
1145                "level": "info",
1146            })),
1147            tdef: type_def(),
1148        }
1149
1150        unquoted_field_delimiter_followed_by_key_value_delimiter {
1151            args: func_args! [
1152                value: r"argh=no =",
1153                key_value_delimiter: "=",
1154                field_delimiter: " ",
1155            ],
1156            want: Ok(value!({
1157                "argh": "no",
1158                "=": true,
1159            })),
1160            tdef: type_def(),
1161        }
1162
1163        field_delimiter_followed_by_unpaired_quote_followed_by_key_value_delimiter {
1164            args: func_args! [
1165                value: r"argh=no '=",
1166                key_value_delimiter: "=",
1167                field_delimiter: " ",
1168            ],
1169            want: Ok(value!({
1170                "argh": "no",
1171                "'": "",
1172            })),
1173            tdef: type_def(),
1174        }
1175
1176        quoted_key_value_delimiter {
1177            args: func_args! [
1178                value: r#"argh="no =""#,
1179                key_value_delimiter: "=",
1180                field_delimiter: " ",
1181            ],
1182            want: Ok(value!({
1183                "argh": "no =",
1184            })),
1185            tdef: type_def(),
1186        }
1187
1188        backslash_key {
1189            args: func_args! [
1190                value: r#"\="oh boy""#,
1191                key_value_delimiter: "=",
1192                field_delimiter: " ",
1193            ],
1194            want: Ok(value!({
1195                "\\": "oh boy",
1196            })),
1197            tdef: type_def(),
1198        }
1199    ];
1200}