vrl/core/
tokenize.rs

1use nom::{
2    Parser,
3    branch::alt,
4    bytes::complete::{escaped, is_not, tag},
5    character::complete::{one_of, space0},
6    combinator::{all_consuming, map, opt, rest, verify},
7    error::ErrorKind,
8    multi::many0,
9    sequence::{delimited, terminated},
10};
11
12/// Parses the specified `input` and returns a vector of tokens.
13///
14/// # Panics
15/// Parsing is expected to always succeed. Panics if there is a parsing error.
16pub fn parse(input: &str) -> Vec<&str> {
17    let simple = is_not::<_, _, (&str, ErrorKind)>(" \t[\"");
18    let string = delimited(
19        tag("\""),
20        map(opt(escaped(is_not("\"\\"), '\\', one_of("\"\\"))), |o| {
21            o.unwrap_or("")
22        }),
23        tag("\""),
24    );
25    let bracket = delimited(
26        tag("["),
27        map(opt(escaped(is_not("]\\"), '\\', one_of("]\\"))), |o| {
28            o.unwrap_or("")
29        }),
30        tag("]"),
31    );
32
33    // fall back to returning the rest of the input, if any
34    let remainder = verify(rest, |s: &str| !s.is_empty());
35    let field = alt((bracket, string, simple, remainder));
36
37    all_consuming(many0(terminated(field, space0)))
38        .parse(input)
39        .expect("parser should always succeed")
40        .1
41}
42
#[cfg(test)]
mod tests {
    use super::parse;

    /// A single bare word is the only token.
    #[test]
    fn basic() {
        assert_eq!(parse("foo"), ["foo"]);
    }

    /// Two words separated by one space give two tokens.
    #[test]
    fn multiple() {
        assert_eq!(parse("foo bar"), ["foo", "bar"]);
    }

    /// A tab followed by a space still acts as a single separator.
    #[test]
    fn more_space() {
        assert_eq!(parse("foo\t bar"), ["foo", "bar"]);
    }

    /// Long runs of mixed spaces and tabs never produce empty tokens.
    #[test]
    fn so_much_space() {
        assert_eq!(parse("foo  \t bar     baz"), ["foo", "bar", "baz"]);
    }

    /// A quoted section is one token, returned without its quotes.
    #[test]
    fn quotes() {
        assert_eq!(parse(r#"foo "bar baz""#), ["foo", "bar baz"]);
    }

    /// An empty pair of quotes yields an empty token.
    #[test]
    fn empty_quotes() {
        assert_eq!(parse(r#"foo """#), ["foo", ""]);
    }

    /// Escaped quotes stay inside the token, backslashes included.
    #[test]
    fn escaped_quotes() {
        assert_eq!(
            parse(r#"foo "bar \" \" baz""#),
            ["foo", r#"bar \" \" baz"#]
        );
    }

    /// An opening quote without a closing one is kept as part of the final token.
    #[test]
    fn unclosed_quotes() {
        assert_eq!(parse(r#"foo "bar"#), ["foo", "\"bar"]);
    }

    /// A bracketed section is one token, returned without its brackets.
    #[test]
    fn brackets() {
        assert_eq!(parse("[foo.bar = baz] quux"), ["foo.bar = baz", "quux"]);
    }

    /// An empty pair of brackets yields an empty token.
    #[test]
    fn empty_brackets() {
        assert_eq!(parse("[] quux"), ["", "quux"]);
    }

    /// Inside brackets, quotes and `[` are ordinary text and `\]` does not close the token.
    #[test]
    fn escaped_brackets() {
        assert_eq!(
            parse(r#"[foo " [[ \] "" bar] baz"#),
            [r#"foo " [[ \] "" bar"#, "baz"]
        );
    }

    /// An opening bracket without a closing one is kept as part of the final token.
    #[test]
    fn unclosed_brackets() {
        assert_eq!(parse("foo [bar"), ["foo", "[bar"]);
    }

    /// A bracket directly after a bare word starts a new token.
    #[test]
    fn truncated_field() {
        assert_eq!(parse("foo bar[baz]: quux"), ["foo", "bar", "baz", ":", "quux"]);
        assert_eq!(parse("foo bar[baz quux"), ["foo", "bar", "[baz quux"]);
    }

    /// A lone dash is an ordinary bare token.
    #[test]
    fn dash_field() {
        assert_eq!(parse("foo - bar"), ["foo", "-", "bar"]);
    }

    /// Regression inputs found by fuzzing.
    #[test]
    fn from_fuzzing() {
        assert!(parse("").is_empty());
        assert_eq!(parse("f] bar"), ["f]", "bar"]);
        assert_eq!(parse("f\" bar"), ["f", "\" bar"]);
        assert_eq!(parse("f[f bar"), ["f", "[f bar"]);
        assert_eq!(parse("f\"f bar"), ["f", "\"f bar"]);
        assert_eq!(parse("[][x"), ["", "[x"]);
        assert_eq!(parse("x[][x"), ["x", "", "[x"]);
    }
}