vrl/parsing/
ruby_hash.rs

1use crate::compiler::prelude::*;
2use nom::{
3    AsChar, IResult, Input, Parser,
4    branch::alt,
5    bytes::complete::{escaped, tag, take_while, take_while1},
6    character::complete::{char, digit1, satisfy},
7    combinator::{cut, map, opt, recognize, value},
8    error::{Context, ContextError, FromExternalError, ParseError, context},
9    multi::{many1, separated_list0},
10    number::complete::double,
11    sequence::{preceded, separated_pair, terminated},
12};
13use std::num::ParseIntError;
14
15pub fn parse_ruby_hash(input: &str) -> ExpressionResult<Value> {
16    let result = parse_hash(input)
17        .map_err(|err| match err {
18            nom::Err::Error(err) | nom::Err::Failure(err) => {
19                // Create a descriptive error message if possible.
20                nom_language::error::convert_error(input, err)
21            }
22            nom::Err::Incomplete(_) => err.to_string(),
23        })
24        .and_then(|(rest, result)| {
25            rest.trim()
26                .is_empty()
27                .then_some(result)
28                .ok_or_else(|| "could not parse whole line successfully".into())
29        })?;
30
31    Ok(result)
32}
33
34trait HashParseError<T>: ParseError<T> + ContextError<T> + FromExternalError<T, ParseIntError> {}
35impl<T, E: ParseError<T> + ContextError<T> + FromExternalError<T, ParseIntError>> HashParseError<T>
36    for E
37{
38}
39
40fn sp<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, &'a str, E> {
41    let chars = " \t\r\n";
42
43    take_while(move |c| chars.contains(c))(input)
44}
45
46fn parse_inner_str<'a, E: ParseError<&'a str>>(
47    delimiter: char,
48) -> impl FnMut(&'a str) -> IResult<&'a str, &'a str, E> {
49    move |input| {
50        map(
51            opt(escaped(
52                recognize(many1((
53                    take_while1(|c: char| c != '\\' && c != delimiter),
54                    // Consume \something
55                    opt((
56                        satisfy(|c| c == '\\'),
57                        satisfy(|c| c != '\\' && c != delimiter),
58                    )),
59                ))),
60                '\\',
61                satisfy(|c| c == '\\' || c == delimiter),
62            )),
63            |inner| inner.unwrap_or(""),
64        )
65        .parse(input)
66    }
67}
68
69/// Parses text with a given delimiter.
70fn parse_str<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
71    delimiter: char,
72) -> Context<impl Parser<&'a str, Output = &'a str, Error = E>> {
73    context(
74        "string",
75        preceded(
76            char(delimiter),
77            cut(terminated(parse_inner_str(delimiter), char(delimiter))),
78        ),
79    )
80}
81
82fn parse_boolean<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, bool, E> {
83    let parse_true = value(true, tag("true"));
84    let parse_false = value(false, tag("false"));
85
86    alt((parse_true, parse_false)).parse(input)
87}
88
89fn parse_nil<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, Value, E> {
90    value(Value::Null, tag("nil")).parse(input)
91}
92
93fn parse_bytes<'a, E: HashParseError<&'a str>>(input: &'a str) -> IResult<&'a str, Bytes, E> {
94    context(
95        "bytes",
96        map(alt((parse_str('"'), parse_str('\''))), |value| {
97            Bytes::copy_from_slice(value.as_bytes())
98        }),
99    )
100    .parse(input)
101}
102
103fn parse_symbol_key<T, E: ParseError<T>>(input: T) -> IResult<T, T, E>
104where
105    T: Input,
106    <T as Input>::Item: AsChar,
107{
108    take_while1(move |item: <T as Input>::Item| {
109        let c = item.as_char();
110        c.is_alphanum() || c == '_'
111    })(input)
112}
113
114fn parse_colon_key<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
115    input: &'a str,
116) -> IResult<&'a str, KeyString, E> {
117    map(
118        preceded(
119            char(':'),
120            alt((parse_str('"'), parse_str('\''), parse_symbol_key)),
121        ),
122        KeyString::from,
123    )
124    .parse(input)
125}
126
127// This parse_key function allows some cases that shouldn't be produced by ruby.
128// For example, { foo => "bar" } shouldn't be parsed but { foo: "bar" } should.
129// Considering that Vector's goal is to parse log produced by other applications
130// and that Vector is NOT a ruby parser, cases like the following one are ignored
131// because they shouldn't appear in the logs.
132// That being said, handling all the corner cases from Ruby's syntax would imply
133// increasing a lot the code complexity which is probably not necessary considering
134// that Vector is not a Ruby parser.
135fn parse_key<'a, E: HashParseError<&'a str>>(input: &'a str) -> IResult<&'a str, KeyString, E> {
136    alt((
137        map(
138            alt((parse_str('"'), parse_str('\''), parse_symbol_key, digit1)),
139            KeyString::from,
140        ),
141        parse_colon_key,
142    ))
143    .parse(input)
144}
145
146fn parse_array<'a, E: HashParseError<&'a str>>(input: &'a str) -> IResult<&'a str, Value, E> {
147    context(
148        "array",
149        map(
150            preceded(
151                char('['),
152                cut(terminated(
153                    separated_list0(preceded(sp, char(',')), parse_value),
154                    preceded(sp, char(']')),
155                )),
156            ),
157            Value::Array,
158        ),
159    )
160    .parse(input)
161}
162
163fn parse_key_value<'a, E: HashParseError<&'a str>>(
164    input: &'a str,
165) -> IResult<&'a str, (KeyString, Value), E> {
166    separated_pair(
167        preceded(sp, parse_key),
168        cut(preceded(sp, alt((tag(":"), tag("=>"))))),
169        parse_value,
170    )
171    .parse(input)
172}
173
174fn parse_hash<'a, E: HashParseError<&'a str>>(input: &'a str) -> IResult<&'a str, Value, E> {
175    context(
176        "map",
177        map(
178            preceded(
179                char('{'),
180                cut(terminated(
181                    map(
182                        separated_list0(preceded(sp, char(',')), parse_key_value),
183                        |tuple_vec| tuple_vec.into_iter().collect(),
184                    ),
185                    preceded(sp, char('}')),
186                )),
187            ),
188            Value::Object,
189        ),
190    )
191    .parse(input)
192}
193
194fn parse_value<'a, E: HashParseError<&'a str>>(input: &'a str) -> IResult<&'a str, Value, E> {
195    preceded(
196        sp,
197        alt((
198            parse_nil,
199            parse_hash,
200            parse_array,
201            map(parse_colon_key, Value::from),
202            map(parse_bytes, Value::Bytes),
203            map(double, |value| Value::Float(NotNan::new(value).unwrap())),
204            map(parse_boolean, Value::Boolean),
205        )),
206    )
207    .parse(input)
208}
209
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input`, asserting that parsing succeeds and yields an object.
    fn parse_object(input: &str) -> Value {
        let parsed = parse_ruby_hash(input).unwrap();
        assert!(parsed.is_object());
        parsed
    }

    #[test]
    fn test_parse_empty_object() {
        parse_object("{}");
    }

    #[test]
    fn test_parse_arrow_empty_array() {
        parse_ruby_hash("{ :array => [] }").unwrap();
    }

    #[test]
    fn test_parse_symbol_key() {
        let parsed = parse_object(r#"{ :key => "foo", :number => 500 }"#);
        let object = parsed.as_object().unwrap();

        let key = object.get("key").unwrap();
        assert!(key.is_bytes());
        assert_eq!(key.as_bytes().unwrap(), "foo");
        assert!(object.get("number").unwrap().is_float());
    }

    #[test]
    fn test_parse_symbol_colon_separator() {
        let parsed = parse_object(r#"{ key: "foo" }"#);
        let object = parsed.as_object().unwrap();

        let key = object.get("key").unwrap();
        assert!(key.is_bytes());
        assert_eq!(key.as_bytes().unwrap(), "foo");
    }

    #[test]
    fn test_parse_arrow_object() {
        let parsed = parse_object(
            r#"{ "hello" => "world", "number" => 42, "float" => 4.2, "array" => [1, 2.3], "object" => { "nope" => nil } }"#,
        );
        let object = parsed.as_object().unwrap();

        assert!(object.get("hello").unwrap().is_bytes());
        // Integers and decimals are both parsed as floats.
        for key in ["number", "float"] {
            assert!(object.get(key).unwrap().is_float());
        }
        assert!(object.get("array").unwrap().is_array());

        let nested = object.get("object").unwrap();
        assert!(nested.is_object());
        let nested = nested.as_object().unwrap();
        assert!(nested.get("nope").unwrap().is_null());
    }

    #[test]
    fn test_parse_arrow_object_key_number() {
        let parsed = parse_object(r#"{ 42 => "hello world" }"#);
        let object = parsed.as_object().unwrap();

        assert!(object.get("42").unwrap().is_bytes());
    }

    #[test]
    fn test_parse_arrow_object_key_colon() {
        let parsed = parse_object(
            r#"{ :colon => "hello world", :"double" => "quote", :'simple' => "quote" }"#,
        );
        let object = parsed.as_object().unwrap();

        for key in ["colon", "double", "simple"] {
            assert!(object.get(key).unwrap().is_bytes());
        }
    }

    #[test]
    fn test_parse_arrow_object_key_underscore() {
        let parsed = parse_object(r#"{ :with_underscore => "hello world" }"#);
        let object = parsed.as_object().unwrap();

        assert!(object.get("with_underscore").unwrap().is_bytes());
    }

    #[test]
    fn test_parse_colon_object_double_quote() {
        let parsed = parse_object(r#"{ "hello": "world" }"#);
        let object = parsed.as_object().unwrap();

        assert_eq!(object.get("hello").unwrap(), &Value::Bytes("world".into()));
    }

    #[test]
    fn test_parse_colon_object_single_quote() {
        let parsed = parse_object("{ 'hello': 'world' }");
        let object = parsed.as_object().unwrap();

        assert_eq!(object.get("hello").unwrap(), &Value::Bytes("world".into()));
    }

    #[test]
    fn test_parse_colon_object_no_quote() {
        let parsed = parse_object(r#"{ hello: "world" }"#);
        let object = parsed.as_object().unwrap();

        assert_eq!(object.get("hello").unwrap(), &Value::Bytes("world".into()));
    }

    #[test]
    fn test_parse_dash() {
        let parsed = parse_object(r#"{ "with-dash" => "foo" }"#);
        let object = parsed.as_object().unwrap();

        assert!(object.get("with-dash").unwrap().is_bytes());
    }

    #[test]
    fn test_parse_quote() {
        let parsed = parse_object(r#"{ "with'quote" => "and\"double\"quote" }"#);
        let object = parsed.as_object().unwrap();

        // Escape sequences are kept verbatim in the parsed value.
        assert_eq!(
            object.get("with'quote").unwrap(),
            &Value::Bytes("and\\\"double\\\"quote".into())
        );
    }

    #[test]
    fn test_parse_weird_format() {
        let parsed = parse_object(r#"{:hello=>"world",'number'=>42,"weird"=>'format\'here'}"#);
        let object = parsed.as_object().unwrap();

        assert!(object.get("hello").unwrap().is_bytes());
        assert!(object.get("number").unwrap().is_float());
    }

    #[test]
    fn test_non_hash() {
        assert!(parse_ruby_hash(r#""hello world""#).is_err());
    }
}
350}