vrl/datadog/grok/
parse_grok.rs

1use super::{
2    grok_filter::apply_filter,
3    parse_grok_rules::{GrokField, GrokRule},
4};
5use crate::path::parse_value_path;
6use crate::value::{ObjectMap, Value};
7use std::collections::BTreeMap;
8
/// Errors which cause the Datadog grok algorithm to stop processing and not return a parsed result.
#[derive(thiserror::Error, Debug, PartialEq, Eq)]
pub enum FatalError {
    /// The value did not match any of the rules it was tried against.
    #[error("value does not match any rule")]
    NoMatch,
    /// The regex engine reported a runtime failure while matching a pattern against the value.
    #[error("failure during regex engine runtime for match of the pattern against the value.")]
    RegexEngineError,
}
17
/// Errors that do not prohibit the Datadog grok algorithm from continuing processing.
///
/// They are collected and returned alongside the parsed object instead of aborting the parse.
#[derive(thiserror::Error, Debug, PartialEq, Eq)]
pub enum InternalError {
    /// When this error is encountered, the value associated with the filter is not parsed into the
    /// resulting object.
    ///
    /// The two strings are rendered into the message as the filter and the value it was applied
    /// to, in that order.
    #[error("failed to apply filter '{}' to '{}'", .0, .1)]
    FailedToApplyFilter(String, String),
}
26
/// Successful outcome of a grok parse: the parsed object plus any non-fatal errors.
#[derive(PartialEq, Debug)]
pub struct ParsedGrokObject {
    /// Resulting parsed object from the Grok operation.
    pub parsed: Value,
    /// List of internal errors that were encountered during the parsing.
    pub internal_errors: Vec<InternalError>,
}
34
35/// Parses a given source field value by applying the list of grok rules until the first match found.
36pub fn parse_grok(
37    source_field: &str,
38    grok_rules: &[GrokRule],
39) -> Result<ParsedGrokObject, FatalError> {
40    for rule in grok_rules {
41        match apply_grok_rule(source_field, rule) {
42            Err(FatalError::NoMatch) => continue,
43            other => return other,
44        }
45    }
46    Err(FatalError::NoMatch)
47}
48
49/// Tries to parse a given string with a given grok rule.
50/// Returns a parsed object and any internal errors encountered during operation, or errors
51/// if any were fatal.
52///
53/// Fatal Errors:
54/// - NoMatch - this rule does not match a given string
55/// - FailedToMatch - there was a runtime error while matching the compiled pattern against the source
56///
57/// Internal Errors:
58/// - FailedToApplyFilter - matches the rule, but there was a runtime error while applying on of the filters
59fn apply_grok_rule(source: &str, grok_rule: &GrokRule) -> Result<ParsedGrokObject, FatalError> {
60    let mut parsed = Value::Object(BTreeMap::new());
61    let mut internal_errors = vec![];
62
63    match grok_rule.pattern.match_against(source) {
64        Ok(Some(matches)) => {
65            for (name, match_str) in matches.iter() {
66                if match_str.is_empty() {
67                    continue;
68                }
69
70                let mut value = Some(Value::from(match_str));
71
72                if let Some(GrokField {
73                    lookup: field,
74                    filters,
75                }) = grok_rule.fields.get(name)
76                {
77                    for filter in filters {
78                        if let Some(ref mut v) = value {
79                            value = match apply_filter(v, filter) {
80                                Ok(Value::Null) => None,
81                                Ok(v) if v.is_object() => Some(parse_keys_as_path(v)),
82                                Ok(v) => Some(v),
83                                Err(e) => {
84                                    internal_errors.push(e);
85                                    None
86                                }
87                            };
88                        }
89                    }
90
91                    if let Some(value) = value {
92                        match value {
93                            // root-level maps must be merged
94                            Value::Object(map) if field.is_root() => {
95                                parsed.as_object_mut().expect("root is object").extend(map);
96                            }
97                            // anything else at the root leve must be ignored
98                            _ if field.is_root() => {}
99                            // otherwise just apply VRL lookup insert logic
100                            _ => match parsed.get(field).cloned() {
101                                Some(Value::Array(mut values)) => {
102                                    values.push(value);
103                                    parsed.insert(field, values);
104                                }
105                                Some(v) => {
106                                    parsed.insert(field, Value::Array(vec![v, value]));
107                                }
108                                None => {
109                                    parsed.insert(field, value);
110                                }
111                            },
112                        };
113                    }
114                } else {
115                    // this must be a regex named capturing group (?<name>group),
116                    // where name can only be alphanumeric - thus we do not need to parse field names(no nested fields)
117                    parsed
118                        .as_object_mut()
119                        .expect("parsed value is not an object")
120                        .insert(name.to_string().into(), value.into());
121                }
122            }
123
124            postprocess_value(&mut parsed);
125
126            Ok(ParsedGrokObject {
127                parsed,
128                internal_errors,
129            })
130        }
131        Ok(None) => Err(FatalError::NoMatch),
132        Err(e) => Err(e),
133    }
134}
135
136// parse all internal object keys as path
137fn parse_keys_as_path(value: Value) -> Value {
138    match value {
139        Value::Object(map) => {
140            let mut result = Value::Object(ObjectMap::new());
141            for (k, v) in map.into_iter() {
142                let path = parse_value_path(&k)
143                    .unwrap_or_else(|_| crate::owned_value_path!(&k.to_string()));
144                result.insert(&path, parse_keys_as_path(v));
145            }
146            result
147        }
148        Value::Array(a) => Value::Array(a.into_iter().map(parse_keys_as_path).collect()),
149        v => v,
150    }
151}
152
153/// postprocess parsed values
154fn postprocess_value(value: &mut Value) {
155    // remove empty objects
156    match value {
157        Value::Array(a) => a.iter_mut().for_each(postprocess_value),
158        Value::Object(map) => {
159            map.values_mut().for_each(postprocess_value);
160            map.retain(|_, value| {
161                !matches!(value, Value::Object(v) if v.is_empty()) && !matches!(value, Value::Null)
162            })
163        }
164        _ => {}
165    }
166}
167
168#[cfg(test)]
169mod tests {
170    use crate::btreemap;
171    use crate::value::Value;
172    use chrono::{Datelike, NaiveDate, Timelike, Utc};
173    use ordered_float::NotNan;
174    use tracing_test::traced_test;
175
176    use super::super::parse_grok_rules::parse_grok_rules;
177    use super::*;
178
    /// Root directory of the pattern/value fixture files read by the tests in this module.
    const FIXTURE_ROOT: &str = "tests/data/fixtures/parse_grok";
180
181    #[test]
182    fn parses_simple_grok() {
183        let rules = parse_grok_rules(
184            &[
185                "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{GREEDYDATA:message}"
186                    .to_string(),
187            ],
188            BTreeMap::new(),
189        )
190        .expect("couldn't parse rules");
191        let parsed = parse_grok("2020-10-02T23:22:12.223222Z info Hello world", &rules)
192            .unwrap()
193            .parsed;
194
195        assert_eq!(
196            parsed,
197            Value::from(btreemap! {
198                "timestamp" => "2020-10-02T23:22:12.223222Z",
199                "level" => "info",
200                "message" => "Hello world"
201            })
202        );
203    }
204
205    #[test]
206    fn parses_complex_grok() {
207        let rules = parse_grok_rules(
208            // patterns
209            &[
210                "%{access.common}".to_string(),
211                r#"%{access.common} (%{number:duration:scale(1000000000)} )?"%{_referer}" "%{_user_agent}"( "%{_x_forwarded_for}")?.*"#.to_string()
212            ],
213            // aliases
214            btreemap! {
215                "access.common" => r#"%{_client_ip} %{_ident} %{_auth} \[%{_date_access}\] "(?>%{_method} |)%{_url}(?> %{_version}|)" %{_status_code} (?>%{_bytes_written}|-)"#.to_string(),
216                "_auth" => r#"%{notSpace:http.auth:nullIf("-")}"#.to_string(),
217                "_bytes_written" => "%{integer:network.bytes_written}".to_string(),
218                "_client_ip" => "%{ipOrHost:network.client.ip}".to_string(),
219                "_version" => r#"HTTP\/%{regex("\\d+\\.\\d+"):http.version}"#.to_string(),
220                "_url" => "%{notSpace:http.url}".to_string(),
221                "_ident" => "%{notSpace:http.ident}".to_string(),
222                "_user_agent" => r#"%{regex("[^\\\"]*"):http.useragent}"#.to_string(),
223                "_referer" => "%{notSpace:http.referer}".to_string(),
224                "_status_code" => "%{integer:http.status_code}".to_string(),
225                "_method" => "%{word:http.method}".to_string(),
226                "_date_access" => "%{notSpace:date_access}".to_string(),
227                "_x_forwarded_for" => r#"%{regex("[^\\\"]*"):http._x_forwarded_for:nullIf("-")}"#.to_string()}).expect("couldn't parse rules");
228
229        let input = r#"127.0.0.1 - frank [13/Jul/2016:10:55:36] "GET /apache_pb.gif HTTP/1.0" 200 2326 0.202 "http://www.perdu.com/" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36" "-""#;
230        let parsed = parse_grok(input, &rules).unwrap().parsed;
231
232        assert_eq!(
233            parsed,
234            Value::from(btreemap! {
235                "date_access" => "13/Jul/2016:10:55:36",
236                "duration" => 202000000,
237                "http" => btreemap! {
238                    "auth" => "frank",
239                    "ident" => "-",
240                    "method" => "GET",
241                    "status_code" => 200,
242                    "url" => "/apache_pb.gif",
243                    "version" => "1.0",
244                    "referer" => "http://www.perdu.com/",
245                    "useragent" => "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36",
246                },
247                "network" => btreemap! {
248                    "bytes_written" => 2326,
249                    "client" => btreemap! {
250                        "ip" => "127.0.0.1"
251                    }
252                }
253            })
254        );
255    }
256
257    #[test]
258    fn supports_matchers() {
259        test_grok_pattern(vec![
260            ("%{number:field}", "-1.2", Ok(Value::from(-1.2_f64))),
261            ("%{number:field}", "-1", Ok(Value::from(-1))),
262            ("%{numberExt:field}", "-1234e+3", Ok(Value::from(-1234000))),
263            ("%{numberExt:field}", ".1e+3", Ok(Value::from(100))),
264            ("%{integer:field}", "-2", Ok(Value::from(-2))),
265            ("%{integerExt:field}", "+2", Ok(Value::from(2))),
266            ("%{integerExt:field}", "-2", Ok(Value::from(-2))),
267            ("%{integerExt:field}", "-1e+2", Ok(Value::from(-100))),
268            ("%{integerExt:field}", "1234.1e+5", Err(FatalError::NoMatch)),
269        ]);
270    }
271
272    #[test]
273    fn supports_filters() {
274        test_grok_pattern(vec![
275            ("%{data:field:number}", "1.0", Ok(Value::from(1))),
276            ("%{data:field:integer}", "1", Ok(Value::from(1))),
277            (
278                "%{data:field:lowercase}",
279                "aBC",
280                Ok(Value::Bytes("abc".into())),
281            ),
282            (
283                "%{data:field:uppercase}",
284                "Abc",
285                Ok(Value::Bytes("ABC".into())),
286            ),
287            ("%{integer:field:scale(10)}", "1", Ok(Value::from(10))),
288            ("%{number:field:scale(0.5)}", "10.0", Ok(Value::from(5))),
289        ]);
290    }
291
292    fn test_grok_pattern(tests: Vec<(&str, &str, Result<Value, FatalError>)>) {
293        for (filter, k, v) in tests {
294            let v = v.map(|parsed| ParsedGrokObject {
295                parsed,
296                internal_errors: vec![],
297            });
298            let rules =
299                parse_grok_rules(&[filter.to_string()], BTreeMap::new()).unwrap_or_else(|error| {
300                    panic!("failed to parse {k} with filter {filter}: {error}")
301                });
302            let parsed = parse_grok(k, &rules);
303
304            if let Ok(v) = v {
305                assert_eq!(
306                    parsed
307                        .unwrap_or_else(|_| panic!("{filter} does not match {k}"))
308                        .parsed,
309                    Value::from(btreemap! {
310                        "field" =>  v.parsed,
311                    }),
312                    "failed to parse {k} with filter {filter}"
313                );
314            } else {
315                assert_eq!(parsed, v, "failed to parse {k} with filter {filter}");
316            }
317        }
318    }
319
320    fn test_full_grok(tests: Vec<(&str, &str, Result<Value, FatalError>)>) {
321        for (filter, k, v) in tests {
322            let v = v.map(|parsed| ParsedGrokObject {
323                parsed,
324                internal_errors: vec![],
325            });
326            let rules = parse_grok_rules(&[filter.to_string()], BTreeMap::new())
327                .unwrap_or_else(|_| panic!("failed to parse {k} with filter {filter}"));
328            let parsed = parse_grok(k, &rules);
329
330            assert_eq!(parsed, v);
331        }
332    }
333
334    fn test_full_grok_internal_errors(
335        tests: Vec<(&str, &str, Result<ParsedGrokObject, FatalError>)>,
336    ) {
337        for (filter, k, v) in tests {
338            let rules = parse_grok_rules(&[filter.to_string()], BTreeMap::new())
339                .unwrap_or_else(|_| panic!("failed to parse {k} with filter {filter}"));
340            let parsed = parse_grok(k, &rules);
341
342            assert_eq!(parsed, v);
343        }
344    }
345
346    #[test]
347    fn fails_on_unknown_pattern_definition() {
348        assert_eq!(
349            parse_grok_rules(&["%{unknown}".to_string()], BTreeMap::new())
350                .unwrap_err()
351                .to_string(),
352            r#"failed to parse grok expression '(?m)\A%{unknown}\z': The given pattern definition name "unknown" could not be found in the definition map"#
353        );
354    }
355
356    #[test]
357    fn fails_on_unknown_filter() {
358        assert_eq!(
359            parse_grok_rules(
360                &["%{data:field:unknownFilter}".to_string()],
361                BTreeMap::new(),
362            )
363            .unwrap_err()
364            .to_string(),
365            "unknown filter 'unknownFilter'"
366        );
367    }
368
369    #[test]
370    fn fails_on_invalid_matcher_parameter() {
371        assert_eq!(
372            parse_grok_rules(&["%{regex(1):field}".to_string()], BTreeMap::new())
373                .unwrap_err()
374                .to_string(),
375            "invalid arguments for the function 'regex'"
376        );
377    }
378
379    #[test]
380    fn fails_on_invalid_filter_parameter() {
381        assert_eq!(
382            parse_grok_rules(&["%{data:field:scale()}".to_string()], BTreeMap::new())
383                .unwrap_err()
384                .to_string(),
385            "invalid arguments for the function 'scale'"
386        );
387    }
388
389    #[test]
390    fn regex_with_empty_field() {
391        test_grok_pattern(vec![(
392            r#"%{regex("\\d+\\.\\d+")} %{data:field}"#,
393            "1.0 field_value",
394            Ok(Value::from("field_value")),
395        )]);
396    }
397
398    #[test]
399    fn does_not_merge_field_maps() {
400        // only root-level maps are merged
401        test_full_grok(vec![(
402            "'%{data:nested.json:json}' '%{data:nested.json:json}'",
403            r#"'{ "json_field1": "value2" }' '{ "json_field2": "value3" }'"#,
404            Ok(Value::from(btreemap! {
405                "nested" => btreemap! {
406                    "json" =>  Value::Array(vec! [
407                        Value::from(btreemap! { "json_field1" => Value::Bytes("value2".into()) }),
408                        Value::from(btreemap! { "json_field2" => Value::Bytes("value3".into()) }),
409                    ]),
410                }
411            })),
412        )]);
413    }
414
415    // if the root-level value, after filters applied, is a map then merge it at the root level,
416    // otherwise ignore it
417    #[test]
418    fn supports_filters_without_fields() {
419        test_full_grok(vec![
420            (
421                "%{data::json}",
422                r#"{ "json_field1": "value2" }"#,
423                Ok(Value::from(btreemap! {
424                    "json_field1" => Value::Bytes("value2".into()),
425                })),
426            ),
427            // ignore non-map root-level fields
428            (
429                "%{notSpace:standalone_field} %{data::integer}",
430                "value1 1",
431                Ok(Value::from(btreemap! {
432                    "standalone_field" => Value::Bytes("value1".into()),
433                })),
434            ),
435        ]);
436
437        test_full_grok_internal_errors(vec![(
438            "%{notSpace:standalone_field} '%{data::json}' '%{data::json}' %{number::number}",
439            r#"value1 '{ "json_field1": "value2" }' '{ "json_field2": "value3" }' 3"#,
440            Ok(ParsedGrokObject {
441                parsed: Value::from(btreemap! {
442                    "standalone_field" => Value::Bytes("value1".into()),
443                    "json_field1" => Value::Bytes("value2".into()),
444                    "json_field2" => Value::Bytes("value3".into())
445                }),
446
447                internal_errors: vec![InternalError::FailedToApplyFilter(
448                    "Number".to_owned(),
449                    "3".to_owned(),
450                )],
451            }),
452        )]);
453    }
454
455    #[test]
456    fn ignores_field_if_filter_fails() {
457        // empty map for filters like json
458        test_full_grok_internal_errors(vec![(
459            "%{notSpace:field1:integer} %{data:field2:json}",
460            "not_a_number not a json",
461            Ok(ParsedGrokObject {
462                parsed: Value::from(BTreeMap::new()),
463                internal_errors: vec![
464                    InternalError::FailedToApplyFilter(
465                        "Integer".to_owned(),
466                        "\"not_a_number\"".to_owned(),
467                    ),
468                    InternalError::FailedToApplyFilter(
469                        "Json".to_owned(),
470                        "\"not a json\"".to_owned(),
471                    ),
472                ],
473            }),
474        )]);
475    }
476
477    #[test]
478    fn fails_on_no_match() {
479        let rules = parse_grok_rules(
480            &[
481                "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{GREEDYDATA:message}"
482                    .to_string(),
483            ],
484            BTreeMap::new(),
485        )
486        .expect("couldn't parse rules");
487        let error = parse_grok("an ungrokkable message", &rules).unwrap_err();
488
489        assert_eq!(error, FatalError::NoMatch);
490    }
491
492    #[test]
493    fn fails_on_too_many_match_retries() {
494        let pattern = std::fs::read_to_string(format!(
495            "{FIXTURE_ROOT}/pattern/excessive-match-retries.txt"
496        ))
497        .expect("Failed to read pattern file");
498        let value =
499            std::fs::read_to_string(format!("{FIXTURE_ROOT}/value/excessive-match-retries.txt"))
500                .expect("Failed to read value file");
501
502        let rules = parse_grok_rules(
503            // patterns
504            &[pattern],
505            BTreeMap::new(),
506        )
507        .expect("couldn't parse rules");
508
509        let parsed = parse_grok(&value, &rules);
510
511        assert_eq!(parsed.unwrap_err(), FatalError::RegexEngineError)
512    }
513
514    #[test]
515    fn appends_to_the_same_field() {
516        let rules = parse_grok_rules(
517            &[
518                r#"%{integer:nested.field} %{notSpace:nested.field:uppercase} %{notSpace:nested.field:nullIf("-")}"#
519                    .to_string(),
520            ],
521            BTreeMap::new(),
522        )
523            .expect("couldn't parse rules");
524        let parsed = parse_grok("1 info message", &rules).unwrap().parsed;
525
526        assert_eq!(
527            parsed,
528            Value::from(btreemap! {
529                "nested" => btreemap! {
530                   "field" =>  Value::Array(vec![1.into(), "INFO".into(), "message".into()]),
531                },
532            })
533        );
534    }
535
536    #[test]
537    fn error_on_circular_dependency() {
538        let err = parse_grok_rules(
539            // patterns
540            &["%{pattern1}".to_string()],
541            // aliases with a circular dependency
542            btreemap! {
543            "pattern1" => "%{pattern2}".to_string(),
544            "pattern2" => "%{pattern1}".to_string()},
545        )
546        .unwrap_err();
547        assert_eq!(
548            err.to_string(),
549            "Circular dependency found in the alias 'pattern1'"
550        );
551    }
552
553    #[test]
554    fn extracts_field_with_regex_capture() {
555        test_grok_pattern(vec![(
556            r"(?<field>\w+)",
557            "abc",
558            Ok(Value::Bytes("abc".into())),
559        )]);
560
561        // the group name can only be alphanumeric,
562        // though we don't validate group names(it would be unnecessary overhead at boot-time),
563        // field names are treated as literals, not as lookup paths
564        test_full_grok(vec![(
565            r"(?<nested.field.name>\w+)",
566            "abc",
567            Ok(Value::from(btreemap! {
568                "nested.field.name" => Value::Bytes("abc".into()),
569            })),
570        )]);
571    }
572
573    #[test]
574    fn supports_date_matcher() {
575        let now = Utc::now();
576        let now = NaiveDate::from_ymd_opt(now.year(), now.month(), now.day())
577            .unwrap()
578            .and_hms_opt(12, 13, 14)
579            .unwrap()
580            .and_utc();
581        test_grok_pattern(vec![
582            (
583                r#"%{date("dd/MMM/yyyy"):field}"#,
584                "06/Mar/2013",
585                Ok(Value::Integer(1362528000000)),
586            ),
587            (
588                r#"%{date("EEE MMM dd HH:mm:ss yyyy"):field}"#,
589                "Thu Jun 16 08:29:03 2016",
590                Ok(Value::Integer(1466065743000)),
591            ),
592            (
593                r#"%{date("dd/MMM/yyyy:HH:mm:ss Z"):field}"#,
594                "06/Mar/2013:01:36:30 +0900",
595                Ok(Value::Integer(1362501390000)),
596            ),
597            (
598                r#"%{date("yyyy-MM-dd'T'HH:mm:ss.SSSZ"):field}"#,
599                "2016-11-29T16:21:36.431+0000",
600                Ok(Value::Integer(1480436496431)),
601            ),
602            (
603                r#"%{date("yyyy-MM-dd'T'HH:mm:ss.SSSZZ"):field}"#,
604                "2016-11-29T16:21:36.431+00:00",
605                Ok(Value::Integer(1480436496431)),
606            ),
607            (
608                r#"%{date("dd/MMM/yyyy:HH:mm:ss.SSS"):field}"#,
609                "06/Feb/2009:12:14:14.655",
610                Ok(Value::Integer(1233922454655)),
611            ),
612            (
613                r#"%{date("yyyy-MM-dd HH:mm:ss.SSS z"):field}"#,
614                "2007-08-31 19:22:22.427 CET",
615                Ok(Value::Integer(1188580942427)),
616            ),
617            (
618                r#"%{date("yyyy-MM-dd HH:mm:ss.SSS zzzz"):field}"#,
619                "2007-08-31 19:22:22.427 America/Thule",
620                Ok(Value::Integer(1188598942427)),
621            ),
622            (
623                r#"%{date("yyyy-MM-dd HH:mm:ss.SSS Z"):field}"#,
624                "2007-08-31 19:22:22.427 -03:00",
625                Ok(Value::Integer(1188598942427)),
626            ),
627            (
628                r#"%{date("EEE MMM dd HH:mm:ss yyyy", "Europe/Moscow"):field}"#,
629                "Thu Jun 16 08:29:03 2016",
630                Ok(Value::Integer(1466054943000)),
631            ),
632            (
633                r#"%{date("EEE MMM dd HH:mm:ss yyyy", "UTC+5"):field}"#,
634                "Thu Jun 16 08:29:03 2016",
635                Ok(Value::Integer(1466047743000)),
636            ),
637            (
638                r#"%{date("EEE MMM dd HH:mm:ss yyyy", "+3"):field}"#,
639                "Thu Jun 16 08:29:03 2016",
640                Ok(Value::Integer(1466054943000)),
641            ),
642            (
643                r#"%{date("EEE MMM dd HH:mm:ss yyyy", "+03:00"):field}"#,
644                "Thu Jun 16 08:29:03 2016",
645                Ok(Value::Integer(1466054943000)),
646            ),
647            (
648                r#"%{date("EEE MMM dd HH:mm:ss yyyy", "-0300"):field}"#,
649                "Thu Jun 16 08:29:03 2016",
650                Ok(Value::Integer(1466076543000)),
651            ),
652            (
653                r#"%{date("MMM d y HH:mm:ss z"):field}"#,
654                "Nov 16 2020 13:41:29 GMT",
655                Ok(Value::Integer(1605534089000)),
656            ),
657            (
658                r#"%{date("yyyy-MM-dd HH:mm:ss.SSSS"):field}"#,
659                "2019-11-25 11:21:32.6282",
660                Ok(Value::Integer(1574680892628)),
661            ),
662            (
663                r#"%{date("yyyy-MM-dd'T'HH:mm:ss.SSSZ"):field}"#,
664                "2016-09-02T15:02:29.648Z",
665                Ok(Value::Integer(1472828549648)),
666            ),
667            (
668                r#"%{date("yyMMdd HH:mm:ss"):field}"#,
669                "171113 14:14:20",
670                Ok(Value::Integer(1510582460000)),
671            ),
672            (
673                r#"%{date("M/d/yy HH:mm:ss z"):field}"#,
674                "5/6/18 19:40:59 GMT",
675                Ok(Value::Integer(1525635659000)),
676            ),
677            (
678                r#"%{date("M/d/yy HH:mm:ss z"):field}"#,
679                "11/16/18 19:40:59 GMT",
680                Ok(Value::Integer(1542397259000)),
681            ),
682            (
683                r#"%{date("M/d/yy HH:mm:ss,SSS z"):field}"#,
684                "11/16/18 19:40:59,123 GMT",
685                Ok(Value::Integer(1542397259123)),
686            ),
687            (
688                r#"%{date("M/d/yy HH:mm:ss,SSSS z"):field}"#,
689                "11/16/18 19:40:59,1234 GMT",
690                Ok(Value::Integer(1542397259123)),
691            ),
692            (
693                r#"%{date("M/d/yy HH:mm:ss,SSSSSSSSS z"):field}"#,
694                "11/16/18 19:40:59,123456789 GMT",
695                Ok(Value::Integer(1542397259123)),
696            ),
697            (
698                r#"%{date("M/d/yy HH:mm:ss.SSSS z"):field}"#,
699                "11/16/18 19:40:59.1234 GMT",
700                Ok(Value::Integer(1542397259123)),
701            ),
702            // date is missing - assume the current day
703            (
704                r#"%{date("HH:mm:ss"):field}"#,
705                &format!("{}:{}:{}", now.hour(), now.minute(), now.second()),
706                Ok(Value::Integer(now.timestamp() * 1000)),
707            ),
708            // if the year is missing - assume the current year
709            (
710                r#"%{date("d/M HH:mm:ss"):field}"#,
711                &format!(
712                    "{}/{} {}:{}:{}",
713                    now.day(),
714                    now.month(),
715                    now.hour(),
716                    now.minute(),
717                    now.second()
718                ),
719                Ok(Value::Integer(now.timestamp() * 1000)),
720            ),
721        ]);
722
723        // check error handling
724        assert_eq!(
725            parse_grok_rules(
726                &[r#"%{date("ABC:XYZ"):field}"#.to_string()],
727                BTreeMap::new(),
728            )
729            .unwrap_err()
730            .to_string(),
731            "invalid arguments for the function 'date'"
732        );
733        assert_eq!(
734            parse_grok_rules(
735                &[r#"%{date("EEE MMM dd HH:mm:ss yyyy", "unknown timezone"):field}"#.to_string()],
736                BTreeMap::new(),
737            )
738            .unwrap_err()
739            .to_string(),
740            "invalid arguments for the function 'date'"
741        );
742    }
743
744    #[test]
745    fn supports_array_filter() {
746        test_grok_pattern(vec![
747            (
748                "%{data:field:array}",
749                "[1,2]",
750                Ok(Value::Array(vec!["1".into(), "2".into()])),
751            ),
752            (
753                r#"%{data:field:array("\\t")}"#,
754                "[1\t2]",
755                Ok(Value::Array(vec!["1".into(), "2".into()])),
756            ),
757            (
758                r#"%{data:field:array("[]","\\n")}"#,
759                "[1\n2]",
760                Ok(Value::Array(vec!["1".into(), "2".into()])),
761            ),
762            (
763                r#"%{data:field:array("","-")}"#,
764                "1-2",
765                Ok(Value::Array(vec!["1".into(), "2".into()])),
766            ),
767            (
768                "%{data:field:array(integer)}",
769                "[1,2]",
770                Ok(Value::Array(vec![1.into(), 2.into()])),
771            ),
772            (
773                r#"%{data:field:array(";", integer)}"#,
774                "[1;2]",
775                Ok(Value::Array(vec![1.into(), 2.into()])),
776            ),
777            (
778                r#"%{data:field:array("{}",";", integer)}"#,
779                "{1;2}",
780                Ok(Value::Array(vec![1.into(), 2.into()])),
781            ),
782            (
783                "%{data:field:array(number)}",
784                "[1,2]",
785                Ok(Value::Array(vec![1.into(), 2.into()])),
786            ),
787            (
788                "%{data:field:array(integer)}",
789                "[1,2]",
790                Ok(Value::Array(vec![1.into(), 2.into()])),
791            ),
792            (
793                "%{data:field:array(scale(10))}",
794                "[1,2.1]",
795                Ok(Value::Array(vec![10.into(), 21.into()])),
796            ),
797            (
798                r#"%{data:field:array(";", scale(10))}"#,
799                "[1;2.1]",
800                Ok(Value::Array(vec![10.into(), 21.into()])),
801            ),
802            (
803                r#"%{data:field:array("{}",";", scale(10))}"#,
804                "{1;2.1}",
805                Ok(Value::Array(vec![10.into(), 21.into()])),
806            ),
807        ]);
808
809        test_full_grok_internal_errors(vec![
810            // not an array
811            (
812                "%{data:field:array}",
813                "abc",
814                Ok(ParsedGrokObject {
815                    parsed: Value::from(BTreeMap::new()),
816                    internal_errors: vec![InternalError::FailedToApplyFilter(
817                        "Array(..)".to_owned(),
818                        "\"abc\"".to_owned(),
819                    )],
820                }),
821            ),
822            // failed to apply value filter(values are strings)
823            (
824                "%{data:field:array(scale(10))}",
825                "[a,b]",
826                Ok(ParsedGrokObject {
827                    parsed: Value::from(BTreeMap::new()),
828                    internal_errors: vec![InternalError::FailedToApplyFilter(
829                        "Scale(..)".to_owned(),
830                        "\"a\"".to_owned(),
831                    )],
832                }),
833            ),
834        ]);
835    }
836
    /// Table-driven checks for the `keyvalue` filter.
    ///
    /// Every case is a `(grok pattern, input, expected result)` tuple handed to
    /// `test_full_grok`.
    #[test]
    fn parses_keyvalue() {
        test_full_grok(vec![
            // default settings: `=` separates the key from the value
            (
                "%{data::keyvalue}",
                "key=valueStr",
                Ok(Value::from(btreemap! {
                    "key" => "valueStr"
                })),
            ),
            // angle brackets around the value are dropped
            (
                "%{data::keyvalue}",
                "key=<valueStr>",
                Ok(Value::from(btreemap! {
                    "key" => "valueStr"
                })),
            ),
            // double quotes around key and value are dropped
            (
                "%{data::keyvalue}",
                r#""key"="valueStr""#,
                Ok(Value::from(btreemap! {
                    "key" => "valueStr"
                })),
            ),
            // single quotes around key and value are dropped
            (
                "%{data::keyvalue}",
                "'key'='valueStr'",
                Ok(Value::from(btreemap! {
                   "key" => "valueStr"
                })),
            ),
            // angle brackets around both key and value are dropped
            (
                "%{data::keyvalue}",
                "<key>=<valueStr>",
                Ok(Value::from(btreemap! {
                    "key" => "valueStr"
                })),
            ),
            // custom key-value delimiter passed as the first argument
            (
                r#"%{data::keyvalue(":")}"#,
                "key:valueStr",
                Ok(Value::from(btreemap! {
                    "key" => "valueStr"
                })),
            ),
            // "/" passed as the second argument: a quoted value starting with "/" is kept as is
            (
                r#"%{data::keyvalue(":", "/")}"#,
                r#"key:"/valueStr""#,
                Ok(Value::from(btreemap! {
                    "key" => "/valueStr"
                })),
            ),
            // ... and "/" is accepted in unquoted keys and values as well
            (
                r#"%{data::keyvalue(":", "/")}"#,
                "/key:/valueStr",
                Ok(Value::from(btreemap! {
                    "/key" => "/valueStr"
                })),
            ),
            // custom quoting characters ("{}") passed as the third argument
            (
                r#"%{data::keyvalue(":=", "", "{}")}"#,
                "key:={valueStr}",
                Ok(Value::from(btreemap! {
                    "key" => "valueStr"
                })),
            ),
            // ignore space after the delimiter(comma)
            (
                r#"%{data::keyvalue}"#,
                "key1=value1, key2=value2",
                Ok(Value::from(btreemap! {
                    "key1" => "value1",
                    "key2" => "value2",
                })),
            ),
            // allow space as a legit value character, but trim key/values
            (
                r#"%{data::keyvalue("="," ")}"#,
                "key1=value1, key2 = value 2 ",
                Ok(Value::from(btreemap! {
                    "key1" => "value1",
                    "key2" => "value 2",
                })),
            ),
            // custom field delimiter ("|") passed as the fourth argument
            (
                r#"%{data::keyvalue("=", "", "", "|")}"#,
                "key1=value1|key2=value2",
                Ok(Value::from(btreemap! {
                    "key1" => "value1",
                    "key2" => "value2",
                })),
            ),
            // custom field delimiter combined with double-quoted values
            (
                r#"%{data::keyvalue("=", "", "", "|")}"#,
                r#"key1="value1"|key2="value2""#,
                Ok(Value::from(btreemap! {
                    "key1" => "value1",
                    "key2" => "value2",
                })),
            ),
            // with "<>" as the quoting characters, the double-quoted key3 is not captured
            (
                r#"%{data::keyvalue(":=","","<>")}"#,
                r#"key1:=valueStr key2:=</valueStr2> key3:="valueStr3""#,
                Ok(Value::from(btreemap! {
                    "key1" => "valueStr",
                    "key2" => "/valueStr2",
                })),
            ),
            // comma-separated pairs without spaces
            (
                "%{data::keyvalue}",
                "key1=value1,key2=value2",
                Ok(Value::from(btreemap! {
                    "key1" => "value1",
                    "key2" => "value2",
                })),
            ),
            // semicolon-separated pairs
            (
                "%{data::keyvalue}",
                "key1=value1;key2=value2",
                Ok(Value::from(btreemap! {
                    "key1" => "value1",
                    "key2" => "value2",
                })),
            ),
            // ":=" between key and value yields no pairs with the default settings
            (
                "%{data::keyvalue}",
                "key:=valueStr",
                Ok(Value::from(BTreeMap::new())),
            ),
            // empty value or null - space-separated
            (
                "%{data::keyvalue}",
                "key1= key2=null key3=value3",
                Ok(Value::from(btreemap! {
                    "key3" => "value3"
                })),
            ),
            // empty value or null - comma-separated
            (
                "%{data::keyvalue}",
                "key1=,key2=null,key3= ,key4=value4",
                Ok(Value::from(btreemap! {
                    "key4" => "value4"
                })),
            ),
            // empty key
            (
                "%{data::keyvalue}",
                "=,=value",
                Ok(Value::from(BTreeMap::new())),
            ),
            // type inference
            (
                "%{data::keyvalue}",
                "float=1.2,boolean=true,null=null,string=abc,integer1=11,integer2=12",
                Ok(Value::from(btreemap! {
                    "float" => Value::Float(NotNan::new(1.2).expect("not a float")),
                    "boolean" => Value::Boolean(true),
                    "string" => Value::Bytes("abc".into()),
                    "integer1" => Value::Integer(11),
                    "integer2" => Value::Integer(12)
                })),
            ),
            // type inference with extra spaces around field delimiters
            (
                "%{data::keyvalue}",
                "float=1.2 , boolean=true , null=null    ,   string=abc , integer1=11  ,  integer2=12  ",
                Ok(Value::from(btreemap! {
                    "float" => Value::Float(NotNan::new(1.2).expect("not a float")),
                    "boolean" => Value::Boolean(true),
                    "string" => Value::Bytes("abc".into()),
                    "integer1" => Value::Integer(11),
                    "integer2" => Value::Integer(12)
                })),
            ),
            // spaces around key-value delimiter are not allowed
            (
                "%{data::keyvalue}",
                "key = valueStr",
                Ok(Value::from(BTreeMap::new())),
            ),
            (
                "%{data::keyvalue}",
                "key= valueStr",
                Ok(Value::from(BTreeMap::new())),
            ),
            (
                "%{data::keyvalue}",
                "key =valueStr",
                Ok(Value::from(BTreeMap::new())),
            ),
            // a hex-looking value is expected back as a string
            (
                r#"%{data::keyvalue(":")}"#,
                "kafka_cluster_status:8ca7b736f0aa43e5",
                Ok(Value::from(btreemap! {
                    "kafka_cluster_status" => "8ca7b736f0aa43e5"
                })),
            ),
            // "2.0e" is expected back as a string, not as a number
            (
                "%{data::keyvalue}",
                "field=2.0e",
                Ok(Value::from(btreemap! {
                "field" => "2.0e"
                })),
            ),
            // a key without a value (OUT) and a bare word (MAC) are both skipped
            (
                r#"%{data::keyvalue("=", "\\w.\\-_@:")}"#,
                "IN=eth0 OUT= MAC", // no value
                Ok(Value::from(btreemap! {
                    "IN" => "eth0"
                })),
            ),
            // dotted keys produce nested objects
            (
                "%{data::keyvalue}",
                "db.name=my_db,db.operation=insert",
                Ok(Value::from(btreemap! {
                    "db" => btreemap! {
                        "name" => "my_db",
                        "operation" => "insert",
                    }
                })),
            ),
            // capture all possible key-value pairs from the string
            (
                "%{data::keyvalue}",
                r#" , key1=value1 "key2"="value2",key3=value3 "#,
                Ok(Value::from(btreemap! {
                    "key1" => "value1",
                    "key2" => "value2",
                    "key3" => "value3",
                })),
            ),
            // ": " as the key-value delimiter and "," as the second argument;
            // quoted values may contain spaces and colons
            (
                r#"%{data::keyvalue(": ",",")}"#,
                r#"client: 217.92.148.44, server: localhost, request: "HEAD http://174.138.82.103:80/sql/sql-admin/ HTTP/1.1", host: "174.138.82.103""#,
                Ok(Value::from(btreemap! {
                    "client" => "217.92.148.44",
                    "host" => "174.138.82.103",
                    "request" => "HEAD http://174.138.82.103:80/sql/sql-admin/ HTTP/1.1",
                    "server" => "localhost",
                })),
            ),
            // append values with the same key
            (
                r#"%{data::keyvalue}"#,
                r#"a=1, a=1, a=2"#,
                Ok(Value::from(btreemap! {
                    "a" => vec![1, 1, 2]
                })),
            ),
            // trim string values
            (
                r#"%{data::keyvalue("="," ")}"#,
                r#"a= foo"#,
                Ok(Value::from(btreemap! {
                    "a" => "foo"
                })),
            ),
            // ignore if key contains spaces
            (
                r#"%{data::keyvalue("="," ")}"#,
                "a key=value",
                Ok(Value::from(btreemap! {})),
            ),
            // parses valid octal numbers (start with 0) as decimals
            (
                r#"%{data::keyvalue}"#,
                "a=07",
                Ok(Value::from(btreemap! {
                    "a" => 7
                })),
            ),
            // parses invalid octal numbers (start with 0) as strings
            (
                r#"%{data::keyvalue}"#,
                "a=08",
                Ok(Value::from(btreemap! {
                    "a" => "08"
                })),
            ),
        ]);
    }
1119
1120    #[test]
1121    fn alias_and_main_rule_extract_same_fields_to_array() {
1122        let rules = parse_grok_rules(
1123            // patterns
1124            &["%{notSpace:field:number} %{alias}".to_string()],
1125            // aliases
1126            btreemap! {
1127                "alias" => "%{notSpace:field:integer}".to_string()
1128            },
1129        )
1130        .expect("couldn't parse rules");
1131        let parsed = parse_grok("1 2", &rules).unwrap().parsed;
1132
1133        assert_eq!(
1134            parsed,
1135            Value::from(btreemap! {
1136                 "field" =>  Value::Array(vec![1.into(), 2.into()]),
1137            })
1138        );
1139    }
1140
1141    #[test]
1142    fn alias_with_filter() {
1143        let rules = parse_grok_rules(
1144            // patterns
1145            &["%{alias:field:uppercase}".to_string()],
1146            // aliases
1147            btreemap! {
1148                "alias" => "%{notSpace:subfield1} %{notSpace:subfield2:integer}".to_string()
1149            },
1150        )
1151        .expect("couldn't parse rules");
1152        let parsed = parse_grok("a 1", &rules).unwrap().parsed;
1153
1154        assert_eq!(
1155            parsed,
1156            Value::from(btreemap! {
1157                 "field" =>  Value::Bytes("A 1".into()),
1158                 "subfield1" =>  Value::Bytes("a".into()),
1159                 "subfield2" =>  Value::Integer(1)
1160            })
1161        );
1162    }
1163
1164    #[test]
1165    #[traced_test]
1166    fn does_not_emit_error_log_on_alternatives_with_filters() {
1167        test_full_grok(vec![(
1168            "(%{integer:field_int}|%{data:field_str})",
1169            "abc",
1170            Ok(Value::from(btreemap! {
1171                "field_str" =>  Value::Bytes("abc".into()),
1172            })),
1173        )]);
1174        assert!(!logs_contain("Error applying filter"));
1175    }
1176
1177    #[test]
1178    fn parses_grok_unsafe_field_names() {
1179        test_full_grok(vec![
1180            (
1181                r#"%{data:field["quoted name"]}"#,
1182                "abc",
1183                Ok(Value::from(btreemap! {
1184                "field" => btreemap! {
1185                    "quoted name" => "abc",
1186                    }
1187                })),
1188            ),
1189            (
1190                "%{data:@field-name-with-symbols$}",
1191                "abc",
1192                Ok(Value::from(btreemap! {
1193                "@field-name-with-symbols$" => "abc"})),
1194            ),
1195            (
1196                "%{data:@parent.$child}",
1197                "abc",
1198                Ok(Value::from(btreemap! {
1199                "@parent" => btreemap! {
1200                    "$child" => "abc",
1201                    }
1202                })),
1203            ),
1204        ]);
1205    }
1206
1207    #[test]
1208    fn parses_with_new_lines() {
1209        test_full_grok(vec![
1210            // the DOTALL mode is enabled by default
1211            (
1212                "%{data:field}",
1213                "a\nb",
1214                Ok(Value::from(btreemap! {
1215                    "field" => "a\nb"
1216                })),
1217            ),
1218            // (?s) enables the DOTALL mode
1219            (
1220                "(?s)%{data:field}",
1221                "a\nb",
1222                Ok(Value::from(btreemap! {
1223                    "field" => "a\nb"
1224                })),
1225            ),
1226            (
1227                "%{data:line1}\n%{data:line2}",
1228                "a\nb",
1229                Ok(Value::from(btreemap! {
1230                    "line1" => "a",
1231                    "line2" => "b"
1232                })),
1233            ),
1234            // disable the DOTALL mode with (?-s)
1235            ("(?s)(?-s)%{data:field}", "a\nb", Err(FatalError::NoMatch)),
1236            // disable and then enable the DOTALL mode
1237            (
1238                "(?-s)%{data:field} (?s)%{data:field}",
1239                "abc d\ne",
1240                Ok(Value::from(btreemap! {
1241                    "field" => Value::Array(vec!["abc".into(), "d\ne".into()]),
1242                })),
1243            ),
1244        ]);
1245    }
1246
1247    #[test]
1248    fn supports_rubyhash_filter() {
1249        test_grok_pattern(vec![(
1250            "%{data:field:rubyhash}",
1251            r#"{hello=>"world",'number'=>42.0}"#,
1252            Ok(Value::from(btreemap! {
1253                "hello" => "world",
1254                "number" =>  42.0
1255            })),
1256        )]);
1257    }
1258
1259    #[test]
1260    fn supports_querystring_filter() {
1261        test_grok_pattern(vec![(
1262            "%{data:field:querystring}",
1263            "foo=bar",
1264            Ok(Value::from(btreemap! {
1265                "foo" => "bar",
1266            })),
1267        )]);
1268    }
1269
1270    #[test]
1271    fn supports_boolean_filter() {
1272        test_grok_pattern(vec![
1273            ("%{data:field:boolean}", "True", Ok(Value::Boolean(true))),
1274            (
1275                "%{data:field:boolean}",
1276                "NotTrue",
1277                Ok(Value::Boolean(false)),
1278            ),
1279        ]);
1280    }
1281
1282    #[test]
1283    fn supports_decodeuricomponent_filter() {
1284        test_grok_pattern(vec![(
1285            "%{data:field:decodeuricomponent}",
1286            "%2Fservice%2Ftest",
1287            Ok(Value::Bytes("/service/test".into())),
1288        )]);
1289    }
1290
1291    #[test]
1292    fn supports_xml_filter() {
1293        test_grok_pattern(vec![(
1294            "%{data:field:xml}",
1295            r#"<book category="CHILDREN">
1296                  <title lang="en">Harry Potter</title>
1297                  <author>J K. Rowling</author>
1298                  <year>2005</year>
1299                  <booleanValue>true</booleanValue>
1300                  <nullValue>null</nullValue>
1301                </book>"#,
1302            Ok(Value::from(btreemap! {
1303            "book" => btreemap! {
1304              "year" => "2005",
1305              "category" => "CHILDREN",
1306              "author" => "J K. Rowling",
1307              "booleanValue" => "true",
1308              "nullValue" => "null",
1309              "title" => btreemap! {
1310                "lang" => "en",
1311                "value" => "Harry Potter"
1312              }
1313            }
1314            })),
1315        )]);
1316    }
1317
1318    #[test]
1319    fn parses_sample() {
1320        test_full_grok(vec![(
1321            r#"\[%{date("yyyy-MM-dd HH:mm:ss,SSS"):date}\]\[%{notSpace:level}\s*\]\[%{notSpace:logger.thread_name}-#%{integer:logger.thread_id}\]\[%{notSpace:logger.name}\] .*"#,
1322            r#"[2020-04-03 07:01:55,248][INFO ][exchange-worker-#43][FileWriteAheadLogManager] Started write-ahead log manager [mode=LOG_ONLY]"#,
1323            Ok(Value::from(btreemap! {
1324              "date"=> 1585897315248_i64,
1325              "level"=> "INFO",
1326              "logger"=> btreemap! {
1327                "name"=> "FileWriteAheadLogManager",
1328                "thread_id"=> 43,
1329                "thread_name"=> "exchange-worker"
1330              }
1331            })),
1332        )]);
1333    }
1334
1335    #[test]
1336    fn remove_empty_objects() {
1337        test_full_grok(vec![
1338            (
1339                "%{data::json}",
1340                r#"{"root": {"object": {"empty": {}}, "string": "abc" }}"#,
1341                Ok(Value::Object(btreemap!(
1342                    "root" => btreemap! (
1343                        "string" => "abc"
1344                    )
1345                ))),
1346            ),
1347            (
1348                "%{data:field:json}",
1349                r#"{"root": {"object": {"empty": {}}, "string": "abc" }}"#,
1350                Ok(Value::Object(btreemap!(
1351                    "field" => btreemap!(
1352                        "root" => btreemap! (
1353                            "string" => "abc"
1354                        )
1355                )))),
1356            ),
1357            (
1358                r#"%{notSpace:network.destination.ip:nullIf("-")}"#,
1359                "-",
1360                Ok(Value::Object(btreemap!())),
1361            ),
1362        ]);
1363    }
1364    #[test]
1365    fn parses_json_keys_as_path() {
1366        test_full_grok(vec![(
1367            "%{data::json}",
1368            r#"{"a.b": "c"}"#,
1369            Ok(Value::Object(btreemap!(
1370                "a" => btreemap! (
1371                    "b" => "c"
1372                )
1373            ))),
1374        )]);
1375    }
1376}