1use super::{
2 grok_filter::apply_filter,
3 parse_grok_rules::{GrokField, GrokRule},
4};
5use crate::path::parse_value_path;
6use crate::value::{ObjectMap, Value};
7use std::collections::BTreeMap;
8
/// Errors that abort grok parsing of a value.
#[derive(thiserror::Error, Debug, PartialEq, Eq)]
pub enum FatalError {
    /// The value matched none of the rules it was tried against.
    #[error("value does not match any rule")]
    NoMatch,
    /// The regex engine reported a failure while matching a pattern against the value.
    #[error("failure during regex engine runtime for match of the pattern against the value.")]
    RegexEngineError,
}
17
/// Non-fatal errors collected while a matching rule is applied.
///
/// They are reported alongside the parsed value in
/// [`ParsedGrokObject::internal_errors`] instead of failing the whole parse.
#[derive(thiserror::Error, Debug, PartialEq, Eq)]
pub enum InternalError {
    /// A filter could not be applied; the message renders the first string as
    /// the filter and the second as the value it was applied to.
    #[error("failed to apply filter '{}' to '{}'", .0, .1)]
    FailedToApplyFilter(String, String),
}
26
/// Result of successfully applying a grok rule to a value.
#[derive(PartialEq, Debug)]
pub struct ParsedGrokObject {
    /// The object built from the captures of the matching rule.
    pub parsed: Value,
    /// Filter failures encountered while building `parsed`; the affected
    /// fields are left out of `parsed`.
    pub internal_errors: Vec<InternalError>,
}
34
35pub fn parse_grok(
37 source_field: &str,
38 grok_rules: &[GrokRule],
39) -> Result<ParsedGrokObject, FatalError> {
40 for rule in grok_rules {
41 match apply_grok_rule(source_field, rule) {
42 Err(FatalError::NoMatch) => continue,
43 other => return other,
44 }
45 }
46 Err(FatalError::NoMatch)
47}
48
49fn apply_grok_rule(source: &str, grok_rule: &GrokRule) -> Result<ParsedGrokObject, FatalError> {
60 let mut parsed = Value::Object(BTreeMap::new());
61 let mut internal_errors = vec![];
62
63 match grok_rule.pattern.match_against(source) {
64 Ok(Some(matches)) => {
65 for (name, match_str) in matches.iter() {
66 if match_str.is_empty() {
67 continue;
68 }
69
70 let mut value = Some(Value::from(match_str));
71
72 if let Some(GrokField {
73 lookup: field,
74 filters,
75 }) = grok_rule.fields.get(name)
76 {
77 for filter in filters {
78 if let Some(ref mut v) = value {
79 value = match apply_filter(v, filter) {
80 Ok(Value::Null) => None,
81 Ok(v) if v.is_object() => Some(parse_keys_as_path(v)),
82 Ok(v) => Some(v),
83 Err(e) => {
84 internal_errors.push(e);
85 None
86 }
87 };
88 }
89 }
90
91 if let Some(value) = value {
92 match value {
93 Value::Object(map) if field.is_root() => {
95 parsed.as_object_mut().expect("root is object").extend(map);
96 }
97 _ if field.is_root() => {}
99 _ => match parsed.get(field).cloned() {
101 Some(Value::Array(mut values)) => {
102 values.push(value);
103 parsed.insert(field, values);
104 }
105 Some(v) => {
106 parsed.insert(field, Value::Array(vec![v, value]));
107 }
108 None => {
109 parsed.insert(field, value);
110 }
111 },
112 };
113 }
114 } else {
115 parsed
118 .as_object_mut()
119 .expect("parsed value is not an object")
120 .insert(name.to_string().into(), value.into());
121 }
122 }
123
124 postprocess_value(&mut parsed);
125
126 Ok(ParsedGrokObject {
127 parsed,
128 internal_errors,
129 })
130 }
131 Ok(None) => Err(FatalError::NoMatch),
132 Err(e) => Err(e),
133 }
134}
135
136fn parse_keys_as_path(value: Value) -> Value {
138 match value {
139 Value::Object(map) => {
140 let mut result = Value::Object(ObjectMap::new());
141 for (k, v) in map.into_iter() {
142 let path = parse_value_path(&k)
143 .unwrap_or_else(|_| crate::owned_value_path!(&k.to_string()));
144 result.insert(&path, parse_keys_as_path(v));
145 }
146 result
147 }
148 Value::Array(a) => Value::Array(a.into_iter().map(parse_keys_as_path).collect()),
149 v => v,
150 }
151}
152
153fn postprocess_value(value: &mut Value) {
155 match value {
157 Value::Array(a) => a.iter_mut().for_each(postprocess_value),
158 Value::Object(map) => {
159 map.values_mut().for_each(postprocess_value);
160 map.retain(|_, value| {
161 !matches!(value, Value::Object(v) if v.is_empty()) && !matches!(value, Value::Null)
162 })
163 }
164 _ => {}
165 }
166}
167
168#[cfg(test)]
169mod tests {
170 use crate::btreemap;
171 use crate::value::Value;
172 use chrono::{Datelike, NaiveDate, Timelike, Utc};
173 use ordered_float::NotNan;
174 use tracing_test::traced_test;
175
176 use super::super::parse_grok_rules::parse_grok_rules;
177 use super::*;
178
    /// Root directory of the fixture files read by the tests in this module.
    const FIXTURE_ROOT: &str = "tests/data/fixtures/parse_grok";
180
181 #[test]
182 fn parses_simple_grok() {
183 let rules = parse_grok_rules(
184 &[
185 "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{GREEDYDATA:message}"
186 .to_string(),
187 ],
188 BTreeMap::new(),
189 )
190 .expect("couldn't parse rules");
191 let parsed = parse_grok("2020-10-02T23:22:12.223222Z info Hello world", &rules)
192 .unwrap()
193 .parsed;
194
195 assert_eq!(
196 parsed,
197 Value::from(btreemap! {
198 "timestamp" => "2020-10-02T23:22:12.223222Z",
199 "level" => "info",
200 "message" => "Hello world"
201 })
202 );
203 }
204
205 #[test]
206 fn parses_complex_grok() {
207 let rules = parse_grok_rules(
208 &[
210 "%{access.common}".to_string(),
211 r#"%{access.common} (%{number:duration:scale(1000000000)} )?"%{_referer}" "%{_user_agent}"( "%{_x_forwarded_for}")?.*"#.to_string()
212 ],
213 btreemap! {
215 "access.common" => r#"%{_client_ip} %{_ident} %{_auth} \[%{_date_access}\] "(?>%{_method} |)%{_url}(?> %{_version}|)" %{_status_code} (?>%{_bytes_written}|-)"#.to_string(),
216 "_auth" => r#"%{notSpace:http.auth:nullIf("-")}"#.to_string(),
217 "_bytes_written" => "%{integer:network.bytes_written}".to_string(),
218 "_client_ip" => "%{ipOrHost:network.client.ip}".to_string(),
219 "_version" => r#"HTTP\/%{regex("\\d+\\.\\d+"):http.version}"#.to_string(),
220 "_url" => "%{notSpace:http.url}".to_string(),
221 "_ident" => "%{notSpace:http.ident}".to_string(),
222 "_user_agent" => r#"%{regex("[^\\\"]*"):http.useragent}"#.to_string(),
223 "_referer" => "%{notSpace:http.referer}".to_string(),
224 "_status_code" => "%{integer:http.status_code}".to_string(),
225 "_method" => "%{word:http.method}".to_string(),
226 "_date_access" => "%{notSpace:date_access}".to_string(),
227 "_x_forwarded_for" => r#"%{regex("[^\\\"]*"):http._x_forwarded_for:nullIf("-")}"#.to_string()}).expect("couldn't parse rules");
228
229 let input = r#"127.0.0.1 - frank [13/Jul/2016:10:55:36] "GET /apache_pb.gif HTTP/1.0" 200 2326 0.202 "http://www.perdu.com/" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36" "-""#;
230 let parsed = parse_grok(input, &rules).unwrap().parsed;
231
232 assert_eq!(
233 parsed,
234 Value::from(btreemap! {
235 "date_access" => "13/Jul/2016:10:55:36",
236 "duration" => 202000000,
237 "http" => btreemap! {
238 "auth" => "frank",
239 "ident" => "-",
240 "method" => "GET",
241 "status_code" => 200,
242 "url" => "/apache_pb.gif",
243 "version" => "1.0",
244 "referer" => "http://www.perdu.com/",
245 "useragent" => "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36",
246 },
247 "network" => btreemap! {
248 "bytes_written" => 2326,
249 "client" => btreemap! {
250 "ip" => "127.0.0.1"
251 }
252 }
253 })
254 );
255 }
256
    /// Numeric matchers: each case is (pattern, input, expected value of `field`).
    /// The last case checks that `integerExt` rejects an input with a fractional part.
    #[test]
    fn supports_matchers() {
        test_grok_pattern(vec![
            ("%{number:field}", "-1.2", Ok(Value::from(-1.2_f64))),
            ("%{number:field}", "-1", Ok(Value::from(-1))),
            ("%{numberExt:field}", "-1234e+3", Ok(Value::from(-1234000))),
            ("%{numberExt:field}", ".1e+3", Ok(Value::from(100))),
            ("%{integer:field}", "-2", Ok(Value::from(-2))),
            ("%{integerExt:field}", "+2", Ok(Value::from(2))),
            ("%{integerExt:field}", "-2", Ok(Value::from(-2))),
            ("%{integerExt:field}", "-1e+2", Ok(Value::from(-100))),
            ("%{integerExt:field}", "1234.1e+5", Err(FatalError::NoMatch)),
        ]);
    }
271
    /// Basic filters applied after a matcher: each case is
    /// (pattern, input, expected value of `field`).
    #[test]
    fn supports_filters() {
        test_grok_pattern(vec![
            ("%{data:field:number}", "1.0", Ok(Value::from(1))),
            ("%{data:field:integer}", "1", Ok(Value::from(1))),
            (
                "%{data:field:lowercase}",
                "aBC",
                Ok(Value::Bytes("abc".into())),
            ),
            (
                "%{data:field:uppercase}",
                "Abc",
                Ok(Value::Bytes("ABC".into())),
            ),
            ("%{integer:field:scale(10)}", "1", Ok(Value::from(10))),
            ("%{number:field:scale(0.5)}", "10.0", Ok(Value::from(5))),
        ]);
    }
291
292 fn test_grok_pattern(tests: Vec<(&str, &str, Result<Value, FatalError>)>) {
293 for (filter, k, v) in tests {
294 let v = v.map(|parsed| ParsedGrokObject {
295 parsed,
296 internal_errors: vec![],
297 });
298 let rules =
299 parse_grok_rules(&[filter.to_string()], BTreeMap::new()).unwrap_or_else(|error| {
300 panic!("failed to parse {k} with filter {filter}: {error}")
301 });
302 let parsed = parse_grok(k, &rules);
303
304 if let Ok(v) = v {
305 assert_eq!(
306 parsed
307 .unwrap_or_else(|_| panic!("{filter} does not match {k}"))
308 .parsed,
309 Value::from(btreemap! {
310 "field" => v.parsed,
311 }),
312 "failed to parse {k} with filter {filter}"
313 );
314 } else {
315 assert_eq!(parsed, v, "failed to parse {k} with filter {filter}");
316 }
317 }
318 }
319
320 fn test_full_grok(tests: Vec<(&str, &str, Result<Value, FatalError>)>) {
321 for (filter, k, v) in tests {
322 let v = v.map(|parsed| ParsedGrokObject {
323 parsed,
324 internal_errors: vec![],
325 });
326 let rules = parse_grok_rules(&[filter.to_string()], BTreeMap::new())
327 .unwrap_or_else(|_| panic!("failed to parse {k} with filter {filter}"));
328 let parsed = parse_grok(k, &rules);
329
330 assert_eq!(parsed, v);
331 }
332 }
333
334 fn test_full_grok_internal_errors(
335 tests: Vec<(&str, &str, Result<ParsedGrokObject, FatalError>)>,
336 ) {
337 for (filter, k, v) in tests {
338 let rules = parse_grok_rules(&[filter.to_string()], BTreeMap::new())
339 .unwrap_or_else(|_| panic!("failed to parse {k} with filter {filter}"));
340 let parsed = parse_grok(k, &rules);
341
342 assert_eq!(parsed, v);
343 }
344 }
345
346 #[test]
347 fn fails_on_unknown_pattern_definition() {
348 assert_eq!(
349 parse_grok_rules(&["%{unknown}".to_string()], BTreeMap::new())
350 .unwrap_err()
351 .to_string(),
352 r#"failed to parse grok expression '(?m)\A%{unknown}\z': The given pattern definition name "unknown" could not be found in the definition map"#
353 );
354 }
355
356 #[test]
357 fn fails_on_unknown_filter() {
358 assert_eq!(
359 parse_grok_rules(
360 &["%{data:field:unknownFilter}".to_string()],
361 BTreeMap::new(),
362 )
363 .unwrap_err()
364 .to_string(),
365 "unknown filter 'unknownFilter'"
366 );
367 }
368
369 #[test]
370 fn fails_on_invalid_matcher_parameter() {
371 assert_eq!(
372 parse_grok_rules(&["%{regex(1):field}".to_string()], BTreeMap::new())
373 .unwrap_err()
374 .to_string(),
375 "invalid arguments for the function 'regex'"
376 );
377 }
378
379 #[test]
380 fn fails_on_invalid_filter_parameter() {
381 assert_eq!(
382 parse_grok_rules(&["%{data:field:scale()}".to_string()], BTreeMap::new())
383 .unwrap_err()
384 .to_string(),
385 "invalid arguments for the function 'scale'"
386 );
387 }
388
389 #[test]
390 fn regex_with_empty_field() {
391 test_grok_pattern(vec![(
392 r#"%{regex("\\d+\\.\\d+")} %{data:field}"#,
393 "1.0 field_value",
394 Ok(Value::from("field_value")),
395 )]);
396 }
397
398 #[test]
399 fn does_not_merge_field_maps() {
400 test_full_grok(vec![(
402 "'%{data:nested.json:json}' '%{data:nested.json:json}'",
403 r#"'{ "json_field1": "value2" }' '{ "json_field2": "value3" }'"#,
404 Ok(Value::from(btreemap! {
405 "nested" => btreemap! {
406 "json" => Value::Array(vec! [
407 Value::from(btreemap! { "json_field1" => Value::Bytes("value2".into()) }),
408 Value::from(btreemap! { "json_field2" => Value::Bytes("value3".into()) }),
409 ]),
410 }
411 })),
412 )]);
413 }
414
    /// Filters attached to a matcher with an empty field name (`%{data::filter}`)
    /// target the root of the parsed object.
    #[test]
    fn supports_filters_without_fields() {
        test_full_grok(vec![
            // An object produced at the root becomes the parsed object's content.
            (
                "%{data::json}",
                r#"{ "json_field1": "value2" }"#,
                Ok(Value::from(btreemap! {
                    "json_field1" => Value::Bytes("value2".into()),
                })),
            ),
            // A non-object value at the root is ignored.
            (
                "%{notSpace:standalone_field} %{data::integer}",
                "value1 1",
                Ok(Value::from(btreemap! {
                    "standalone_field" => Value::Bytes("value1".into()),
                })),
            ),
        ]);

        // Several root-level objects are merged together; the failing `number`
        // filter is reported as an internal error and contributes no field.
        test_full_grok_internal_errors(vec![(
            "%{notSpace:standalone_field} '%{data::json}' '%{data::json}' %{number::number}",
            r#"value1 '{ "json_field1": "value2" }' '{ "json_field2": "value3" }' 3"#,
            Ok(ParsedGrokObject {
                parsed: Value::from(btreemap! {
                    "standalone_field" => Value::Bytes("value1".into()),
                    "json_field1" => Value::Bytes("value2".into()),
                    "json_field2" => Value::Bytes("value3".into())
                }),

                internal_errors: vec![InternalError::FailedToApplyFilter(
                    "Number".to_owned(),
                    "3".to_owned(),
                )],
            }),
        )]);
    }
454
455 #[test]
456 fn ignores_field_if_filter_fails() {
457 test_full_grok_internal_errors(vec![(
459 "%{notSpace:field1:integer} %{data:field2:json}",
460 "not_a_number not a json",
461 Ok(ParsedGrokObject {
462 parsed: Value::from(BTreeMap::new()),
463 internal_errors: vec![
464 InternalError::FailedToApplyFilter(
465 "Integer".to_owned(),
466 "\"not_a_number\"".to_owned(),
467 ),
468 InternalError::FailedToApplyFilter(
469 "Json".to_owned(),
470 "\"not a json\"".to_owned(),
471 ),
472 ],
473 }),
474 )]);
475 }
476
477 #[test]
478 fn fails_on_no_match() {
479 let rules = parse_grok_rules(
480 &[
481 "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{GREEDYDATA:message}"
482 .to_string(),
483 ],
484 BTreeMap::new(),
485 )
486 .expect("couldn't parse rules");
487 let error = parse_grok("an ungrokkable message", &rules).unwrap_err();
488
489 assert_eq!(error, FatalError::NoMatch);
490 }
491
492 #[test]
493 fn fails_on_too_many_match_retries() {
494 let pattern = std::fs::read_to_string(format!(
495 "{FIXTURE_ROOT}/pattern/excessive-match-retries.txt"
496 ))
497 .expect("Failed to read pattern file");
498 let value =
499 std::fs::read_to_string(format!("{FIXTURE_ROOT}/value/excessive-match-retries.txt"))
500 .expect("Failed to read value file");
501
502 let rules = parse_grok_rules(
503 &[pattern],
505 BTreeMap::new(),
506 )
507 .expect("couldn't parse rules");
508
509 let parsed = parse_grok(&value, &rules);
510
511 assert_eq!(parsed.unwrap_err(), FatalError::RegexEngineError)
512 }
513
514 #[test]
515 fn appends_to_the_same_field() {
516 let rules = parse_grok_rules(
517 &[
518 r#"%{integer:nested.field} %{notSpace:nested.field:uppercase} %{notSpace:nested.field:nullIf("-")}"#
519 .to_string(),
520 ],
521 BTreeMap::new(),
522 )
523 .expect("couldn't parse rules");
524 let parsed = parse_grok("1 info message", &rules).unwrap().parsed;
525
526 assert_eq!(
527 parsed,
528 Value::from(btreemap! {
529 "nested" => btreemap! {
530 "field" => Value::Array(vec![1.into(), "INFO".into(), "message".into()]),
531 },
532 })
533 );
534 }
535
536 #[test]
537 fn error_on_circular_dependency() {
538 let err = parse_grok_rules(
539 &["%{pattern1}".to_string()],
541 btreemap! {
543 "pattern1" => "%{pattern2}".to_string(),
544 "pattern2" => "%{pattern1}".to_string()},
545 )
546 .unwrap_err();
547 assert_eq!(
548 err.to_string(),
549 "Circular dependency found in the alias 'pattern1'"
550 );
551 }
552
    /// Plain regex named capture groups are extracted as fields.
    #[test]
    fn extracts_field_with_regex_capture() {
        test_grok_pattern(vec![(
            r"(?<field>\w+)",
            "abc",
            Ok(Value::Bytes("abc".into())),
        )]);

        // The group name is used verbatim as a single key: a dotted name is not
        // expanded into nested objects.
        test_full_grok(vec![(
            r"(?<nested.field.name>\w+)",
            "abc",
            Ok(Value::from(btreemap! {
                "nested.field.name" => Value::Bytes("abc".into()),
            })),
        )]);
    }
572
    /// The `date` matcher parses timestamps in various formats and yields
    /// integer timestamps; each case is (pattern, input, expected value).
    /// Invalid format strings and unknown time zones are rejected when the
    /// rules are parsed.
    #[test]
    fn supports_date_matcher() {
        // Today's date (UTC) at a fixed time of day, used by the cases whose
        // input carries no full date.
        // NOTE(review): the date is sampled here, before parsing; if the UTC date
        // rolls over in between, those cases would presumably fail - confirm.
        let now = Utc::now();
        let now = NaiveDate::from_ymd_opt(now.year(), now.month(), now.day())
            .unwrap()
            .and_hms_opt(12, 13, 14)
            .unwrap()
            .and_utc();
        test_grok_pattern(vec![
            (
                r#"%{date("dd/MMM/yyyy"):field}"#,
                "06/Mar/2013",
                Ok(Value::Integer(1362528000000)),
            ),
            (
                r#"%{date("EEE MMM dd HH:mm:ss yyyy"):field}"#,
                "Thu Jun 16 08:29:03 2016",
                Ok(Value::Integer(1466065743000)),
            ),
            (
                r#"%{date("dd/MMM/yyyy:HH:mm:ss Z"):field}"#,
                "06/Mar/2013:01:36:30 +0900",
                Ok(Value::Integer(1362501390000)),
            ),
            (
                r#"%{date("yyyy-MM-dd'T'HH:mm:ss.SSSZ"):field}"#,
                "2016-11-29T16:21:36.431+0000",
                Ok(Value::Integer(1480436496431)),
            ),
            (
                r#"%{date("yyyy-MM-dd'T'HH:mm:ss.SSSZZ"):field}"#,
                "2016-11-29T16:21:36.431+00:00",
                Ok(Value::Integer(1480436496431)),
            ),
            (
                r#"%{date("dd/MMM/yyyy:HH:mm:ss.SSS"):field}"#,
                "06/Feb/2009:12:14:14.655",
                Ok(Value::Integer(1233922454655)),
            ),
            (
                r#"%{date("yyyy-MM-dd HH:mm:ss.SSS z"):field}"#,
                "2007-08-31 19:22:22.427 CET",
                Ok(Value::Integer(1188580942427)),
            ),
            (
                r#"%{date("yyyy-MM-dd HH:mm:ss.SSS zzzz"):field}"#,
                "2007-08-31 19:22:22.427 America/Thule",
                Ok(Value::Integer(1188598942427)),
            ),
            (
                r#"%{date("yyyy-MM-dd HH:mm:ss.SSS Z"):field}"#,
                "2007-08-31 19:22:22.427 -03:00",
                Ok(Value::Integer(1188598942427)),
            ),
            // Time zone passed as the second argument, in several notations.
            (
                r#"%{date("EEE MMM dd HH:mm:ss yyyy", "Europe/Moscow"):field}"#,
                "Thu Jun 16 08:29:03 2016",
                Ok(Value::Integer(1466054943000)),
            ),
            (
                r#"%{date("EEE MMM dd HH:mm:ss yyyy", "UTC+5"):field}"#,
                "Thu Jun 16 08:29:03 2016",
                Ok(Value::Integer(1466047743000)),
            ),
            (
                r#"%{date("EEE MMM dd HH:mm:ss yyyy", "+3"):field}"#,
                "Thu Jun 16 08:29:03 2016",
                Ok(Value::Integer(1466054943000)),
            ),
            (
                r#"%{date("EEE MMM dd HH:mm:ss yyyy", "+03:00"):field}"#,
                "Thu Jun 16 08:29:03 2016",
                Ok(Value::Integer(1466054943000)),
            ),
            (
                r#"%{date("EEE MMM dd HH:mm:ss yyyy", "-0300"):field}"#,
                "Thu Jun 16 08:29:03 2016",
                Ok(Value::Integer(1466076543000)),
            ),
            (
                r#"%{date("MMM d y HH:mm:ss z"):field}"#,
                "Nov 16 2020 13:41:29 GMT",
                Ok(Value::Integer(1605534089000)),
            ),
            (
                r#"%{date("yyyy-MM-dd HH:mm:ss.SSSS"):field}"#,
                "2019-11-25 11:21:32.6282",
                Ok(Value::Integer(1574680892628)),
            ),
            (
                r#"%{date("yyyy-MM-dd'T'HH:mm:ss.SSSZ"):field}"#,
                "2016-09-02T15:02:29.648Z",
                Ok(Value::Integer(1472828549648)),
            ),
            (
                r#"%{date("yyMMdd HH:mm:ss"):field}"#,
                "171113 14:14:20",
                Ok(Value::Integer(1510582460000)),
            ),
            (
                r#"%{date("M/d/yy HH:mm:ss z"):field}"#,
                "5/6/18 19:40:59 GMT",
                Ok(Value::Integer(1525635659000)),
            ),
            (
                r#"%{date("M/d/yy HH:mm:ss z"):field}"#,
                "11/16/18 19:40:59 GMT",
                Ok(Value::Integer(1542397259000)),
            ),
            // Fractional seconds beyond three digits: the expected value keeps
            // only the first three.
            (
                r#"%{date("M/d/yy HH:mm:ss,SSS z"):field}"#,
                "11/16/18 19:40:59,123 GMT",
                Ok(Value::Integer(1542397259123)),
            ),
            (
                r#"%{date("M/d/yy HH:mm:ss,SSSS z"):field}"#,
                "11/16/18 19:40:59,1234 GMT",
                Ok(Value::Integer(1542397259123)),
            ),
            (
                r#"%{date("M/d/yy HH:mm:ss,SSSSSSSSS z"):field}"#,
                "11/16/18 19:40:59,123456789 GMT",
                Ok(Value::Integer(1542397259123)),
            ),
            (
                r#"%{date("M/d/yy HH:mm:ss.SSSS z"):field}"#,
                "11/16/18 19:40:59.1234 GMT",
                Ok(Value::Integer(1542397259123)),
            ),
            // Input with a time of day only: expected to resolve to today's date.
            (
                r#"%{date("HH:mm:ss"):field}"#,
                &format!("{}:{}:{}", now.hour(), now.minute(), now.second()),
                Ok(Value::Integer(now.timestamp() * 1000)),
            ),
            // Input without a year: expected to resolve to the current year.
            (
                r#"%{date("d/M HH:mm:ss"):field}"#,
                &format!(
                    "{}/{} {}:{}:{}",
                    now.day(),
                    now.month(),
                    now.hour(),
                    now.minute(),
                    now.second()
                ),
                Ok(Value::Integer(now.timestamp() * 1000)),
            ),
        ]);

        // An unsupported format string is rejected.
        assert_eq!(
            parse_grok_rules(
                &[r#"%{date("ABC:XYZ"):field}"#.to_string()],
                BTreeMap::new(),
            )
            .unwrap_err()
            .to_string(),
            "invalid arguments for the function 'date'"
        );
        // An unknown time zone is rejected.
        assert_eq!(
            parse_grok_rules(
                &[r#"%{date("EEE MMM dd HH:mm:ss yyyy", "unknown timezone"):field}"#.to_string()],
                BTreeMap::new(),
            )
            .unwrap_err()
            .to_string(),
            "invalid arguments for the function 'date'"
        );
    }
743
    /// The `array` filter splits a bracketed list into an array, optionally
    /// with custom brackets, a custom separator and a per-element filter.
    /// Each case is (pattern, input, expected value of `field`).
    #[test]
    fn supports_array_filter() {
        test_grok_pattern(vec![
            (
                "%{data:field:array}",
                "[1,2]",
                Ok(Value::Array(vec!["1".into(), "2".into()])),
            ),
            (
                r#"%{data:field:array("\\t")}"#,
                "[1\t2]",
                Ok(Value::Array(vec!["1".into(), "2".into()])),
            ),
            (
                r#"%{data:field:array("[]","\\n")}"#,
                "[1\n2]",
                Ok(Value::Array(vec!["1".into(), "2".into()])),
            ),
            (
                r#"%{data:field:array("","-")}"#,
                "1-2",
                Ok(Value::Array(vec!["1".into(), "2".into()])),
            ),
            (
                "%{data:field:array(integer)}",
                "[1,2]",
                Ok(Value::Array(vec![1.into(), 2.into()])),
            ),
            (
                r#"%{data:field:array(";", integer)}"#,
                "[1;2]",
                Ok(Value::Array(vec![1.into(), 2.into()])),
            ),
            (
                r#"%{data:field:array("{}",";", integer)}"#,
                "{1;2}",
                Ok(Value::Array(vec![1.into(), 2.into()])),
            ),
            (
                "%{data:field:array(number)}",
                "[1,2]",
                Ok(Value::Array(vec![1.into(), 2.into()])),
            ),
            // NOTE(review): this case repeats the `array(integer)` case above.
            (
                "%{data:field:array(integer)}",
                "[1,2]",
                Ok(Value::Array(vec![1.into(), 2.into()])),
            ),
            (
                "%{data:field:array(scale(10))}",
                "[1,2.1]",
                Ok(Value::Array(vec![10.into(), 21.into()])),
            ),
            (
                r#"%{data:field:array(";", scale(10))}"#,
                "[1;2.1]",
                Ok(Value::Array(vec![10.into(), 21.into()])),
            ),
            (
                r#"%{data:field:array("{}",";", scale(10))}"#,
                "{1;2.1}",
                Ok(Value::Array(vec![10.into(), 21.into()])),
            ),
        ]);

        test_full_grok_internal_errors(vec![
            // Input that is not a bracketed list: the field is dropped and the
            // failure is reported.
            (
                "%{data:field:array}",
                "abc",
                Ok(ParsedGrokObject {
                    parsed: Value::from(BTreeMap::new()),
                    internal_errors: vec![InternalError::FailedToApplyFilter(
                        "Array(..)".to_owned(),
                        "\"abc\"".to_owned(),
                    )],
                }),
            ),
            // The per-element filter fails: the field is dropped and the
            // element filter's failure is reported.
            (
                "%{data:field:array(scale(10))}",
                "[a,b]",
                Ok(ParsedGrokObject {
                    parsed: Value::from(BTreeMap::new()),
                    internal_errors: vec![InternalError::FailedToApplyFilter(
                        "Scale(..)".to_owned(),
                        "\"a\"".to_owned(),
                    )],
                }),
            ),
        ]);
    }
836
    /// The `keyvalue` filter, applied at the root, turns `key=value` pairs into
    /// fields. Each case is (pattern, input, expected parsed object).
    #[test]
    fn parses_keyvalue() {
        test_full_grok(vec![
            (
                "%{data::keyvalue}",
                "key=valueStr",
                Ok(Value::from(btreemap! {
                    "key" => "valueStr"
                })),
            ),
            // Quoting characters around keys and values are stripped.
            (
                "%{data::keyvalue}",
                "key=<valueStr>",
                Ok(Value::from(btreemap! {
                    "key" => "valueStr"
                })),
            ),
            (
                "%{data::keyvalue}",
                r#""key"="valueStr""#,
                Ok(Value::from(btreemap! {
                    "key" => "valueStr"
                })),
            ),
            (
                "%{data::keyvalue}",
                "'key'='valueStr'",
                Ok(Value::from(btreemap! {
                    "key" => "valueStr"
                })),
            ),
            (
                "%{data::keyvalue}",
                "<key>=<valueStr>",
                Ok(Value::from(btreemap! {
                    "key" => "valueStr"
                })),
            ),
            // Filter arguments customize how pairs are recognized.
            (
                r#"%{data::keyvalue(":")}"#,
                "key:valueStr",
                Ok(Value::from(btreemap! {
                    "key" => "valueStr"
                })),
            ),
            (
                r#"%{data::keyvalue(":", "/")}"#,
                r#"key:"/valueStr""#,
                Ok(Value::from(btreemap! {
                    "key" => "/valueStr"
                })),
            ),
            (
                r#"%{data::keyvalue(":", "/")}"#,
                "/key:/valueStr",
                Ok(Value::from(btreemap! {
                    "/key" => "/valueStr"
                })),
            ),
            (
                r#"%{data::keyvalue(":=", "", "{}")}"#,
                "key:={valueStr}",
                Ok(Value::from(btreemap! {
                    "key" => "valueStr"
                })),
            ),
            // Pairs separated by a comma followed by a space.
            (
                r#"%{data::keyvalue}"#,
                "key1=value1, key2=value2",
                Ok(Value::from(btreemap! {
                    "key1" => "value1",
                    "key2" => "value2",
                })),
            ),
            // With a space passed as the second argument, a value may contain spaces.
            (
                r#"%{data::keyvalue("="," ")}"#,
                "key1=value1, key2 = value 2 ",
                Ok(Value::from(btreemap! {
                    "key1" => "value1",
                    "key2" => "value 2",
                })),
            ),
            (
                r#"%{data::keyvalue("=", "", "", "|")}"#,
                "key1=value1|key2=value2",
                Ok(Value::from(btreemap! {
                    "key1" => "value1",
                    "key2" => "value2",
                })),
            ),
            (
                r#"%{data::keyvalue("=", "", "", "|")}"#,
                r#"key1="value1"|key2="value2""#,
                Ok(Value::from(btreemap! {
                    "key1" => "value1",
                    "key2" => "value2",
                })),
            ),
            (
                r#"%{data::keyvalue(":=","","<>")}"#,
                r#"key1:=valueStr key2:=</valueStr2> key3:="valueStr3""#,
                Ok(Value::from(btreemap! {
                    "key1" => "valueStr",
                    "key2" => "/valueStr2",
                })),
            ),
            (
                "%{data::keyvalue}",
                "key1=value1,key2=value2",
                Ok(Value::from(btreemap! {
                    "key1" => "value1",
                    "key2" => "value2",
                })),
            ),
            (
                "%{data::keyvalue}",
                "key1=value1;key2=value2",
                Ok(Value::from(btreemap! {
                    "key1" => "value1",
                    "key2" => "value2",
                })),
            ),
            (
                "%{data::keyvalue}",
                "key:=valueStr",
                Ok(Value::from(BTreeMap::new())),
            ),
            // Empty and `null` values are left out.
            (
                "%{data::keyvalue}",
                "key1= key2=null key3=value3",
                Ok(Value::from(btreemap! {
                    "key3" => "value3"
                })),
            ),
            // The same, with comma-separated pairs.
            (
                "%{data::keyvalue}",
                "key1=,key2=null,key3= ,key4=value4",
                Ok(Value::from(btreemap! {
                    "key4" => "value4"
                })),
            ),
            // Pairs with an empty key are left out.
            (
                "%{data::keyvalue}",
                "=,=value",
                Ok(Value::from(BTreeMap::new())),
            ),
            // Values are converted to floats, booleans and integers where possible.
            (
                "%{data::keyvalue}",
                "float=1.2,boolean=true,null=null,string=abc,integer1=11,integer2=12",
                Ok(Value::from(btreemap! {
                    "float" => Value::Float(NotNan::new(1.2).expect("not a float")),
                    "boolean" => Value::Boolean(true),
                    "string" => Value::Bytes("abc".into()),
                    "integer1" => Value::Integer(11),
                    "integer2" => Value::Integer(12)
                })),
            ),
            // The same conversions, with spaces around the pair separators.
            (
                "%{data::keyvalue}",
                "float=1.2 , boolean=true , null=null , string=abc , integer1=11 , integer2=12 ",
                Ok(Value::from(btreemap! {
                    "float" => Value::Float(NotNan::new(1.2).expect("not a float")),
                    "boolean" => Value::Boolean(true),
                    "string" => Value::Bytes("abc".into()),
                    "integer1" => Value::Integer(11),
                    "integer2" => Value::Integer(12)
                })),
            ),
            // Spaces around the key/value separator: nothing is extracted.
            (
                "%{data::keyvalue}",
                "key = valueStr",
                Ok(Value::from(BTreeMap::new())),
            ),
            (
                "%{data::keyvalue}",
                "key= valueStr",
                Ok(Value::from(BTreeMap::new())),
            ),
            (
                "%{data::keyvalue}",
                "key =valueStr",
                Ok(Value::from(BTreeMap::new())),
            ),
            (
                r#"%{data::keyvalue(":")}"#,
                "kafka_cluster_status:8ca7b736f0aa43e5",
                Ok(Value::from(btreemap! {
                    "kafka_cluster_status" => "8ca7b736f0aa43e5"
                })),
            ),
            (
                "%{data::keyvalue}",
                "field=2.0e",
                Ok(Value::from(btreemap! {
                    "field" => "2.0e"
                })),
            ),
            // `OUT=` has no value and `MAC` has no separator: only `IN` is extracted.
            (
                r#"%{data::keyvalue("=", "\\w.\\-_@:")}"#,
                "IN=eth0 OUT= MAC",
                Ok(Value::from(btreemap! {
                    "IN" => "eth0"
                })),
            ),
            // Dotted keys are expanded into nested objects.
            (
                "%{data::keyvalue}",
                "db.name=my_db,db.operation=insert",
                Ok(Value::from(btreemap! {
                    "db" => btreemap! {
                        "name" => "my_db",
                        "operation" => "insert",
                    }
                })),
            ),
            // Leading and trailing separators are tolerated.
            (
                "%{data::keyvalue}",
                r#" , key1=value1 "key2"="value2",key3=value3 "#,
                Ok(Value::from(btreemap! {
                    "key1" => "value1",
                    "key2" => "value2",
                    "key3" => "value3",
                })),
            ),
            (
                r#"%{data::keyvalue(": ",",")}"#,
                r#"client: 217.92.148.44, server: localhost, request: "HEAD http://174.138.82.103:80/sql/sql-admin/ HTTP/1.1", host: "174.138.82.103""#,
                Ok(Value::from(btreemap! {
                    "client" => "217.92.148.44",
                    "host" => "174.138.82.103",
                    "request" => "HEAD http://174.138.82.103:80/sql/sql-admin/ HTTP/1.1",
                    "server" => "localhost",
                })),
            ),
            // A repeated key collects its values into an array.
            (
                r#"%{data::keyvalue}"#,
                r#"a=1, a=1, a=2"#,
                Ok(Value::from(btreemap! {
                    "a" => vec![1, 1, 2]
                })),
            ),
            // A leading space in the value is dropped.
            (
                r#"%{data::keyvalue("="," ")}"#,
                r#"a= foo"#,
                Ok(Value::from(btreemap! {
                    "a" => "foo"
                })),
            ),
            // A key containing a space: nothing is extracted.
            (
                r#"%{data::keyvalue("="," ")}"#,
                "a key=value",
                Ok(Value::from(btreemap! {})),
            ),
            // "07" is converted to the integer 7...
            (
                r#"%{data::keyvalue}"#,
                "a=07",
                Ok(Value::from(btreemap! {
                    "a" => 7
                })),
            ),
            // ...while "08" stays a string (presumably because it is not a
            // valid octal literal - TODO confirm).
            (
                r#"%{data::keyvalue}"#,
                "a=08",
                Ok(Value::from(btreemap! {
                    "a" => "08"
                })),
            ),
        ]);
    }
1119
1120 #[test]
1121 fn alias_and_main_rule_extract_same_fields_to_array() {
1122 let rules = parse_grok_rules(
1123 &["%{notSpace:field:number} %{alias}".to_string()],
1125 btreemap! {
1127 "alias" => "%{notSpace:field:integer}".to_string()
1128 },
1129 )
1130 .expect("couldn't parse rules");
1131 let parsed = parse_grok("1 2", &rules).unwrap().parsed;
1132
1133 assert_eq!(
1134 parsed,
1135 Value::from(btreemap! {
1136 "field" => Value::Array(vec![1.into(), 2.into()]),
1137 })
1138 );
1139 }
1140
1141 #[test]
1142 fn alias_with_filter() {
1143 let rules = parse_grok_rules(
1144 &["%{alias:field:uppercase}".to_string()],
1146 btreemap! {
1148 "alias" => "%{notSpace:subfield1} %{notSpace:subfield2:integer}".to_string()
1149 },
1150 )
1151 .expect("couldn't parse rules");
1152 let parsed = parse_grok("a 1", &rules).unwrap().parsed;
1153
1154 assert_eq!(
1155 parsed,
1156 Value::from(btreemap! {
1157 "field" => Value::Bytes("A 1".into()),
1158 "subfield1" => Value::Bytes("a".into()),
1159 "subfield2" => Value::Integer(1)
1160 })
1161 );
1162 }
1163
    /// When only one branch of an alternation matches, the branch that did not
    /// match contributes no field and no "Error applying filter" log line is
    /// emitted for it.
    #[test]
    #[traced_test]
    fn does_not_emit_error_log_on_alternatives_with_filters() {
        test_full_grok(vec![(
            "(%{integer:field_int}|%{data:field_str})",
            "abc",
            Ok(Value::from(btreemap! {
                "field_str" => Value::Bytes("abc".into()),
            })),
        )]);
        assert!(!logs_contain("Error applying filter"));
    }
1176
    /// Field names may contain quoted segments and special characters; each
    /// case is (pattern, input, expected parsed object).
    #[test]
    fn parses_grok_unsafe_field_names() {
        test_full_grok(vec![
            // A quoted, bracketed segment becomes a nested key.
            (
                r#"%{data:field["quoted name"]}"#,
                "abc",
                Ok(Value::from(btreemap! {
                    "field" => btreemap! {
                        "quoted name" => "abc",
                    }
                })),
            ),
            // Symbols such as `@`, `-` and `$` are kept as part of the key.
            (
                "%{data:@field-name-with-symbols$}",
                "abc",
                Ok(Value::from(btreemap! {
                    "@field-name-with-symbols$" => "abc"})),
            ),
            // A dot still separates path segments when symbols are present.
            (
                "%{data:@parent.$child}",
                "abc",
                Ok(Value::from(btreemap! {
                    "@parent" => btreemap! {
                        "$child" => "abc",
                    }
                })),
            ),
        ]);
    }
1206
    /// Handling of newlines in the input and of the `(?s)` / `(?-s)` inline flags.
    #[test]
    fn parses_with_new_lines() {
        test_full_grok(vec![
            // Without any flag, `data` matches across the newline.
            (
                "%{data:field}",
                "a\nb",
                Ok(Value::from(btreemap! {
                    "field" => "a\nb"
                })),
            ),
            // Setting `(?s)` explicitly gives the same result.
            (
                "(?s)%{data:field}",
                "a\nb",
                Ok(Value::from(btreemap! {
                    "field" => "a\nb"
                })),
            ),
            (
                "%{data:line1}\n%{data:line2}",
                "a\nb",
                Ok(Value::from(btreemap! {
                    "line1" => "a",
                    "line2" => "b"
                })),
            ),
            // With `(?-s)` in effect, `data` no longer matches across the newline.
            ("(?s)(?-s)%{data:field}", "a\nb", Err(FatalError::NoMatch)),
            // Flags can be switched within a single pattern.
            (
                "(?-s)%{data:field} (?s)%{data:field}",
                "abc d\ne",
                Ok(Value::from(btreemap! {
                    "field" => Value::Array(vec!["abc".into(), "d\ne".into()]),
                })),
            ),
        ]);
    }
1246
1247 #[test]
1248 fn supports_rubyhash_filter() {
1249 test_grok_pattern(vec![(
1250 "%{data:field:rubyhash}",
1251 r#"{hello=>"world",'number'=>42.0}"#,
1252 Ok(Value::from(btreemap! {
1253 "hello" => "world",
1254 "number" => 42.0
1255 })),
1256 )]);
1257 }
1258
1259 #[test]
1260 fn supports_querystring_filter() {
1261 test_grok_pattern(vec![(
1262 "%{data:field:querystring}",
1263 "foo=bar",
1264 Ok(Value::from(btreemap! {
1265 "foo" => "bar",
1266 })),
1267 )]);
1268 }
1269
    /// The `boolean` filter: "True" becomes `true`, while a string that is not
    /// a recognized truthy value ("NotTrue") becomes `false`.
    #[test]
    fn supports_boolean_filter() {
        test_grok_pattern(vec![
            ("%{data:field:boolean}", "True", Ok(Value::Boolean(true))),
            (
                "%{data:field:boolean}",
                "NotTrue",
                Ok(Value::Boolean(false)),
            ),
        ]);
    }
1281
1282 #[test]
1283 fn supports_decodeuricomponent_filter() {
1284 test_grok_pattern(vec![(
1285 "%{data:field:decodeuricomponent}",
1286 "%2Fservice%2Ftest",
1287 Ok(Value::Bytes("/service/test".into())),
1288 )]);
1289 }
1290
    /// The `xml` filter turns an XML document into nested objects: attributes
    /// and child elements become keys, and the text of an element that also has
    /// attributes is stored under `value`. In this case all leaf values stay
    /// strings, including "true" and "null".
    #[test]
    fn supports_xml_filter() {
        test_grok_pattern(vec![(
            "%{data:field:xml}",
            r#"<book category="CHILDREN">
                  <title lang="en">Harry Potter</title>
                  <author>J K. Rowling</author>
                  <year>2005</year>
                  <booleanValue>true</booleanValue>
                  <nullValue>null</nullValue>
                </book>"#,
            Ok(Value::from(btreemap! {
                "book" => btreemap! {
                  "year" => "2005",
                  "category" => "CHILDREN",
                  "author" => "J K. Rowling",
                  "booleanValue" => "true",
                  "nullValue" => "null",
                  "title" => btreemap! {
                    "lang" => "en",
                    "value" => "Harry Potter"
                  }
                }
            })),
        )]);
    }
1317
1318 #[test]
1319 fn parses_sample() {
1320 test_full_grok(vec![(
1321 r#"\[%{date("yyyy-MM-dd HH:mm:ss,SSS"):date}\]\[%{notSpace:level}\s*\]\[%{notSpace:logger.thread_name}-#%{integer:logger.thread_id}\]\[%{notSpace:logger.name}\] .*"#,
1322 r#"[2020-04-03 07:01:55,248][INFO ][exchange-worker-#43][FileWriteAheadLogManager] Started write-ahead log manager [mode=LOG_ONLY]"#,
1323 Ok(Value::from(btreemap! {
1324 "date"=> 1585897315248_i64,
1325 "level"=> "INFO",
1326 "logger"=> btreemap! {
1327 "name"=> "FileWriteAheadLogManager",
1328 "thread_id"=> 43,
1329 "thread_name"=> "exchange-worker"
1330 }
1331 })),
1332 )]);
1333 }
1334
    /// Empty objects are removed from the result, including objects that only
    /// become empty once their own empty children have been removed.
    #[test]
    fn remove_empty_objects() {
        test_full_grok(vec![
            // Object merged into the root.
            (
                "%{data::json}",
                r#"{"root": {"object": {"empty": {}}, "string": "abc" }}"#,
                Ok(Value::Object(btreemap!(
                    "root" => btreemap! (
                        "string" => "abc"
                    )
                ))),
            ),
            // Object stored under a named field.
            (
                "%{data:field:json}",
                r#"{"root": {"object": {"empty": {}}, "string": "abc" }}"#,
                Ok(Value::Object(btreemap!(
                    "field" => btreemap!(
                        "root" => btreemap! (
                            "string" => "abc"
                        )
                    )))),
            ),
            // A nested field nulled by its filter leaves no empty parent objects behind.
            (
                r#"%{notSpace:network.destination.ip:nullIf("-")}"#,
                "-",
                Ok(Value::Object(btreemap!())),
            ),
        ]);
    }
1364 #[test]
1365 fn parses_json_keys_as_path() {
1366 test_full_grok(vec![(
1367 "%{data::json}",
1368 r#"{"a.b": "c"}"#,
1369 Ok(Value::Object(btreemap!(
1370 "a" => btreemap! (
1371 "b" => "c"
1372 )
1373 ))),
1374 )]);
1375 }
1376}