1use crate::path::OwnedValuePath;
2use crate::value::{KeyString, Value};
3use std::sync::LazyLock;
4use std::{
5 collections::{BTreeMap, HashMap},
6 convert::TryFrom,
7};
8use tracing::error;
9
10use super::grok::Grok;
11use super::{
12 ast::{self, Destination, GrokPattern},
13 grok_filter::GrokFilter,
14 matchers::{date, date::DateFilter},
15 parse_grok_pattern::parse_grok_pattern,
16};
17
/// Matches a single `%{...}` grok expression inside a rule.
///
/// Between the braces the regex accepts either a character that is neither `"`
/// nor `}`, or a complete double-quoted string in which `\"` counts as an
/// escaped quote. A `}` inside a quoted argument therefore does not end the
/// expression.
static GROK_PATTERN_RE: LazyLock<onig::Regex> = LazyLock::new(|| {
    // The pattern is a constant literal, so `unwrap` can only panic if the literal itself is invalid.
    onig::Regex::new(r#"%\{(?:[^"\}]|(?<!\\)"(?:\\"|[^"])*(?<!\\)")+\}"#).unwrap()
});
21
/// A compiled grok rule together with the description of the fields it captures.
#[derive(Clone, Debug)]
pub struct GrokRule {
    /// The compiled pattern, as returned by `Grok::compile` in `parse_pattern`.
    pub pattern: super::grok::Pattern,
    /// Field descriptions keyed by the generated capture name (`grok0`, `grok1`, ...).
    pub fields: HashMap<String, GrokField>,
}
31
/// Describes one captured field: its destination path and the filters attached to it.
#[derive(Debug, Clone)]
pub struct GrokField {
    /// Path cloned from the destination of the grok expression.
    pub lookup: OwnedValuePath,
    /// Filters attached to this field. `register_filter` inserts at index 0, so
    /// filters added by a match function come before the one written in the expression.
    pub filters: Vec<GrokFilter>,
}
39
/// Mutable state threaded through the parsing of a single grok rule.
#[derive(Debug, Clone)]
pub struct GrokRuleParseContext {
    /// The regular expression text accumulated so far.
    pub regex: String,
    /// Fields registered so far, keyed by the generated capture name.
    pub fields: HashMap<String, GrokField>,
    /// Alias definitions, keyed by alias name.
    pub aliases: BTreeMap<KeyString, String>,
    /// Names of the aliases currently being expanded; used to detect circular definitions.
    pub alias_stack: Vec<String>,
}
52
53impl GrokRuleParseContext {
54 fn append_regex(&mut self, regex: &str) {
56 self.regex.push_str(regex);
57 }
58
59 fn register_grok_field(&mut self, grok_name: &str, field: GrokField) {
61 self.fields.insert(grok_name.to_string(), field);
62 }
63
64 fn register_filter(&mut self, grok_name: &str, filter: GrokFilter) {
66 self.fields
67 .entry(grok_name.to_string())
68 .and_modify(|v| v.filters.insert(0, filter));
69 }
70
71 fn new(aliases: BTreeMap<KeyString, String>) -> Self {
72 Self {
73 regex: String::new(),
74 fields: HashMap::new(),
75 aliases,
76 alias_stack: vec![],
77 }
78 }
79
80 fn generate_grok_compliant_name(&mut self) -> String {
82 format!("grok{}", self.fields.len())
83 }
84}
85
/// Errors reported while parsing grok rules.
#[derive(thiserror::Error, Debug, PartialEq, Eq)]
pub enum Error {
    /// A grok expression could not be parsed, or the generated pattern failed
    /// to compile. Holds the offending expression and the reason.
    #[error("failed to parse grok expression '{}': {}", .0, .1)]
    InvalidGrokExpression(String, String),
    /// A match function received missing or unsupported arguments. Holds the function name.
    #[error("invalid arguments for the function '{}'", .0)]
    InvalidFunctionArguments(String),
    /// Holds the name of the unknown filter.
    // NOTE(review): not constructed in this file; presumably produced by the `GrokFilter` conversion — confirm.
    #[error("unknown filter '{}'", .0)]
    UnknownFilter(String),
    /// An alias refers to itself, directly or through other aliases. Holds the
    /// first alias on the expansion stack at the time of detection.
    #[error("Circular dependency found in the alias '{}'", .0)]
    CircularDependencyInAliasDefinition(String),
}
97
98pub fn parse_grok_rules(
117 patterns: &[String],
118 aliases: BTreeMap<KeyString, String>,
119) -> Result<Vec<GrokRule>, Error> {
120 let mut grok = Grok::with_patterns();
121
122 patterns
123 .iter()
124 .filter(|&r| !r.is_empty())
125 .map(|r| {
126 parse_pattern(
127 r,
128 &mut GrokRuleParseContext::new(aliases.clone()),
129 &mut grok,
130 )
131 })
132 .collect::<Result<Vec<GrokRule>, Error>>()
133}
134
135fn parse_alias(
144 name: &str,
145 definition: &str,
146 context: &mut GrokRuleParseContext,
147) -> Result<(), Error> {
148 if context.alias_stack.iter().any(|a| a == name) {
150 return Err(Error::CircularDependencyInAliasDefinition(
151 context.alias_stack.first().unwrap().to_string(),
152 ));
153 } else {
154 context.alias_stack.push(name.to_string());
155 }
156
157 parse_grok_rule(definition, context)?;
158
159 context.alias_stack.pop();
160
161 Ok(())
162}
163
164fn parse_pattern(
173 pattern: &str,
174 context: &mut GrokRuleParseContext,
175 grok: &mut Grok,
176) -> Result<GrokRule, Error> {
177 parse_grok_rule(pattern, context)?;
178 let pattern = [
179 r"(?m)\A", &context
184 .regex
185 .replace("(?s)", "(?m)")
186 .replace("(?-s)", "(?-m)"),
187 r"\z",
188 ]
189 .concat();
190
191 let pattern = grok
193 .compile(&pattern, true)
194 .map_err(|e| Error::InvalidGrokExpression(pattern, e.to_string()))?;
195
196 Ok(GrokRule {
197 pattern,
198 fields: context.fields.clone(),
199 })
200}
201
202fn parse_grok_rule(rule: &str, context: &mut GrokRuleParseContext) -> Result<(), Error> {
210 let mut regex_i = 0;
211 for (start, end) in GROK_PATTERN_RE.find_iter(rule) {
212 context.append_regex(&rule[regex_i..start]);
213 regex_i = end;
214 let pattern = parse_grok_pattern(&rule[start..end])
215 .map_err(|e| Error::InvalidGrokExpression(rule[start..end].to_string(), e))?;
216 resolve_grok_pattern(&pattern, context)?;
217 }
218 context.append_regex(&rule[regex_i..]);
219
220 Ok(())
221}
222
223fn resolve_grok_pattern(
233 pattern: &GrokPattern,
234 context: &mut GrokRuleParseContext,
235) -> Result<(), Error> {
236 let grok_alias = pattern
237 .destination
238 .as_ref()
239 .map(|_| context.generate_grok_compliant_name());
240 match pattern {
241 GrokPattern {
242 destination:
243 Some(Destination {
244 path,
245 filter_fn: Some(filter),
246 }),
247 ..
248 } => {
249 context.register_grok_field(
250 grok_alias.as_ref().expect("grok alias is not defined"),
251 GrokField {
252 lookup: path.clone(),
253 filters: vec![GrokFilter::try_from(filter)?],
254 },
255 );
256 }
257 GrokPattern {
258 destination:
259 Some(Destination {
260 path,
261 filter_fn: None,
262 }),
263 ..
264 } => {
265 context.register_grok_field(
266 grok_alias.as_ref().expect("grok alias is not defined"),
267 GrokField {
268 lookup: path.clone(),
269 filters: vec![],
270 },
271 );
272 }
273 _ => {}
274 }
275
276 let match_name = &pattern.match_fn.name;
277 match context.aliases.get(match_name.as_str()).cloned() {
278 Some(alias_def) => match &grok_alias {
279 Some(grok_alias) => {
280 context.append_regex("(?<");
281 context.append_regex(grok_alias);
282 context.append_regex(">");
283 parse_alias(match_name, &alias_def, context)?;
284 context.append_regex(")");
285 }
286 None => {
287 parse_alias(match_name, &alias_def, context)?;
288 }
289 },
290 None if match_name == "regex" || match_name == "date" || match_name == "boolean" => {
291 match &grok_alias {
293 Some(grok_alias) => {
294 context.append_regex("(?<");
295 context.append_regex(grok_alias);
296 context.append_regex(">");
297 }
298 None => {
299 context.append_regex("(?:"); }
301 }
302 resolves_match_function(grok_alias, pattern, context)?;
303 context.append_regex(")");
304 }
305 None => {
306 context.append_regex("%{");
308 resolves_match_function(grok_alias.clone(), pattern, context)?;
309
310 if let Some(grok_alias) = &grok_alias {
311 context.append_regex(&format!(":{grok_alias}"));
312 }
313 context.append_regex("}");
314 }
315 }
316
317 Ok(())
318}
319
320fn resolves_match_function(
324 grok_alias: Option<String>,
325 pattern: &ast::GrokPattern,
326 context: &mut GrokRuleParseContext,
327) -> Result<(), Error> {
328 let match_fn = &pattern.match_fn;
329 match match_fn.name.as_ref() {
330 "regex" => match match_fn.args.as_ref() {
331 Some(args) if !args.is_empty() => {
332 if let ast::FunctionArgument::Arg(Value::Bytes(ref b)) = args[0] {
333 context.append_regex(&String::from_utf8_lossy(b));
334 return Ok(());
335 }
336 Err(Error::InvalidFunctionArguments(match_fn.name.clone()))
337 }
338 _ => Err(Error::InvalidFunctionArguments(match_fn.name.clone())),
339 },
340 "integer" => {
341 if let Some(grok_alias) = &grok_alias {
342 context.register_filter(grok_alias, GrokFilter::Integer);
343 }
344 context.append_regex("integerStr");
345 Ok(())
346 }
347 "integerExt" => {
348 if let Some(grok_alias) = &grok_alias {
349 context.register_filter(grok_alias, GrokFilter::IntegerExt);
350 }
351 context.append_regex("integerExtStr");
352 Ok(())
353 }
354 "number" => {
355 if let Some(grok_alias) = &grok_alias {
356 context.register_filter(grok_alias, GrokFilter::Number);
357 }
358 context.append_regex("numberStr");
359 Ok(())
360 }
361 "numberExt" => {
362 if let Some(grok_alias) = &grok_alias {
363 context.register_filter(grok_alias, GrokFilter::NumberExt);
364 }
365 context.append_regex("numberExtStr");
366 Ok(())
367 }
368 "date" => {
369 match match_fn.args.as_ref() {
370 Some(args) if !args.is_empty() && args.len() <= 2 => {
371 if let ast::FunctionArgument::Arg(Value::Bytes(b)) = &args[0] {
372 let format = String::from_utf8_lossy(b);
373 let result = date::time_format_to_regex(&format, true)
375 .map_err(|_e| Error::InvalidFunctionArguments(match_fn.name.clone()))?;
376 let filter_re = regex::Regex::new(&result.regex).map_err(|error| {
377 error!(message = "Error compiling regex", regex = %result.regex, %error);
378 Error::InvalidFunctionArguments(match_fn.name.clone())
379 })?;
380
381 let strp_format = date::convert_time_format(&format).map_err(|error| {
382 error!(message = "Error compiling regex", regex = %result.regex, %error);
383 Error::InvalidFunctionArguments(match_fn.name.clone())
384 })?;
385 let mut target_tz = None;
386 if args.len() == 2
387 && let ast::FunctionArgument::Arg(Value::Bytes(b)) = &args[1]
388 {
389 let tz = String::from_utf8_lossy(b);
390 date::parse_timezone(&tz).map_err(|error| {
391 error!(message = "Invalid(unrecognized) timezone", %error);
392 Error::InvalidFunctionArguments(match_fn.name.clone())
393 })?;
394 target_tz = Some(tz.to_string());
395 }
396 let filter = GrokFilter::Date(DateFilter {
397 original_format: format.to_string(),
398 strp_format,
399 regex: filter_re,
400 target_tz,
401 tz_aware: result.with_tz,
402 with_tz_capture: result.with_tz_capture,
403 with_fraction_second: result.with_fraction_second,
404 });
405 let grok_re = date::time_format_to_regex(&format, false)
407 .map_err(|error| {
408 error!(message = "Invalid time format", format = %format, %error);
409 Error::InvalidFunctionArguments(match_fn.name.clone())
410 })?
411 .regex;
412 if let Some(grok_alias) = &grok_alias {
413 context.register_filter(grok_alias, filter);
414 }
415 context.append_regex(&grok_re);
416 return Ok(());
417 }
418 Err(Error::InvalidFunctionArguments(match_fn.name.clone()))
419 }
420 _ => Err(Error::InvalidFunctionArguments(match_fn.name.clone())),
421 }
422 }
423 grok_pattern_name => {
425 context.append_regex(grok_pattern_name);
426 Ok(())
427 }
428 }
429}
430
#[cfg(test)]
mod tests {
    use super::*;

    /// A quoted filter argument may contain `\"`; the parsed `nullIf` filter
    /// must hold the unescaped text.
    #[test]
    fn supports_escaped_quotes() {
        let rule = r#"%{notSpace:field:nullIf("with \"escaped\" quotes")}"#.to_string();
        let rules = parse_grok_rules(&[rule], BTreeMap::new()).expect("couldn't parse rules");

        let (_, field) = rules[0]
            .fields
            .iter()
            .next()
            .expect("invalid grok pattern");

        assert!(matches!(
            &field.filters[0],
            GrokFilter::NullIf(v) if *v == r#"with "escaped" quotes"#
        ));
    }
}