vrl/stdlib/
parse_grok.rs

1use crate::compiler::prelude::*;
2
3#[cfg(not(target_arch = "wasm32"))]
4mod non_wasm {
5    use crate::compiler::prelude::*;
6    use crate::diagnostic::{Label, Span};
7    use crate::value::Value;
8    pub(super) use std::sync::Arc;
9    use std::{collections::BTreeMap, fmt};
10
11    fn parse_grok(value: &Value, pattern: &Arc<grok::Pattern>) -> Resolved {
12        let bytes = value.try_bytes_utf8_lossy()?;
13        match pattern.match_against(&bytes) {
14            Some(matches) => {
15                let mut result = BTreeMap::new();
16
17                for (name, value) in &matches {
18                    result.insert(name.to_string().into(), Value::from(value));
19                }
20
21                Ok(Value::from(result))
22            }
23            None => Err("unable to parse input with grok pattern".into()),
24        }
25    }
26
    /// Errors raised while compiling a `parse_grok` call.
    #[derive(Debug)]
    pub(crate) enum Error {
        /// The grok library could not compile the supplied pattern; carries
        /// the library's own error.
        InvalidGrokPattern(grok::Error),
    }
31
32    impl fmt::Display for Error {
33        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
34            match self {
35                Error::InvalidGrokPattern(err) => err.fmt(f),
36            }
37        }
38    }
39
    // Empty impl: the trait's default method implementations are used as-is.
    impl std::error::Error for Error {}
41
42    impl DiagnosticMessage for Error {
43        fn code(&self) -> usize {
44            109
45        }
46
47        fn labels(&self) -> Vec<Label> {
48            match self {
49                Error::InvalidGrokPattern(err) => {
50                    vec![Label::primary(
51                        format!("grok pattern error: {err}"),
52                        Span::default(),
53                    )]
54                }
55            }
56        }
57    }
58
59    #[derive(Clone, Debug)]
60    pub(super) struct ParseGrokFn {
61        pub(super) value: Box<dyn Expression>,
62
63        // Wrapping pattern in an Arc, as cloning the pattern could otherwise be expensive.
64        pub(super) pattern: Arc<grok::Pattern>,
65    }
66
67    impl FunctionExpression for ParseGrokFn {
68        fn resolve(&self, ctx: &mut Context) -> Resolved {
69            let value = self.value.resolve(ctx)?;
70            let pattern = self.pattern.clone();
71
72            parse_grok(&value, &pattern)
73        }
74
75        fn type_def(&self, _: &TypeState) -> TypeDef {
76            TypeDef::object(Collection::any()).fallible()
77        }
78    }
79}
80
81#[allow(clippy::wildcard_imports)]
82#[cfg(not(target_arch = "wasm32"))]
83use non_wasm::*;
84
/// Marker type for the `parse_grok` function; carries no state.
#[derive(Debug, Clone, Copy)]
pub struct ParseGrok;
87
88impl Function for ParseGrok {
89    fn identifier(&self) -> &'static str {
90        "parse_grok"
91    }
92
93    fn usage(&self) -> &'static str {
94        "Parses the `value` using the [`grok`](https://github.com/daschl/grok/tree/master/patterns) format. All patterns [listed here](https://github.com/daschl/grok/tree/master/patterns) are supported."
95    }
96
97    fn category(&self) -> &'static str {
98        Category::Parse.as_ref()
99    }
100
101    fn internal_failure_reasons(&self) -> &'static [&'static str] {
102        &["`value` fails to parse using the provided `pattern`."]
103    }
104
105    fn return_kind(&self) -> u16 {
106        kind::OBJECT
107    }
108
109    fn notices(&self) -> &'static [&'static str] {
110        &[indoc! {"
111            We recommend using community-maintained Grok patterns when possible, as they're more
112            likely to be properly vetted and improved over time than bespoke patterns.
113        "}]
114    }
115
116    fn parameters(&self) -> &'static [Parameter] {
117        const PARAMETERS: &[Parameter] = &[
118            Parameter::required("value", kind::BYTES, "The string to parse."),
119            Parameter::required(
120                "pattern",
121                kind::BYTES,
122                "The [Grok pattern](https://github.com/daschl/grok/tree/master/patterns).",
123            ),
124        ];
125        PARAMETERS
126    }
127
128    fn examples(&self) -> &'static [Example] {
129        &[example! {
130            title: "Parse using Grok",
131            source: indoc! {r#"
132                value = "2020-10-02T23:22:12.223222Z info Hello world"
133                pattern = "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{GREEDYDATA:message}"
134
135                parse_grok!(value, pattern)
136            "#},
137            result: Ok(indoc! {r#"
138                {
139                    "timestamp": "2020-10-02T23:22:12.223222Z",
140                    "level": "info",
141                    "message": "Hello world"
142                }
143            "#}),
144        }]
145    }
146
147    #[cfg(not(target_arch = "wasm32"))]
148    fn compile(
149        &self,
150        state: &state::TypeState,
151        _ctx: &mut FunctionCompileContext,
152        arguments: ArgumentList,
153    ) -> Compiled {
154        let value = arguments.required("value");
155
156        let pattern = arguments
157            .required_literal("pattern", state)?
158            .try_bytes_utf8_lossy()
159            .expect("grok pattern not bytes")
160            .into_owned();
161
162        let grok = grok::Grok::with_default_patterns();
163        let pattern =
164            Arc::new(grok.compile(&pattern, true).map_err(|e| {
165                Box::new(Error::InvalidGrokPattern(e)) as Box<dyn DiagnosticMessage>
166            })?);
167
168        Ok(ParseGrokFn { value, pattern }.as_expr())
169    }
170
171    #[cfg(target_arch = "wasm32")]
172    fn compile(
173        &self,
174        _state: &state::TypeState,
175        ctx: &mut FunctionCompileContext,
176        _: ArgumentList,
177    ) -> Compiled {
178        Ok(super::WasmUnsupportedFunction::new(
179            ctx.span(),
180            TypeDef::object(Collection::any()).fallible(),
181        )
182        .as_expr())
183    }
184}
185
#[cfg(test)]
mod test {
    use super::*;
    use crate::{btreemap, value::Value};

    test_function![
        parse_grok => ParseGrok;

        // A pattern referencing an unknown definition is rejected.
        invalid_grok {
            args: func_args![
                value: "foo",
                pattern: "%{NOG}"
            ],
            want: Err("The given pattern definition name \"NOG\" could not be found in the definition map"),
            tdef: TypeDef::object(Collection::any()).fallible(),
        }

        // Input that matches none of the pattern yields the parse error.
        error {
            args: func_args![
                value: "an ungrokkable message",
                pattern: "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{GREEDYDATA:message}"
            ],
            want: Err("unable to parse input with grok pattern"),
            tdef: TypeDef::object(Collection::any()).fallible(),
        }

        // Input that only starts with a valid timestamp still fails.
        error2 {
            args: func_args![
                value: "2020-10-02T23:22:12.223222Z an ungrokkable message",
                pattern: "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{GREEDYDATA:message}"
            ],
            want: Err("unable to parse input with grok pattern"),
            tdef: TypeDef::object(Collection::any()).fallible(),
        }

        // A fully matching line yields every named capture.
        parsed {
            args: func_args![
                value: "2020-10-02T23:22:12.223222Z info Hello world",
                pattern: "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{GREEDYDATA:message}"
            ],
            want: Ok(Value::from(btreemap! {
                "timestamp" => "2020-10-02T23:22:12.223222Z",
                "level" => "info",
                "message" => "Hello world",
            })),
            tdef: TypeDef::object(Collection::any()).fallible(),
        }

        // With alternation, only the capture from the matching branch appears.
        parsed2 {
            args: func_args![
                value: "2020-10-02T23:22:12.223222Z",
                pattern: "(%{TIMESTAMP_ISO8601:timestamp}|%{LOGLEVEL:level})"
            ],
            want: Ok(Value::from(btreemap! {
                "timestamp" => "2020-10-02T23:22:12.223222Z",
            })),
            tdef: TypeDef::object(Collection::any()).fallible(),
        }
    ];
}