1use crate::compiler::prelude::*;
2
3#[cfg(not(target_arch = "wasm32"))]
4mod non_wasm {
5 use crate::compiler::prelude::*;
6 use crate::diagnostic::{Label, Span};
7 use crate::value::Value;
8 pub(super) use std::sync::Arc;
9 use std::{collections::BTreeMap, fmt};
10
11 fn parse_grok(value: &Value, pattern: &Arc<grok::Pattern>) -> Resolved {
12 let bytes = value.try_bytes_utf8_lossy()?;
13 match pattern.match_against(&bytes) {
14 Some(matches) => {
15 let mut result = BTreeMap::new();
16
17 for (name, value) in &matches {
18 result.insert(name.to_string().into(), Value::from(value));
19 }
20
21 Ok(Value::from(result))
22 }
23 None => Err("unable to parse input with grok pattern".into()),
24 }
25 }
26
    /// Errors produced while compiling a `parse_grok` call.
    #[derive(Debug)]
    pub(crate) enum Error {
        /// The supplied grok pattern could not be compiled; wraps the `grok` crate's error.
        InvalidGrokPattern(grok::Error),
    }
31
32 impl fmt::Display for Error {
33 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
34 match self {
35 Error::InvalidGrokPattern(err) => err.fmt(f),
36 }
37 }
38 }
39
    // Marker impl with the default methods only; `Display` already forwards the wrapped
    // grok error's message.
    impl std::error::Error for Error {}
41
42 impl DiagnosticMessage for Error {
43 fn code(&self) -> usize {
44 109
45 }
46
47 fn labels(&self) -> Vec<Label> {
48 match self {
49 Error::InvalidGrokPattern(err) => {
50 vec![Label::primary(
51 format!("grok pattern error: {err}"),
52 Span::default(),
53 )]
54 }
55 }
56 }
57 }
58
    /// Runtime form of a `parse_grok` call: the input expression together with the
    /// grok pattern to match it against.
    #[derive(Clone, Debug)]
    pub(super) struct ParseGrokFn {
        /// Expression that is resolved to obtain the value to parse.
        pub(super) value: Box<dyn Expression>,

        /// Compiled grok pattern. Held in an `Arc`, so cloning this struct shares the
        /// pattern instead of copying it.
        pub(super) pattern: Arc<grok::Pattern>,
    }
66
67 impl FunctionExpression for ParseGrokFn {
68 fn resolve(&self, ctx: &mut Context) -> Resolved {
69 let value = self.value.resolve(ctx)?;
70 let pattern = self.pattern.clone();
71
72 parse_grok(&value, &pattern)
73 }
74
75 fn type_def(&self, _: &TypeState) -> TypeDef {
76 TypeDef::object(Collection::any()).fallible()
77 }
78 }
79}
80
81#[allow(clippy::wildcard_imports)]
82#[cfg(not(target_arch = "wasm32"))]
83use non_wasm::*;
84
/// Marker type implementing the `parse_grok` function.
#[derive(Clone, Copy, Debug)]
pub struct ParseGrok;
87
88impl Function for ParseGrok {
89 fn identifier(&self) -> &'static str {
90 "parse_grok"
91 }
92
93 fn usage(&self) -> &'static str {
94 "Parses the `value` using the [`grok`](https://github.com/daschl/grok/tree/master/patterns) format. All patterns [listed here](https://github.com/daschl/grok/tree/master/patterns) are supported."
95 }
96
97 fn category(&self) -> &'static str {
98 Category::Parse.as_ref()
99 }
100
101 fn internal_failure_reasons(&self) -> &'static [&'static str] {
102 &["`value` fails to parse using the provided `pattern`."]
103 }
104
105 fn return_kind(&self) -> u16 {
106 kind::OBJECT
107 }
108
109 fn notices(&self) -> &'static [&'static str] {
110 &[indoc! {"
111 We recommend using community-maintained Grok patterns when possible, as they're more
112 likely to be properly vetted and improved over time than bespoke patterns.
113 "}]
114 }
115
116 fn parameters(&self) -> &'static [Parameter] {
117 const PARAMETERS: &[Parameter] = &[
118 Parameter::required("value", kind::BYTES, "The string to parse."),
119 Parameter::required(
120 "pattern",
121 kind::BYTES,
122 "The [Grok pattern](https://github.com/daschl/grok/tree/master/patterns).",
123 ),
124 ];
125 PARAMETERS
126 }
127
128 fn examples(&self) -> &'static [Example] {
129 &[example! {
130 title: "Parse using Grok",
131 source: indoc! {r#"
132 value = "2020-10-02T23:22:12.223222Z info Hello world"
133 pattern = "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{GREEDYDATA:message}"
134
135 parse_grok!(value, pattern)
136 "#},
137 result: Ok(indoc! {r#"
138 {
139 "timestamp": "2020-10-02T23:22:12.223222Z",
140 "level": "info",
141 "message": "Hello world"
142 }
143 "#}),
144 }]
145 }
146
147 #[cfg(not(target_arch = "wasm32"))]
148 fn compile(
149 &self,
150 state: &state::TypeState,
151 _ctx: &mut FunctionCompileContext,
152 arguments: ArgumentList,
153 ) -> Compiled {
154 let value = arguments.required("value");
155
156 let pattern = arguments
157 .required_literal("pattern", state)?
158 .try_bytes_utf8_lossy()
159 .expect("grok pattern not bytes")
160 .into_owned();
161
162 let grok = grok::Grok::with_default_patterns();
163 let pattern =
164 Arc::new(grok.compile(&pattern, true).map_err(|e| {
165 Box::new(Error::InvalidGrokPattern(e)) as Box<dyn DiagnosticMessage>
166 })?);
167
168 Ok(ParseGrokFn { value, pattern }.as_expr())
169 }
170
171 #[cfg(target_arch = "wasm32")]
172 fn compile(
173 &self,
174 _state: &state::TypeState,
175 ctx: &mut FunctionCompileContext,
176 _: ArgumentList,
177 ) -> Compiled {
178 Ok(super::WasmUnsupportedFunction::new(
179 ctx.span(),
180 TypeDef::object(Collection::any()).fallible(),
181 )
182 .as_expr())
183 }
184}
185
#[cfg(test)]
mod test {
    use crate::btreemap;
    use crate::value::Value;

    use super::*;

    // Each case supplies `value` and `pattern` arguments, the expected outcome (`want`),
    // and the expected type definition (`tdef`).
    test_function![
        parse_grok => ParseGrok;

        // The pattern references a definition name that is not known, so an error about
        // the missing definition is expected.
        invalid_grok {
            args: func_args![ value: "foo",
                              pattern: "%{NOG}"],
            want: Err("The given pattern definition name \"NOG\" could not be found in the definition map"),
            tdef: TypeDef::object(Collection::any()).fallible(),
        }

        // Input that does not fit the pattern at all yields the generic parse error.
        error {
            args: func_args![ value: "an ungrokkable message",
                              pattern: "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{GREEDYDATA:message}"],
            want: Err("unable to parse input with grok pattern"),
            tdef: TypeDef::object(Collection::any()).fallible(),
        }

        // Input that begins with a timestamp but still does not satisfy the whole pattern
        // yields the same error.
        error2 {
            args: func_args![ value: "2020-10-02T23:22:12.223222Z an ungrokkable message",
                              pattern: "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{GREEDYDATA:message}"],
            want: Err("unable to parse input with grok pattern"),
            tdef: TypeDef::object(Collection::any()).fallible(),
        }

        // A fully matching input produces one field per named capture.
        parsed {
            args: func_args![ value: "2020-10-02T23:22:12.223222Z info Hello world",
                              pattern: "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{GREEDYDATA:message}"],
            want: Ok(Value::from(btreemap! {
                "timestamp" => "2020-10-02T23:22:12.223222Z",
                "level" => "info",
                "message" => "Hello world",
            })),
            tdef: TypeDef::object(Collection::any()).fallible(),
        }

        // With an alternation, only the capture from the alternative that matched is
        // expected in the result.
        parsed2 {
            args: func_args![ value: "2020-10-02T23:22:12.223222Z",
                              pattern: "(%{TIMESTAMP_ISO8601:timestamp}|%{LOGLEVEL:level})"],
            want: Ok(Value::from(btreemap! {
                "timestamp" => "2020-10-02T23:22:12.223222Z",
            })),
            tdef: TypeDef::object(Collection::any()).fallible(),
        }
    ];
}