1use regex::Regex;
2
3use crate::compiler::prelude::*;
4
5use super::util;
6use std::sync::LazyLock;
7
8static DEFAULT_NUMERIC_GROUPS: LazyLock<Value> = LazyLock::new(|| Value::Boolean(false));
9
10static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
11 vec![
12 Parameter::required("value", kind::ANY, "The string to search."),
13 Parameter::required("pattern", kind::REGEX, "The regular expression pattern to search against."),
14 Parameter::optional("numeric_groups", kind::BOOLEAN, "If `true`, the index of each group in the regular expression is also captured. Index `0`
15contains the whole match.")
16 .default(&DEFAULT_NUMERIC_GROUPS),
17 ]
18});
19
20fn parse_regex_all(value: &Value, numeric_groups: bool, pattern: &Regex) -> Resolved {
21 let value = value.try_bytes_utf8_lossy()?;
22 Ok(pattern
23 .captures_iter(&value)
24 .map(|capture| util::capture_regex_to_map(pattern, &capture, numeric_groups).into())
25 .collect::<Vec<Value>>()
26 .into())
27}
28
/// Marker type for the `parse_regex_all` function; it carries no state.
#[derive(Debug, Clone, Copy)]
pub struct ParseRegexAll;
31
32impl Function for ParseRegexAll {
33 fn identifier(&self) -> &'static str {
34 "parse_regex_all"
35 }
36
37 fn usage(&self) -> &'static str {
38 indoc! {"
39 Parses the `value` using the provided [Regex](https://en.wikipedia.org/wiki/Regular_expression) `pattern`.
40
41 This function differs from the `parse_regex` function in that it returns _all_ matches, not just the first.
42 "}
43 }
44
45 fn category(&self) -> &'static str {
46 Category::Parse.as_ref()
47 }
48
49 fn internal_failure_reasons(&self) -> &'static [&'static str] {
50 &["`value` is not a string.", "`pattern` is not a regex."]
51 }
52
53 fn return_kind(&self) -> u16 {
54 kind::ARRAY
55 }
56
57 fn return_rules(&self) -> &'static [&'static str] {
58 &[
59 "Matches return all capture groups corresponding to the leftmost matches in the text.",
60 "Raises an error if no match is found.",
61 ]
62 }
63
64 fn notices(&self) -> &'static [&'static str] {
65 &[
66 indoc! {"
67 VRL aims to provide purpose-specific [parsing functions](/docs/reference/vrl/functions/#parse-functions)
68 for common log formats. Before reaching for the `parse_regex` function, see if a VRL
69 [`parse_*` function](/docs/reference/vrl/functions/#parse-functions) already exists
70 for your format. If not, we recommend
71 [opening an issue](https://github.com/vectordotdev/vector/issues/new?labels=type%3A+new+feature)
72 to request support for the desired format.
73 "},
74 indoc! {"
75 All values are returned as strings. We recommend manually coercing values to desired
76 types as you see fit.
77 "},
78 ]
79 }
80
81 fn parameters(&self) -> &'static [Parameter] {
82 PARAMETERS.as_slice()
83 }
84
85 fn compile(
86 &self,
87 _state: &state::TypeState,
88 _ctx: &mut FunctionCompileContext,
89 arguments: ArgumentList,
90 ) -> Compiled {
91 let value = arguments.required("value");
92 let pattern = arguments.required("pattern");
93 let numeric_groups = arguments.optional("numeric_groups");
94
95 Ok(ParseRegexAllFn {
96 value,
97 pattern,
98 numeric_groups,
99 }
100 .as_expr())
101 }
102
103 fn examples(&self) -> &'static [Example] {
104 &[
105 example! {
106 title: "Parse using Regex (all matches)",
107 source: r#"parse_regex_all!("first group and second group.", r'(?P<number>\w+) group', numeric_groups: true)"#,
108 result: Ok(indoc! { r#"[
109 {"number": "first",
110 "0": "first group",
111 "1": "first"},
112 {"number": "second",
113 "0": "second group",
114 "1": "second"}]"# }),
115 },
116 example! {
117 title: "Parse using Regex (simple match)",
118 source: r#"parse_regex_all!("apples and carrots, peaches and peas", r'(?P<fruit>[\w\.]+) and (?P<veg>[\w]+)')"#,
119 result: Ok(indoc! { r#"[
120 {"fruit": "apples",
121 "veg": "carrots"},
122 {"fruit": "peaches",
123 "veg": "peas"}]"# }),
124 },
125 example! {
126 title: "Parse using Regex (all numeric groups)",
127 source: r#"parse_regex_all!("apples and carrots, peaches and peas", r'(?P<fruit>[\w\.]+) and (?P<veg>[\w]+)', numeric_groups: true)"#,
128 result: Ok(indoc! { r#"[
129 {"fruit": "apples",
130 "veg": "carrots",
131 "0": "apples and carrots",
132 "1": "apples",
133 "2": "carrots"},
134 {"fruit": "peaches",
135 "veg": "peas",
136 "0": "peaches and peas",
137 "1": "peaches",
138 "2": "peas"}]"# }),
139 },
140 example! {
141 title: "Parse using Regex with variables",
142 source: indoc! {r#"
143 variable = r'(?P<fruit>[\w\.]+) and (?P<veg>[\w]+)';
144 parse_regex_all!("apples and carrots, peaches and peas", variable)
145 "#},
146 result: Ok(indoc! { r#"[
147 {"fruit": "apples",
148 "veg": "carrots"},
149 {"fruit": "peaches",
150 "veg": "peas"}]"# }),
151 },
152 ]
153 }
154}
155
156#[derive(Debug, Clone)]
157pub(crate) struct ParseRegexAllFn {
158 value: Box<dyn Expression>,
159 pattern: Box<dyn Expression>,
160 numeric_groups: Option<Box<dyn Expression>>,
161}
162
163impl FunctionExpression for ParseRegexAllFn {
164 fn resolve(&self, ctx: &mut Context) -> Resolved {
165 let value = self.value.resolve(ctx)?;
166 let numeric_groups = self
167 .numeric_groups
168 .map_resolve_with_default(ctx, || DEFAULT_NUMERIC_GROUPS.clone())?;
169 let pattern = self
170 .pattern
171 .resolve(ctx)?
172 .as_regex()
173 .ok_or_else(|| ExpressionError::from("failed to resolve regex"))?
174 .clone();
175
176 parse_regex_all(&value, numeric_groups.try_boolean()?, &pattern)
177 }
178
179 fn type_def(&self, state: &state::TypeState) -> TypeDef {
180 if let Some(value) = self.pattern.resolve_constant(state)
181 && let Some(regex) = value.as_regex()
182 {
183 return TypeDef::array(Collection::from_unknown(
184 Kind::object(util::regex_kind(regex)).or_null(),
185 ))
186 .fallible();
187 }
188
189 TypeDef::array(Collection::from_unknown(
190 Kind::object(Collection::from_unknown(Kind::bytes() | Kind::null())).or_null(),
191 ))
192 .fallible()
193 }
194}
195
#[cfg(test)]
#[allow(clippy::trivial_regex)]
mod tests {
    use crate::{btreemap, value};

    use super::*;

    /// Pattern shared by all cases: named groups `fruit` and `veg` joined by " and ".
    const FRUIT_AND_VEG: &str = r"(?P<fruit>[\w\.]+) and (?P<veg>[\w]+)";

    /// Type definition expected by every case, since all of them use the same
    /// constant pattern.
    fn fruit_and_veg_tdef() -> TypeDef {
        TypeDef::array(Collection::from_unknown(Kind::null().or_object(btreemap! {
            Field::from("fruit") => Kind::bytes(),
            Field::from("veg") => Kind::bytes(),
            Field::from("0") => Kind::bytes() | Kind::null(),
            Field::from("1") => Kind::bytes() | Kind::null(),
            Field::from("2") => Kind::bytes() | Kind::null(),
        })))
        .fallible()
    }

    test_function![
        parse_regex_all => ParseRegexAll;

        // Two matches, named groups only.
        matches {
            args: func_args![
                value: "apples and carrots, peaches and peas",
                pattern: Regex::new(FRUIT_AND_VEG).unwrap(),
            ],
            want: Ok(value!([
                {"fruit": "apples", "veg": "carrots"},
                {"fruit": "peaches", "veg": "peas"}
            ])),
            tdef: fruit_and_veg_tdef(),
        }

        // Same input, with numbered groups requested as well.
        numeric_groups {
            args: func_args![
                value: "apples and carrots, peaches and peas",
                pattern: Regex::new(FRUIT_AND_VEG).unwrap(),
                numeric_groups: true
            ],
            want: Ok(value!([
                {
                    "fruit": "apples",
                    "veg": "carrots",
                    "0": "apples and carrots",
                    "1": "apples",
                    "2": "carrots"
                },
                {
                    "fruit": "peaches",
                    "veg": "peas",
                    "0": "peaches and peas",
                    "1": "peaches",
                    "2": "peas"
                }
            ])),
            tdef: fruit_and_veg_tdef(),
        }

        // Input without any match yields an empty array, not an error.
        no_matches {
            args: func_args![
                value: "I don't match",
                pattern: Regex::new(FRUIT_AND_VEG).unwrap()
            ],
            want: Ok(value!([])),
            tdef: fruit_and_veg_tdef(),
        }
    ];
}