vrl/stdlib/
parse_regex_all.rs

1use regex::Regex;
2
3use crate::compiler::prelude::*;
4
5use super::util;
6use std::sync::LazyLock;
7
8static DEFAULT_NUMERIC_GROUPS: LazyLock<Value> = LazyLock::new(|| Value::Boolean(false));
9
10static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
11    vec![
12        Parameter::required("value", kind::ANY, "The string to search."),
13        Parameter::required("pattern", kind::REGEX, "The regular expression pattern to search against."),
14        Parameter::optional("numeric_groups", kind::BOOLEAN, "If `true`, the index of each group in the regular expression is also captured. Index `0`
15contains the whole match.")
16            .default(&DEFAULT_NUMERIC_GROUPS),
17    ]
18});
19
20fn parse_regex_all(value: &Value, numeric_groups: bool, pattern: &Regex) -> Resolved {
21    let value = value.try_bytes_utf8_lossy()?;
22    Ok(pattern
23        .captures_iter(&value)
24        .map(|capture| util::capture_regex_to_map(pattern, &capture, numeric_groups).into())
25        .collect::<Vec<Value>>()
26        .into())
27}
28
29#[derive(Clone, Copy, Debug)]
30pub struct ParseRegexAll;
31
32impl Function for ParseRegexAll {
33    fn identifier(&self) -> &'static str {
34        "parse_regex_all"
35    }
36
37    fn usage(&self) -> &'static str {
38        indoc! {"
39            Parses the `value` using the provided [Regex](https://en.wikipedia.org/wiki/Regular_expression) `pattern`.
40
41            This function differs from the `parse_regex` function in that it returns _all_ matches, not just the first.
42        "}
43    }
44
45    fn category(&self) -> &'static str {
46        Category::Parse.as_ref()
47    }
48
49    fn internal_failure_reasons(&self) -> &'static [&'static str] {
50        &["`value` is not a string.", "`pattern` is not a regex."]
51    }
52
53    fn return_kind(&self) -> u16 {
54        kind::ARRAY
55    }
56
57    fn return_rules(&self) -> &'static [&'static str] {
58        &[
59            "Matches return all capture groups corresponding to the leftmost matches in the text.",
60            "Raises an error if no match is found.",
61        ]
62    }
63
64    fn notices(&self) -> &'static [&'static str] {
65        &[
66            indoc! {"
67                VRL aims to provide purpose-specific [parsing functions](/docs/reference/vrl/functions/#parse-functions)
68                for common log formats. Before reaching for the `parse_regex` function, see if a VRL
69                [`parse_*` function](/docs/reference/vrl/functions/#parse-functions) already exists
70                for your format. If not, we recommend
71                [opening an issue](https://github.com/vectordotdev/vector/issues/new?labels=type%3A+new+feature)
72                to request support for the desired format.
73            "},
74            indoc! {"
75                All values are returned as strings. We recommend manually coercing values to desired
76                types as you see fit.
77            "},
78        ]
79    }
80
81    fn parameters(&self) -> &'static [Parameter] {
82        PARAMETERS.as_slice()
83    }
84
85    fn compile(
86        &self,
87        _state: &state::TypeState,
88        _ctx: &mut FunctionCompileContext,
89        arguments: ArgumentList,
90    ) -> Compiled {
91        let value = arguments.required("value");
92        let pattern = arguments.required("pattern");
93        let numeric_groups = arguments.optional("numeric_groups");
94
95        Ok(ParseRegexAllFn {
96            value,
97            pattern,
98            numeric_groups,
99        }
100        .as_expr())
101    }
102
103    fn examples(&self) -> &'static [Example] {
104        &[
105            example! {
106                title: "Parse using Regex (all matches)",
107                source: r#"parse_regex_all!("first group and second group.", r'(?P<number>\w+) group', numeric_groups: true)"#,
108                result: Ok(indoc! { r#"[
109               {"number": "first",
110                "0": "first group",
111                "1": "first"},
112               {"number": "second",
113                "0": "second group",
114                "1": "second"}]"# }),
115            },
116            example! {
117                title: "Parse using Regex (simple match)",
118                source: r#"parse_regex_all!("apples and carrots, peaches and peas", r'(?P<fruit>[\w\.]+) and (?P<veg>[\w]+)')"#,
119                result: Ok(indoc! { r#"[
120               {"fruit": "apples",
121                "veg": "carrots"},
122               {"fruit": "peaches",
123                "veg": "peas"}]"# }),
124            },
125            example! {
126                title: "Parse using Regex (all numeric groups)",
127                source: r#"parse_regex_all!("apples and carrots, peaches and peas", r'(?P<fruit>[\w\.]+) and (?P<veg>[\w]+)', numeric_groups: true)"#,
128                result: Ok(indoc! { r#"[
129               {"fruit": "apples",
130                "veg": "carrots",
131                "0": "apples and carrots",
132                "1": "apples",
133                "2": "carrots"},
134               {"fruit": "peaches",
135                "veg": "peas",
136                "0": "peaches and peas",
137                "1": "peaches",
138                "2": "peas"}]"# }),
139            },
140            example! {
141                title: "Parse using Regex with variables",
142                source: indoc! {r#"
143                    variable = r'(?P<fruit>[\w\.]+) and (?P<veg>[\w]+)';
144                    parse_regex_all!("apples and carrots, peaches and peas", variable)
145                "#},
146                result: Ok(indoc! { r#"[
147               {"fruit": "apples",
148                "veg": "carrots"},
149               {"fruit": "peaches",
150                "veg": "peas"}]"# }),
151            },
152        ]
153    }
154}
155
156#[derive(Debug, Clone)]
157pub(crate) struct ParseRegexAllFn {
158    value: Box<dyn Expression>,
159    pattern: Box<dyn Expression>,
160    numeric_groups: Option<Box<dyn Expression>>,
161}
162
163impl FunctionExpression for ParseRegexAllFn {
164    fn resolve(&self, ctx: &mut Context) -> Resolved {
165        let value = self.value.resolve(ctx)?;
166        let numeric_groups = self
167            .numeric_groups
168            .map_resolve_with_default(ctx, || DEFAULT_NUMERIC_GROUPS.clone())?;
169        let pattern = self
170            .pattern
171            .resolve(ctx)?
172            .as_regex()
173            .ok_or_else(|| ExpressionError::from("failed to resolve regex"))?
174            .clone();
175
176        parse_regex_all(&value, numeric_groups.try_boolean()?, &pattern)
177    }
178
179    fn type_def(&self, state: &state::TypeState) -> TypeDef {
180        if let Some(value) = self.pattern.resolve_constant(state)
181            && let Some(regex) = value.as_regex()
182        {
183            return TypeDef::array(Collection::from_unknown(
184                Kind::object(util::regex_kind(regex)).or_null(),
185            ))
186            .fallible();
187        }
188
189        TypeDef::array(Collection::from_unknown(
190            Kind::object(Collection::from_unknown(Kind::bytes() | Kind::null())).or_null(),
191        ))
192        .fallible()
193    }
194}
195
// Unit tests for `parse_regex_all`, driven by the `test_function!` macro.
// Each case supplies the call arguments, the expected result (`want`) and the
// expected type definition (`tdef`).
#[cfg(test)]
#[allow(clippy::trivial_regex)]
mod tests {
    use crate::{btreemap, value};

    use super::*;

    test_function![
        parse_regex_all => ParseRegexAll;

        // Two matches, `numeric_groups` left at its default: only the named
        // groups appear in the returned objects.
        matches {
            args: func_args![
                value: "apples and carrots, peaches and peas",
                pattern: Regex::new(r"(?P<fruit>[\w\.]+) and (?P<veg>[\w]+)").unwrap(),
            ],
            want: Ok(value!([{"fruit": "apples",
                              "veg": "carrots"},
                             {"fruit": "peaches",
                              "veg": "peas"}])),
            // The expected type definition lists the numeric fields as
            // bytes-or-null even though this case does not request them.
            tdef: TypeDef::array(Collection::from_unknown(Kind::null().or_object(btreemap! {
                    Field::from("fruit") => Kind::bytes(),
                    Field::from("veg") => Kind::bytes(),
                    Field::from("0") => Kind::bytes() | Kind::null(),
                    Field::from("1") => Kind::bytes() | Kind::null(),
                    Field::from("2") => Kind::bytes() | Kind::null(),
                }))).fallible(),
        }

        // Same input with `numeric_groups: true`: each object additionally
        // carries "0" (whole match) and "1"/"2" (the two groups).
        numeric_groups {
            args: func_args![
                value: "apples and carrots, peaches and peas",
                pattern: Regex::new(r"(?P<fruit>[\w\.]+) and (?P<veg>[\w]+)").unwrap(),
                numeric_groups: true
            ],
            want: Ok(value!([{"fruit": "apples",
                              "veg": "carrots",
                              "0": "apples and carrots",
                              "1": "apples",
                              "2": "carrots"},
                             {"fruit": "peaches",
                              "veg": "peas",
                              "0": "peaches and peas",
                              "1": "peaches",
                              "2": "peas"}])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::null().or_object(btreemap! {
                    Field::from("fruit") => Kind::bytes(),
                    Field::from("veg") => Kind::bytes(),
                    Field::from("0") => Kind::bytes() | Kind::null(),
                    Field::from("1") => Kind::bytes() | Kind::null(),
                    Field::from("2") => Kind::bytes() | Kind::null(),
                }))).fallible(),
        }

        // Input that does not match the pattern: the result is an empty array,
        // not an error.
        no_matches {
            args: func_args![
                value: "I don't match",
                pattern: Regex::new(r"(?P<fruit>[\w\.]+) and (?P<veg>[\w]+)").unwrap()
            ],
            want: Ok(value!([])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::null().or_object(btreemap! {
                    Field::from("fruit") => Kind::bytes(),
                    Field::from("veg") => Kind::bytes(),
                    Field::from("0") => Kind::bytes() | Kind::null(),
                    Field::from("1") => Kind::bytes() | Kind::null(),
                    Field::from("2") => Kind::bytes() | Kind::null(),
                }))).fallible(),
        }
    ];
}