vrl/stdlib/
replace_with.rs

1use std::collections::BTreeMap;
2
3use regex::{CaptureMatches, CaptureNames, Captures, Regex};
4
5use crate::compiler::prelude::*;
6use std::sync::LazyLock;
7
8static DEFAULT_COUNT: LazyLock<Value> = LazyLock::new(|| Value::Integer(-1));
9
10static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
11    vec![
12        Parameter::required("value", kind::BYTES, "The original string."),
13        Parameter::required(
14            "pattern",
15            kind::REGEX,
16            "Replace all matches of this pattern. Must be a regular expression.",
17        ),
18        Parameter::optional(
19            "count",
20            kind::INTEGER,
21            "The maximum number of replacements to perform. `-1` means replace all matches.",
22        )
23        .default(&DEFAULT_COUNT),
24    ]
25});
26
27fn replace_with<T>(
28    value: Value,
29    pattern: &Regex,
30    count: Value,
31    ctx: &mut Context,
32    runner: &closure::Runner<T>,
33) -> Resolved
34where
35    T: Fn(&mut Context) -> Result<Value, ExpressionError>,
36{
37    let haystack = value.try_bytes_utf8_lossy()?;
38    let count = match count.try_integer()? {
39        // TODO consider removal options
40        #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
41        i if i > 0 => i as usize,
42        i if i < 0 => 0,
43        // this is when i == 0
44        _ => return Ok(value),
45    };
46    let captures = pattern.captures_iter(&haystack);
47    make_replacement(
48        captures,
49        &haystack,
50        count,
51        &pattern.capture_names(),
52        ctx,
53        runner,
54    )
55}
56
57fn make_replacement<T>(
58    caps: CaptureMatches,
59    haystack: &str,
60    count: usize,
61    capture_names: &CaptureNames,
62    ctx: &mut Context,
63    runner: &closure::Runner<T>,
64) -> Resolved
65where
66    T: Fn(&mut Context) -> Result<Value, ExpressionError>,
67{
68    // possible optimization: peek at first capture, if none return the original value.
69    let mut replaced = String::with_capacity(haystack.len());
70    let limit = if count == 0 { usize::MAX } else { count - 1 };
71    let mut last_match = 0;
72    // we loop over the matches ourselves instead of calling Regex::replacen, so that we can
73    // handle errors. This is however based on the implementation of Regex::replacen
74    for (idx, captures) in caps.enumerate() {
75        // Safe to unrap because the 0th index always includes the full match.
76        let m = captures.get(0).unwrap(); // full match
77
78        let mut value = captures_to_value(&captures, capture_names.clone());
79        runner.map_value(ctx, &mut value)?;
80        let replacement = value.try_bytes_utf8_lossy()?;
81
82        replaced.push_str(&haystack[last_match..m.start()]);
83        replaced.push_str(&replacement);
84        last_match = m.end();
85        if idx >= limit {
86            break;
87        }
88    }
89    // add the final component
90    replaced.push_str(&haystack[last_match..]);
91    Ok(replaced.into())
92}
93
94const STRING_NAME: &str = "string";
95const CAPTURES_NAME: &str = "captures";
96
97fn captures_to_value(captures: &Captures, capture_names: CaptureNames) -> Value {
98    let mut object: ObjectMap = BTreeMap::new();
99
100    // The full match, named "string"
101    object.insert(STRING_NAME.into(), captures.get(0).unwrap().as_str().into());
102    // The length includes the total match, so subtract 1
103    let mut capture_groups: Vec<Value> = Vec::with_capacity(captures.len() - 1);
104
105    // We skip the first entry, because it is for the full match, which we have already
106    // extracted
107    for (idx, name) in capture_names.enumerate().skip(1) {
108        let value: Value = if let Some(group) = captures.get(idx) {
109            group.as_str().into()
110        } else {
111            Value::Null
112        };
113        if let Some(name) = name {
114            object.insert(name.into(), value.clone());
115        }
116        capture_groups.push(value);
117    }
118
119    object.insert(CAPTURES_NAME.into(), capture_groups.into());
120
121    object.into()
122}
123
124#[derive(Clone, Copy, Debug)]
125pub struct ReplaceWith;
126
127impl Function for ReplaceWith {
128    fn identifier(&self) -> &'static str {
129        "replace_with"
130    }
131
132    fn usage(&self) -> &'static str {
133        indoc! {"
134            Replaces all matching instances of `pattern` using a closure.
135
136            The `pattern` argument accepts a regular expression that can use capture groups.
137
138            The function uses the function closure syntax to compute the replacement values.
139
140            The closure takes a single parameter, which is an array, where the first item is always
141            present and contains the entire string that matched `pattern`. The items from index one on
142            contain the capture groups of the corresponding index. If a capture group is optional, the
143            value may be null if it didn't match.
144
145            The value returned by the closure must be a string and will replace the section of
146            the input that was matched.
147
148            This returns a new string with the replacements, the original string is not mutated.
149        "}
150    }
151
152    fn category(&self) -> &'static str {
153        Category::String.as_ref()
154    }
155
156    fn return_kind(&self) -> u16 {
157        kind::BYTES
158    }
159
160    fn parameters(&self) -> &'static [Parameter] {
161        PARAMETERS.as_slice()
162    }
163
164    fn examples(&self) -> &'static [Example] {
165        &[
166            example! {
167                title: "Capitalize words",
168                source: indoc! {r#"
169                    replace_with("apples and bananas", r'\b(\w)(\w*)') -> |match| {
170                        upcase!(match.captures[0]) + string!(match.captures[1])
171                    }
172                "#},
173                result: Ok("Apples And Bananas"),
174            },
175            example! {
176                title: "Replace with hash",
177                source: indoc! {r#"
178                    replace_with("email from test@example.com", r'\w+@example.com') -> |match| {
179                        sha2(match.string, variant: "SHA-512/224")
180                    }
181                "#},
182                result: Ok("email from adf6e1bc4415d24912bd93072ad34ef825a7b6eb3bf53f68def1fc17"),
183            },
184            example! {
185                title: "Replace first instance",
186                source: indoc! {r#"
187                    replace_with("Apples and Apples", r'(?i)apples|cones', count: 1) -> |match| {
188                        "Pine" + downcase(match.string)
189                    }
190                "#},
191                result: Ok("Pineapples and Apples"),
192            },
193            example! {
194                title: "Named capture group",
195                source: indoc! {r#"
196                    replace_with("level=error A message", r'level=(?P<level>\w+)') -> |match| {
197                        lvl = upcase!(match.level)
198                        "[{{lvl}}]"
199                    }
200                "#},
201                result: Ok("[ERROR] A message"),
202            },
203            example! {
204                title: "Replace with processed capture group",
205                source: indoc! {r#"
206                    replace_with(s'Got message: {"msg": "b"}', r'message: (\{.*\})') -> |m| {
207                        to_string!(parse_json!(m.captures[0]).msg)
208                    }
209                "#},
210                result: Ok("Got b"),
211            },
212            example! {
213                title: "Replace with optional capture group",
214                source: indoc! {r#"
215                    replace_with("bar of chocolate and bar of gold", r'bar( of gold)?') -> |m| {
216                        if m.captures[0] == null { "pile" } else { "money" }
217                    }
218                "#},
219                result: Ok("pile of chocolate and money"),
220            },
221        ]
222    }
223
224    fn compile(
225        &self,
226        _state: &state::TypeState,
227        _ctx: &mut FunctionCompileContext,
228        arguments: ArgumentList,
229    ) -> Compiled {
230        let value = arguments.required("value");
231        let pattern = arguments.required("pattern");
232        let count = arguments.optional("count");
233
234        let closure = arguments.required_closure()?;
235
236        Ok(ReplaceWithFn {
237            value,
238            pattern,
239            count,
240            closure,
241        }
242        .as_expr())
243    }
244
245    fn closure(&self) -> Option<closure::Definition> {
246        use closure::{Definition, Input, Output, Variable, VariableKind};
247
248        let match_type = Collection::from_parts(
249            BTreeMap::from([
250                (STRING_NAME.into(), Kind::bytes()),
251                (
252                    CAPTURES_NAME.into(),
253                    Kind::array(Collection::from_unknown(Kind::bytes().or_null())),
254                ),
255            ]),
256            Kind::bytes().or_null(),
257        );
258
259        Some(Definition {
260            inputs: vec![Input {
261                parameter_keyword: "value",
262                kind: Kind::bytes(),
263                variables: vec![Variable {
264                    kind: VariableKind::Exact(Kind::object(match_type)),
265                }],
266                output: Output::Kind(Kind::bytes()),
267                example: example! {
268                    title: "replace with hash",
269                    source: r#"replace_with("received email from a@example.com", pattern: r'\w+@\w+\.\w+') -> |match| { sha2(match.string) }"#,
270                    result: Ok(
271                        "received email from 896bdca840c9304a5d0bdbeacc4ef359e3093f80c9777c9967e31ba0ff99ed58",
272                    ),
273                },
274            }],
275            is_iterator: false,
276        })
277    }
278}
279
280#[derive(Debug, Clone)]
281struct ReplaceWithFn {
282    value: Box<dyn Expression>,
283    pattern: Box<dyn Expression>,
284    count: Option<Box<dyn Expression>>,
285    closure: Closure,
286}
287
288impl FunctionExpression for ReplaceWithFn {
289    fn resolve(&self, ctx: &mut Context) -> ExpressionResult<Value> {
290        let value = self.value.resolve(ctx)?;
291        let pattern = self.pattern.resolve(ctx)?;
292        let pattern = pattern
293            .as_regex()
294            .ok_or_else(|| ExpressionError::from("failed to resolve regex"))?;
295        for name in pattern.capture_names().flatten() {
296            if name == STRING_NAME || name == CAPTURES_NAME {
297                return Err(ExpressionError::from(
298                    r#"Capture group cannot be named "string" or "captures""#,
299                ));
300            }
301        }
302        let count = self
303            .count
304            .map_resolve_with_default(ctx, || DEFAULT_COUNT.clone())?;
305        let Closure {
306            variables, block, ..
307        } = &self.closure;
308
309        let runner = closure::Runner::new(variables, |ctx| block.resolve(ctx));
310
311        replace_with(value, pattern, count, ctx, &runner)
312    }
313
314    fn type_def(&self, _: &state::TypeState) -> TypeDef {
315        TypeDef::bytes().infallible()
316    }
317}