1use std::collections::BTreeMap;
2
3use regex::{CaptureMatches, CaptureNames, Captures, Regex};
4
5use crate::compiler::prelude::*;
6use std::sync::LazyLock;
7
/// Default for the optional `count` parameter: `-1`, which the parameter description
/// documents as "replace all matches".
static DEFAULT_COUNT: LazyLock<Value> = LazyLock::new(|| Value::Integer(-1));
9
/// Parameter table for `replace_with`, built lazily on first access.
///
/// * `value` (required, bytes) - the string to search.
/// * `pattern` (required, regex) - the pattern whose matches are replaced.
/// * `count` (optional, integer) - replacement limit, defaulting to `DEFAULT_COUNT`.
static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
    vec![
        Parameter::required("value", kind::BYTES, "The original string."),
        Parameter::required(
            "pattern",
            kind::REGEX,
            "Replace all matches of this pattern. Must be a regular expression.",
        ),
        Parameter::optional(
            "count",
            kind::INTEGER,
            "The maximum number of replacements to perform. `-1` means replace all matches.",
        )
        // Omitting `count` therefore behaves like passing `-1`.
        .default(&DEFAULT_COUNT),
    ]
});
26
27fn replace_with<T>(
28 value: Value,
29 pattern: &Regex,
30 count: Value,
31 ctx: &mut Context,
32 runner: &closure::Runner<T>,
33) -> Resolved
34where
35 T: Fn(&mut Context) -> Result<Value, ExpressionError>,
36{
37 let haystack = value.try_bytes_utf8_lossy()?;
38 let count = match count.try_integer()? {
39 #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
41 i if i > 0 => i as usize,
42 i if i < 0 => 0,
43 _ => return Ok(value),
45 };
46 let captures = pattern.captures_iter(&haystack);
47 make_replacement(
48 captures,
49 &haystack,
50 count,
51 &pattern.capture_names(),
52 ctx,
53 runner,
54 )
55}
56
57fn make_replacement<T>(
58 caps: CaptureMatches,
59 haystack: &str,
60 count: usize,
61 capture_names: &CaptureNames,
62 ctx: &mut Context,
63 runner: &closure::Runner<T>,
64) -> Resolved
65where
66 T: Fn(&mut Context) -> Result<Value, ExpressionError>,
67{
68 let mut replaced = String::with_capacity(haystack.len());
70 let limit = if count == 0 { usize::MAX } else { count - 1 };
71 let mut last_match = 0;
72 for (idx, captures) in caps.enumerate() {
75 let m = captures.get(0).unwrap(); let mut value = captures_to_value(&captures, capture_names.clone());
79 runner.map_value(ctx, &mut value)?;
80 let replacement = value.try_bytes_utf8_lossy()?;
81
82 replaced.push_str(&haystack[last_match..m.start()]);
83 replaced.push_str(&replacement);
84 last_match = m.end();
85 if idx >= limit {
86 break;
87 }
88 }
89 replaced.push_str(&haystack[last_match..]);
91 Ok(replaced.into())
92}
93
/// Key of the match object field that holds the full matched text.
const STRING_NAME: &str = "string";
/// Key of the match object field that holds the array of capture groups.
const CAPTURES_NAME: &str = "captures";
96
97fn captures_to_value(captures: &Captures, capture_names: CaptureNames) -> Value {
98 let mut object: ObjectMap = BTreeMap::new();
99
100 object.insert(STRING_NAME.into(), captures.get(0).unwrap().as_str().into());
102 let mut capture_groups: Vec<Value> = Vec::with_capacity(captures.len() - 1);
104
105 for (idx, name) in capture_names.enumerate().skip(1) {
108 let value: Value = if let Some(group) = captures.get(idx) {
109 group.as_str().into()
110 } else {
111 Value::Null
112 };
113 if let Some(name) = name {
114 object.insert(name.into(), value.clone());
115 }
116 capture_groups.push(value);
117 }
118
119 object.insert(CAPTURES_NAME.into(), capture_groups.into());
120
121 object.into()
122}
123
/// Marker type implementing the `replace_with` function (see its `Function` impl).
#[derive(Clone, Copy, Debug)]
pub struct ReplaceWith;
126
127impl Function for ReplaceWith {
128 fn identifier(&self) -> &'static str {
129 "replace_with"
130 }
131
132 fn usage(&self) -> &'static str {
133 indoc! {"
134 Replaces all matching instances of `pattern` using a closure.
135
136 The `pattern` argument accepts a regular expression that can use capture groups.
137
138 The function uses the function closure syntax to compute the replacement values.
139
140 The closure takes a single parameter, which is an array, where the first item is always
141 present and contains the entire string that matched `pattern`. The items from index one on
142 contain the capture groups of the corresponding index. If a capture group is optional, the
143 value may be null if it didn't match.
144
145 The value returned by the closure must be a string and will replace the section of
146 the input that was matched.
147
148 This returns a new string with the replacements, the original string is not mutated.
149 "}
150 }
151
152 fn category(&self) -> &'static str {
153 Category::String.as_ref()
154 }
155
156 fn return_kind(&self) -> u16 {
157 kind::BYTES
158 }
159
160 fn parameters(&self) -> &'static [Parameter] {
161 PARAMETERS.as_slice()
162 }
163
164 fn examples(&self) -> &'static [Example] {
165 &[
166 example! {
167 title: "Capitalize words",
168 source: indoc! {r#"
169 replace_with("apples and bananas", r'\b(\w)(\w*)') -> |match| {
170 upcase!(match.captures[0]) + string!(match.captures[1])
171 }
172 "#},
173 result: Ok("Apples And Bananas"),
174 },
175 example! {
176 title: "Replace with hash",
177 source: indoc! {r#"
178 replace_with("email from test@example.com", r'\w+@example.com') -> |match| {
179 sha2(match.string, variant: "SHA-512/224")
180 }
181 "#},
182 result: Ok("email from adf6e1bc4415d24912bd93072ad34ef825a7b6eb3bf53f68def1fc17"),
183 },
184 example! {
185 title: "Replace first instance",
186 source: indoc! {r#"
187 replace_with("Apples and Apples", r'(?i)apples|cones', count: 1) -> |match| {
188 "Pine" + downcase(match.string)
189 }
190 "#},
191 result: Ok("Pineapples and Apples"),
192 },
193 example! {
194 title: "Named capture group",
195 source: indoc! {r#"
196 replace_with("level=error A message", r'level=(?P<level>\w+)') -> |match| {
197 lvl = upcase!(match.level)
198 "[{{lvl}}]"
199 }
200 "#},
201 result: Ok("[ERROR] A message"),
202 },
203 example! {
204 title: "Replace with processed capture group",
205 source: indoc! {r#"
206 replace_with(s'Got message: {"msg": "b"}', r'message: (\{.*\})') -> |m| {
207 to_string!(parse_json!(m.captures[0]).msg)
208 }
209 "#},
210 result: Ok("Got b"),
211 },
212 example! {
213 title: "Replace with optional capture group",
214 source: indoc! {r#"
215 replace_with("bar of chocolate and bar of gold", r'bar( of gold)?') -> |m| {
216 if m.captures[0] == null { "pile" } else { "money" }
217 }
218 "#},
219 result: Ok("pile of chocolate and money"),
220 },
221 ]
222 }
223
224 fn compile(
225 &self,
226 _state: &state::TypeState,
227 _ctx: &mut FunctionCompileContext,
228 arguments: ArgumentList,
229 ) -> Compiled {
230 let value = arguments.required("value");
231 let pattern = arguments.required("pattern");
232 let count = arguments.optional("count");
233
234 let closure = arguments.required_closure()?;
235
236 Ok(ReplaceWithFn {
237 value,
238 pattern,
239 count,
240 closure,
241 }
242 .as_expr())
243 }
244
245 fn closure(&self) -> Option<closure::Definition> {
246 use closure::{Definition, Input, Output, Variable, VariableKind};
247
248 let match_type = Collection::from_parts(
249 BTreeMap::from([
250 (STRING_NAME.into(), Kind::bytes()),
251 (
252 CAPTURES_NAME.into(),
253 Kind::array(Collection::from_unknown(Kind::bytes().or_null())),
254 ),
255 ]),
256 Kind::bytes().or_null(),
257 );
258
259 Some(Definition {
260 inputs: vec![Input {
261 parameter_keyword: "value",
262 kind: Kind::bytes(),
263 variables: vec![Variable {
264 kind: VariableKind::Exact(Kind::object(match_type)),
265 }],
266 output: Output::Kind(Kind::bytes()),
267 example: example! {
268 title: "replace with hash",
269 source: r#"replace_with("received email from a@example.com", pattern: r'\w+@\w+\.\w+') -> |match| { sha2(match.string) }"#,
270 result: Ok(
271 "received email from 896bdca840c9304a5d0bdbeacc4ef359e3093f80c9777c9967e31ba0ff99ed58",
272 ),
273 },
274 }],
275 is_iterator: false,
276 })
277 }
278}
279
/// Compiled form of a `replace_with` call, produced by `ReplaceWith::compile`.
#[derive(Debug, Clone)]
struct ReplaceWithFn {
    /// Expression for the `value` argument (the string to search).
    value: Box<dyn Expression>,
    /// Expression for the `pattern` argument; must resolve to a regex.
    pattern: Box<dyn Expression>,
    /// Expression for the optional `count` argument; `None` when omitted.
    count: Option<Box<dyn Expression>>,
    /// Closure that computes the replacement for each match.
    closure: Closure,
}
287
288impl FunctionExpression for ReplaceWithFn {
289 fn resolve(&self, ctx: &mut Context) -> ExpressionResult<Value> {
290 let value = self.value.resolve(ctx)?;
291 let pattern = self.pattern.resolve(ctx)?;
292 let pattern = pattern
293 .as_regex()
294 .ok_or_else(|| ExpressionError::from("failed to resolve regex"))?;
295 for name in pattern.capture_names().flatten() {
296 if name == STRING_NAME || name == CAPTURES_NAME {
297 return Err(ExpressionError::from(
298 r#"Capture group cannot be named "string" or "captures""#,
299 ));
300 }
301 }
302 let count = self
303 .count
304 .map_resolve_with_default(ctx, || DEFAULT_COUNT.clone())?;
305 let Closure {
306 variables, block, ..
307 } = &self.closure;
308
309 let runner = closure::Runner::new(variables, |ctx| block.resolve(ctx));
310
311 replace_with(value, pattern, count, ctx, &runner)
312 }
313
314 fn type_def(&self, _: &state::TypeState) -> TypeDef {
315 TypeDef::bytes().infallible()
316 }
317}