vrl/stdlib/
split.rs

1use crate::compiler::prelude::*;
2
3fn split(value: &Value, limit: Value, pattern: Value) -> Resolved {
4    let string = value.try_bytes_utf8_lossy()?;
5    let limit = match limit.try_integer()? {
6        x if x < 0 => 0,
7        // TODO consider removal options
8        #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
9        x => x as usize,
10    };
11    match pattern {
12        Value::Regex(pattern) => Ok(pattern
13            .splitn(string.as_ref(), limit)
14            .collect::<Vec<_>>()
15            .into()),
16        Value::Bytes(bytes) => {
17            let pattern = String::from_utf8_lossy(&bytes);
18
19            Ok(string
20                .splitn(limit, pattern.as_ref())
21                .collect::<Vec<_>>()
22                .into())
23        }
24        value => Err(ValueError::Expected {
25            got: value.kind(),
26            expected: Kind::regex() | Kind::bytes(),
27        }
28        .into()),
29    }
30}
31
/// Unit struct that implements `Function` for the `split` function.
#[derive(Debug, Clone, Copy)]
pub struct Split;
34
35impl Function for Split {
36    fn identifier(&self) -> &'static str {
37        "split"
38    }
39
40    fn usage(&self) -> &'static str {
41        "Splits the `value` string using `pattern`."
42    }
43
44    fn category(&self) -> &'static str {
45        Category::String.as_ref()
46    }
47
48    fn return_kind(&self) -> u16 {
49        kind::ARRAY
50    }
51
52    fn return_rules(&self) -> &'static [&'static str] {
53        &[
54            "If `limit` is specified, the remainder of the string is returned unsplit after `limit` has been reached.",
55        ]
56    }
57
58    fn parameters(&self) -> &'static [Parameter] {
59        const PARAMETERS: &[Parameter] = &[
60            Parameter::required("value", kind::BYTES, "The string to split."),
61            Parameter::required(
62                "pattern",
63                kind::BYTES | kind::REGEX,
64                "The string is split whenever this pattern is matched.",
65            ),
66            Parameter::optional(
67                "limit",
68                kind::INTEGER,
69                "The maximum number of substrings to return.",
70            ),
71        ];
72        PARAMETERS
73    }
74
75    fn examples(&self) -> &'static [Example] {
76        &[
77            example! {
78                title: "Split a string (no limit)",
79                source: r#"split("apples and pears and bananas", " and ")"#,
80                result: Ok(r#"["apples", "pears", "bananas"]"#),
81            },
82            example! {
83                title: "Split a string (with a limit)",
84                source: r#"split("apples and pears and bananas", " and ", limit: 2)"#,
85                result: Ok(r#"["apples", "pears and bananas"]"#),
86            },
87            example! {
88                title: "Split string",
89                source: r#"split("foobar", "b")"#,
90                result: Ok(r#"["foo", "ar"]"#),
91            },
92            example! {
93                title: "Split regex",
94                source: r#"split("barbaz", r'ba')"#,
95                result: Ok(r#"["", "r", "z"]"#),
96            },
97        ]
98    }
99
100    fn compile(
101        &self,
102        _state: &state::TypeState,
103        _ctx: &mut FunctionCompileContext,
104        arguments: ArgumentList,
105    ) -> Compiled {
106        let value = arguments.required("value");
107        let pattern = arguments.required("pattern");
108        let limit = arguments.optional("limit").unwrap_or(expr!(999_999_999));
109
110        Ok(SplitFn {
111            value,
112            pattern,
113            limit,
114        }
115        .as_expr())
116    }
117}
118
119#[derive(Debug, Clone)]
120pub(crate) struct SplitFn {
121    value: Box<dyn Expression>,
122    pattern: Box<dyn Expression>,
123    limit: Box<dyn Expression>,
124}
125
126impl FunctionExpression for SplitFn {
127    fn resolve(&self, ctx: &mut Context) -> Resolved {
128        let value = self.value.resolve(ctx)?;
129        let limit = self.limit.resolve(ctx)?;
130        let pattern = self.pattern.resolve(ctx)?;
131
132        split(&value, limit, pattern)
133    }
134
135    fn type_def(&self, _: &state::TypeState) -> TypeDef {
136        TypeDef::array(Collection::from_unknown(Kind::bytes())).infallible()
137    }
138}
139
// Unit tests for `split`, driven by the `test_function!` macro. Each case
// lists the call arguments, the expected result, and the expected type
// definition.
#[cfg(test)]
#[allow(clippy::trivial_regex)]
mod test {
    use super::*;
    use crate::value;

    test_function![
        split => Split;

        // An empty input yields a single empty string.
        empty {
            args: func_args![value: "",
                             pattern: " "
            ],
            want: Ok(value!([""])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // Input without the separator is returned as one element.
        single {
            args: func_args![value: "foo",
                             pattern: " "
            ],
            want: Ok(value!(["foo"])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // Every occurrence of the separator splits when no limit is given.
        long {
            args: func_args![value: "This is a long string.",
                             pattern: " "
            ],
            want: Ok(value!(["This", "is", "a", "long", "string."])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // Regex pattern combined with a limit: the remainder stays unsplit.
        regex {
            args: func_args![value: "This is a long string",
                             pattern: Value::Regex(regex::Regex::new(" ").unwrap().into()),
                             limit: 2
            ],
            want: Ok(value!(["This", "is a long string"])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // Case-insensitive regex separator.
        non_space {
            args: func_args![value: "ThisaisAlongAstring.",
                             pattern: Value::Regex(regex::Regex::new("(?i)a").unwrap().into())
            ],
            want: Ok(value!(["This", "is", "long", "string."])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // Multi-byte UTF-8 input split on an ASCII space.
        unicode {
             args: func_args![value: "˙ƃuᴉɹʇs ƃuol ɐ sᴉ sᴉɥ┴",
                              pattern: " "
             ],
             want: Ok(value!(["˙ƃuᴉɹʇs", "ƃuol", "ɐ", "sᴉ", "sᴉɥ┴"])),
             tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
         }

        // String pattern with a limit: the remainder stays unsplit.
        limit {
            args: func_args![value: "This is a long string.",
                             pattern: " ",
                             limit: 2
            ],
            want: Ok(value!(["This", "is a long string."])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // A limit larger than the number of pieces has no effect.
        over_length_limit {
            args: func_args![value: "This is a long string.",
                             pattern: " ",
                             limit: 2000
            ],
            want: Ok(value!(["This", "is", "a", "long", "string."])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // A limit of zero yields an empty array.
        zero_limit {
            args: func_args![value: "This is a long string.",
                             pattern: " ",
                             limit: 0
            ],
            want: Ok(value!([])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // A negative limit yields an empty array, the same result as zero.
        negative_limit {
            args: func_args![value: "This is a long string.",
                             pattern: " ",
                             limit: -1
            ],
            want: Ok(value!([])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }
    ];
}