1use crate::compiler::prelude::*;
2
3fn split(value: &Value, limit: Value, pattern: Value) -> Resolved {
4 let string = value.try_bytes_utf8_lossy()?;
5 let limit = match limit.try_integer()? {
6 x if x < 0 => 0,
7 #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
9 x => x as usize,
10 };
11 match pattern {
12 Value::Regex(pattern) => Ok(pattern
13 .splitn(string.as_ref(), limit)
14 .collect::<Vec<_>>()
15 .into()),
16 Value::Bytes(bytes) => {
17 let pattern = String::from_utf8_lossy(&bytes);
18
19 Ok(string
20 .splitn(limit, pattern.as_ref())
21 .collect::<Vec<_>>()
22 .into())
23 }
24 value => Err(ValueError::Expected {
25 got: value.kind(),
26 expected: Kind::regex() | Kind::bytes(),
27 }
28 .into()),
29 }
30}
31
/// Stateless marker type that represents the `split` function.
#[derive(Debug, Clone, Copy)]
pub struct Split;
34
35impl Function for Split {
36 fn identifier(&self) -> &'static str {
37 "split"
38 }
39
40 fn usage(&self) -> &'static str {
41 "Splits the `value` string using `pattern`."
42 }
43
44 fn category(&self) -> &'static str {
45 Category::String.as_ref()
46 }
47
48 fn return_kind(&self) -> u16 {
49 kind::ARRAY
50 }
51
52 fn return_rules(&self) -> &'static [&'static str] {
53 &[
54 "If `limit` is specified, the remainder of the string is returned unsplit after `limit` has been reached.",
55 ]
56 }
57
58 fn parameters(&self) -> &'static [Parameter] {
59 const PARAMETERS: &[Parameter] = &[
60 Parameter::required("value", kind::BYTES, "The string to split."),
61 Parameter::required(
62 "pattern",
63 kind::BYTES | kind::REGEX,
64 "The string is split whenever this pattern is matched.",
65 ),
66 Parameter::optional(
67 "limit",
68 kind::INTEGER,
69 "The maximum number of substrings to return.",
70 ),
71 ];
72 PARAMETERS
73 }
74
75 fn examples(&self) -> &'static [Example] {
76 &[
77 example! {
78 title: "Split a string (no limit)",
79 source: r#"split("apples and pears and bananas", " and ")"#,
80 result: Ok(r#"["apples", "pears", "bananas"]"#),
81 },
82 example! {
83 title: "Split a string (with a limit)",
84 source: r#"split("apples and pears and bananas", " and ", limit: 2)"#,
85 result: Ok(r#"["apples", "pears and bananas"]"#),
86 },
87 example! {
88 title: "Split string",
89 source: r#"split("foobar", "b")"#,
90 result: Ok(r#"["foo", "ar"]"#),
91 },
92 example! {
93 title: "Split regex",
94 source: r#"split("barbaz", r'ba')"#,
95 result: Ok(r#"["", "r", "z"]"#),
96 },
97 ]
98 }
99
100 fn compile(
101 &self,
102 _state: &state::TypeState,
103 _ctx: &mut FunctionCompileContext,
104 arguments: ArgumentList,
105 ) -> Compiled {
106 let value = arguments.required("value");
107 let pattern = arguments.required("pattern");
108 let limit = arguments.optional("limit").unwrap_or(expr!(999_999_999));
109
110 Ok(SplitFn {
111 value,
112 pattern,
113 limit,
114 }
115 .as_expr())
116 }
117}
118
119#[derive(Debug, Clone)]
120pub(crate) struct SplitFn {
121 value: Box<dyn Expression>,
122 pattern: Box<dyn Expression>,
123 limit: Box<dyn Expression>,
124}
125
126impl FunctionExpression for SplitFn {
127 fn resolve(&self, ctx: &mut Context) -> Resolved {
128 let value = self.value.resolve(ctx)?;
129 let limit = self.limit.resolve(ctx)?;
130 let pattern = self.pattern.resolve(ctx)?;
131
132 split(&value, limit, pattern)
133 }
134
135 fn type_def(&self, _: &state::TypeState) -> TypeDef {
136 TypeDef::array(Collection::from_unknown(Kind::bytes())).infallible()
137 }
138}
139
// Table-driven tests for `split`. Some cases use literal-only regexes on purpose, hence
// the `trivial_regex` allowance.
#[cfg(test)]
#[allow(clippy::trivial_regex)]
mod test {
    use super::*;
    use crate::value;

    test_function![
        split => Split;

        // An empty input still yields one (empty) element.
        empty {
            args: func_args![value: "", pattern: " "],
            want: Ok(value!([""])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // No separator present: the input comes back as the only element.
        single {
            args: func_args![value: "foo", pattern: " "],
            want: Ok(value!(["foo"])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // Plain string separator, no limit.
        long {
            args: func_args![value: "This is a long string.", pattern: " "],
            want: Ok(value!(["This", "is", "a", "long", "string."])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // Regex separator combined with a limit.
        regex {
            args: func_args![
                value: "This is a long string",
                pattern: Value::Regex(regex::Regex::new(" ").unwrap().into()),
                limit: 2
            ],
            want: Ok(value!(["This", "is a long string"])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // Case-insensitive regex separator.
        non_space {
            args: func_args![
                value: "ThisaisAlongAstring.",
                pattern: Value::Regex(regex::Regex::new("(?i)a").unwrap().into())
            ],
            want: Ok(value!(["This", "is", "long", "string."])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // Multi-byte characters are kept intact.
        unicode {
            args: func_args![value: "˙ƃuᴉɹʇs ƃuol ɐ sᴉ sᴉɥ┴", pattern: " "],
            want: Ok(value!(["˙ƃuᴉɹʇs", "ƃuol", "ɐ", "sᴉ", "sᴉɥ┴"])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // With a limit, the remainder is returned unsplit as the last element.
        limit {
            args: func_args![value: "This is a long string.", pattern: " ", limit: 2],
            want: Ok(value!(["This", "is a long string."])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // A limit larger than the number of pieces changes nothing.
        over_length_limit {
            args: func_args![value: "This is a long string.", pattern: " ", limit: 2000],
            want: Ok(value!(["This", "is", "a", "long", "string."])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // A limit of zero produces an empty array.
        zero_limit {
            args: func_args![value: "This is a long string.", pattern: " ", limit: 0],
            want: Ok(value!([])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // A negative limit gives the same result as zero.
        negative_limit {
            args: func_args![value: "This is a long string.", pattern: " ", limit: -1],
            want: Ok(value!([])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }
    ];
}