// vrl/stdlib/chunks.rs

1use crate::compiler::prelude::*;
2
3fn chunks(value: Value, chunk_size: Value) -> Resolved {
4    let bytes = value.try_bytes()?;
5    let chunk_size = chunk_size.try_integer()?;
6
7    if chunk_size < 1 {
8        return Err(r#""chunk_size" must be at least 1 byte"#.into());
9    }
10
11    if let Ok(chunk_size) = usize::try_from(chunk_size) {
12        Ok(bytes.chunks(chunk_size).collect::<Vec<_>>().into())
13    } else {
14        Err(format!(
15            r#""chunk_size" is too large: must be at most {} bytes"#,
16            usize::MAX
17        )
18        .into())
19    }
20}
21
/// Marker type that implements the `chunks` function.
#[derive(Debug, Clone, Copy)]
pub struct Chunks;
24
25impl Function for Chunks {
26    fn identifier(&self) -> &'static str {
27        "chunks"
28    }
29
30    fn usage(&self) -> &'static str {
31        "Chunks `value` into slices of length `chunk_size` bytes."
32    }
33
34    fn category(&self) -> &'static str {
35        Category::Array.as_ref()
36    }
37
38    fn internal_failure_reasons(&self) -> &'static [&'static str] {
39        &[
40            "`chunk_size` must be at least 1 byte.",
41            "`chunk_size` is too large.",
42        ]
43    }
44
45    fn return_kind(&self) -> u16 {
46        kind::ARRAY
47    }
48
49    fn return_rules(&self) -> &'static [&'static str] {
50        &[
51            "`chunks` is considered fallible if the supplied `chunk_size` is an expression, and infallible if it's a literal integer.",
52        ]
53    }
54
55    fn parameters(&self) -> &'static [Parameter] {
56        const PARAMETERS: &[Parameter] = &[
57            Parameter::required("value", kind::BYTES, "The array of bytes to split."),
58            Parameter::required(
59                "chunk_size",
60                kind::INTEGER,
61                "The desired length of each chunk in bytes. This may be constrained by the host platform architecture.",
62            ),
63        ];
64        PARAMETERS
65    }
66
67    fn examples(&self) -> &'static [Example] {
68        &[
69            example! {
70                title: "Split a string into chunks",
71                source: r#"chunks("abcdefgh", 4)"#,
72                result: Ok(r#"["abcd", "efgh"]"#),
73            },
74            example! {
75                title: "Chunks do not respect unicode code point boundaries",
76                source: r#"chunks("ab你好", 4)"#,
77                result: Ok(r#"["ab�","�好"]"#),
78            },
79        ]
80    }
81
82    fn compile(
83        &self,
84        state: &TypeState,
85        _ctx: &mut FunctionCompileContext,
86        arguments: ArgumentList,
87    ) -> Compiled {
88        let value = arguments.required("value");
89        let chunk_size = arguments.required("chunk_size");
90
91        // chunk_size is converted to a usize, so if a user-supplied Value::Integer (i64) is
92        // larger than the platform's usize::MAX, it could fail to convert.
93        if let Some(literal) = chunk_size.resolve_constant(state)
94            && let Some(integer) = literal.as_integer()
95        {
96            if integer < 1 {
97                return Err(function::Error::InvalidArgument {
98                    keyword: "chunk_size",
99                    value: literal,
100                    error: r#""chunk_size" must be at least 1 byte"#,
101                }
102                .into());
103            }
104
105            if usize::try_from(integer).is_err() {
106                return Err(function::Error::InvalidArgument {
107                    keyword: "chunk_size",
108                    value: literal,
109                    error: r#""chunk_size" is too large"#,
110                }
111                .into());
112            }
113        }
114
115        Ok(ChunksFn { value, chunk_size }.as_expr())
116    }
117}
118
119#[derive(Debug, Clone)]
120struct ChunksFn {
121    value: Box<dyn Expression>,
122    chunk_size: Box<dyn Expression>,
123}
124
125impl FunctionExpression for ChunksFn {
126    fn resolve(&self, ctx: &mut Context) -> Resolved {
127        let value = self.value.resolve(ctx)?;
128        let chunk_size = self.chunk_size.resolve(ctx)?;
129
130        chunks(value, chunk_size)
131    }
132
133    fn type_def(&self, state: &TypeState) -> TypeDef {
134        let not_literal = self.chunk_size.resolve_constant(state).is_none();
135
136        TypeDef::array(Collection::from_unknown(Kind::bytes())).maybe_fallible(not_literal)
137    }
138}
139
#[cfg(test)]
mod tests {
    use crate::value;

    use super::*;

    test_function![
        chunks => Chunks;

        // ASCII input split into two 4-byte chunks.
        chunks_data {
            args: func_args![value: "abcdefgh",
                             chunk_size: 4,
            ],
            want: Ok(value!(["abcd", "efgh"])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // Chunking counts bytes, so multi-byte UTF-8 characters may be split
        // across chunk boundaries. Only the two parameters declared by
        // `Chunks::parameters` are passed.
        mixed_ascii_unicode {
            args: func_args![value: "ab你好",
                             chunk_size: 4,
            ],
            want: Ok(value!([b"ab\xe4\xbd", b"\xa0\xe5\xa5\xbd"])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }
    ];
}