vrl/stdlib/
parse_csv.rs

1use crate::compiler::prelude::*;
2use csv::ReaderBuilder;
3use std::sync::LazyLock;
4
5static DEFAULT_DELIMITER: LazyLock<Value> = LazyLock::new(|| Value::Bytes(Bytes::from(",")));
6
7static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
8    vec![
9        Parameter::required("value", kind::BYTES, "The string to parse."),
10        Parameter::optional(
11            "delimiter",
12            kind::BYTES,
13            "The field delimiter to use when parsing. Must be a single-byte utf8 character.",
14        )
15        .default(&DEFAULT_DELIMITER),
16    ]
17});
18
19fn parse_csv(csv_string: Value, delimiter: Value) -> Resolved {
20    let csv_string = csv_string.try_bytes()?;
21    let delimiter = delimiter.try_bytes()?;
22    if delimiter.len() != 1 {
23        return Err("delimiter must be a single character".into());
24    }
25    let delimiter = delimiter[0];
26    let reader = ReaderBuilder::new()
27        .has_headers(false)
28        .delimiter(delimiter)
29        .from_reader(&*csv_string);
30    reader
31        .into_byte_records()
32        .next()
33        .transpose()
34        .map_err(|err| format!("invalid csv record: {err}").into()) // shouldn't really happen
35        .map(|record| {
36            record
37                .map(|record| {
38                    record
39                        .iter()
40                        .map(|x| Bytes::copy_from_slice(x).into())
41                        .collect::<Vec<Value>>()
42                })
43                .unwrap_or_default()
44                .into()
45        })
46}
47
/// Function definition for `parse_csv`: parses a single CSV formatted row.
#[derive(Debug, Clone, Copy)]
pub struct ParseCsv;
50
51impl Function for ParseCsv {
52    fn identifier(&self) -> &'static str {
53        "parse_csv"
54    }
55
56    fn usage(&self) -> &'static str {
57        "Parses a single CSV formatted row. Only the first row is parsed in case of multiline input value."
58    }
59
60    fn category(&self) -> &'static str {
61        Category::Parse.as_ref()
62    }
63
64    fn internal_failure_reasons(&self) -> &'static [&'static str] {
65        &[
66            "The delimiter must be a single-byte UTF-8 character.",
67            "`value` is not a valid CSV string.",
68        ]
69    }
70
71    fn return_kind(&self) -> u16 {
72        kind::ARRAY
73    }
74
75    fn notices(&self) -> &'static [&'static str] {
76        &[indoc! {"
77            All values are returned as strings. We recommend manually coercing values to desired
78            types as you see fit.
79        "}]
80    }
81
82    fn examples(&self) -> &'static [Example] {
83        &[
84            example! {
85                title: "Parse a single CSV formatted row",
86                source: r#"parse_csv!(s'foo,bar,"foo "", bar"')"#,
87                result: Ok(r#"["foo", "bar", "foo \", bar"]"#),
88            },
89            example! {
90                title: "Parse a single CSV formatted row with custom delimiter",
91                source: r#"parse_csv!("foo bar", delimiter: " ")"#,
92                result: Ok(r#"["foo", "bar"]"#),
93            },
94        ]
95    }
96
97    fn compile(
98        &self,
99        _state: &state::TypeState,
100        _ctx: &mut FunctionCompileContext,
101        arguments: ArgumentList,
102    ) -> Compiled {
103        let value = arguments.required("value");
104        let delimiter = arguments.optional("delimiter");
105        Ok(ParseCsvFn { value, delimiter }.as_expr())
106    }
107
108    fn parameters(&self) -> &'static [Parameter] {
109        PARAMETERS.as_slice()
110    }
111}
112
113#[derive(Debug, Clone)]
114struct ParseCsvFn {
115    value: Box<dyn Expression>,
116    delimiter: Option<Box<dyn Expression>>,
117}
118
119impl FunctionExpression for ParseCsvFn {
120    fn resolve(&self, ctx: &mut Context) -> Resolved {
121        let csv_string = self.value.resolve(ctx)?;
122        let delimiter = self
123            .delimiter
124            .map_resolve_with_default(ctx, || DEFAULT_DELIMITER.clone())?;
125
126        parse_csv(csv_string, delimiter)
127    }
128
129    fn type_def(&self, _: &state::TypeState) -> TypeDef {
130        TypeDef::array(inner_kind()).fallible()
131    }
132}
133
134#[inline]
135fn inner_kind() -> Collection<Index> {
136    let mut v = Collection::any();
137    v.set_unknown(Kind::bytes());
138    v
139}
140
#[cfg(test)]
mod tests {
    use super::*;
    use crate::value;

    // Every case expects the same type definition: a fallible array whose
    // elements are bytes.
    test_function![
        parse_csv => ParseCsv;

        // Quoted field containing an escaped quote and the delimiter itself.
        valid {
            args: func_args![value: value!("foo,bar,\"foo \"\", bar\"")],
            want: Ok(value!(["foo", "bar", "foo \", bar"])),
            tdef: TypeDef::array(inner_kind()).fallible(),
        }

        // Fields are handled as raw bytes, so invalid UTF-8 passes through untouched.
        invalid_utf8 {
            args: func_args![value: value!(Bytes::copy_from_slice(&b"foo,b\xFFar"[..]))],
            want: Ok(value!(vec!["foo".into(), value!(Bytes::copy_from_slice(&b"b\xFFar"[..]))])),
            tdef: TypeDef::array(inner_kind()).fallible(),
        }

        // A single-byte delimiter other than the default comma.
        custom_delimiter {
            args: func_args![value: value!("foo bar"), delimiter: value!(" ")],
            want: Ok(value!(["foo", "bar"])),
            tdef: TypeDef::array(inner_kind()).fallible(),
        }

        // A delimiter longer than one byte is rejected.
        invalid_delimiter {
            args: func_args![value: value!("foo bar"), delimiter: value!(",,")],
            want: Err("delimiter must be a single character"),
            tdef: TypeDef::array(inner_kind()).fallible(),
        }

        // Input without any delimiter yields a one-element array.
        single_value {
            args: func_args![value: value!("foo")],
            want: Ok(value!(["foo"])),
            tdef: TypeDef::array(inner_kind()).fallible(),
        }

        // Empty input yields no record, hence an empty array.
        empty_string {
            args: func_args![value: value!("")],
            want: Ok(value!([])),
            tdef: TypeDef::array(inner_kind()).fallible(),
        }

        // Only the first line is parsed; later lines are ignored.
        multiple_lines {
            args: func_args![value: value!("first,line\nsecond,line,with,more,fields")],
            want: Ok(value!(["first", "line"])),
            tdef: TypeDef::array(inner_kind()).fallible(),
        }
    ];
}