1use crate::compiler::prelude::*;
2use csv::ReaderBuilder;
3use std::sync::LazyLock;
4
5static DEFAULT_DELIMITER: LazyLock<Value> = LazyLock::new(|| Value::Bytes(Bytes::from(",")));
6
7static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
8 vec![
9 Parameter::required("value", kind::BYTES, "The string to parse."),
10 Parameter::optional(
11 "delimiter",
12 kind::BYTES,
13 "The field delimiter to use when parsing. Must be a single-byte utf8 character.",
14 )
15 .default(&DEFAULT_DELIMITER),
16 ]
17});
18
19fn parse_csv(csv_string: Value, delimiter: Value) -> Resolved {
20 let csv_string = csv_string.try_bytes()?;
21 let delimiter = delimiter.try_bytes()?;
22 if delimiter.len() != 1 {
23 return Err("delimiter must be a single character".into());
24 }
25 let delimiter = delimiter[0];
26 let reader = ReaderBuilder::new()
27 .has_headers(false)
28 .delimiter(delimiter)
29 .from_reader(&*csv_string);
30 reader
31 .into_byte_records()
32 .next()
33 .transpose()
34 .map_err(|err| format!("invalid csv record: {err}").into()) .map(|record| {
36 record
37 .map(|record| {
38 record
39 .iter()
40 .map(|x| Bytes::copy_from_slice(x).into())
41 .collect::<Vec<Value>>()
42 })
43 .unwrap_or_default()
44 .into()
45 })
46}
47
/// Marker type for the `parse_csv` function; it carries no state.
#[derive(Debug, Clone, Copy)]
pub struct ParseCsv;
50
51impl Function for ParseCsv {
52 fn identifier(&self) -> &'static str {
53 "parse_csv"
54 }
55
56 fn usage(&self) -> &'static str {
57 "Parses a single CSV formatted row. Only the first row is parsed in case of multiline input value."
58 }
59
60 fn category(&self) -> &'static str {
61 Category::Parse.as_ref()
62 }
63
64 fn internal_failure_reasons(&self) -> &'static [&'static str] {
65 &[
66 "The delimiter must be a single-byte UTF-8 character.",
67 "`value` is not a valid CSV string.",
68 ]
69 }
70
71 fn return_kind(&self) -> u16 {
72 kind::ARRAY
73 }
74
75 fn notices(&self) -> &'static [&'static str] {
76 &[indoc! {"
77 All values are returned as strings. We recommend manually coercing values to desired
78 types as you see fit.
79 "}]
80 }
81
82 fn examples(&self) -> &'static [Example] {
83 &[
84 example! {
85 title: "Parse a single CSV formatted row",
86 source: r#"parse_csv!(s'foo,bar,"foo "", bar"')"#,
87 result: Ok(r#"["foo", "bar", "foo \", bar"]"#),
88 },
89 example! {
90 title: "Parse a single CSV formatted row with custom delimiter",
91 source: r#"parse_csv!("foo bar", delimiter: " ")"#,
92 result: Ok(r#"["foo", "bar"]"#),
93 },
94 ]
95 }
96
97 fn compile(
98 &self,
99 _state: &state::TypeState,
100 _ctx: &mut FunctionCompileContext,
101 arguments: ArgumentList,
102 ) -> Compiled {
103 let value = arguments.required("value");
104 let delimiter = arguments.optional("delimiter");
105 Ok(ParseCsvFn { value, delimiter }.as_expr())
106 }
107
108 fn parameters(&self) -> &'static [Parameter] {
109 PARAMETERS.as_slice()
110 }
111}
112
113#[derive(Debug, Clone)]
114struct ParseCsvFn {
115 value: Box<dyn Expression>,
116 delimiter: Option<Box<dyn Expression>>,
117}
118
119impl FunctionExpression for ParseCsvFn {
120 fn resolve(&self, ctx: &mut Context) -> Resolved {
121 let csv_string = self.value.resolve(ctx)?;
122 let delimiter = self
123 .delimiter
124 .map_resolve_with_default(ctx, || DEFAULT_DELIMITER.clone())?;
125
126 parse_csv(csv_string, delimiter)
127 }
128
129 fn type_def(&self, _: &state::TypeState) -> TypeDef {
130 TypeDef::array(inner_kind()).fallible()
131 }
132}
133
134#[inline]
135fn inner_kind() -> Collection<Index> {
136 let mut v = Collection::any();
137 v.set_unknown(Kind::bytes());
138 v
139}
140
#[cfg(test)]
mod tests {
    use super::*;
    use crate::value;

    test_function![
        parse_csv => ParseCsv;

        // ---- happy paths ----

        // Quoted field containing an escaped quote and the delimiter itself.
        valid {
            args: func_args![value: value!("foo,bar,\"foo \"\", bar\"")],
            want: Ok(value!(["foo", "bar", "foo \", bar"])),
            tdef: TypeDef::array(inner_kind()).fallible(),
        }

        // A caller-supplied single-byte delimiter replaces the comma.
        custom_delimiter {
            args: func_args![value: value!("foo bar"), delimiter: value!(" ")],
            want: Ok(value!(["foo", "bar"])),
            tdef: TypeDef::array(inner_kind()).fallible(),
        }

        // Input without any delimiter yields a one-element array.
        single_value {
            args: func_args![value: value!("foo")],
            want: Ok(value!(["foo"])),
            tdef: TypeDef::array(inner_kind()).fallible(),
        }

        // ---- edge cases ----

        // Empty input produces an empty array rather than an error.
        empty_string {
            args: func_args![value: value!("")],
            want: Ok(value!([])),
            tdef: TypeDef::array(inner_kind()).fallible(),
        }

        // Only the first line is parsed; later lines are ignored.
        multiple_lines {
            args: func_args![value: value!("first,line\nsecond,line,with,more,fields")],
            want: Ok(value!(["first", "line"])),
            tdef: TypeDef::array(inner_kind()).fallible(),
        }

        // Bytes that are not valid UTF-8 are carried through unchanged.
        invalid_utf8 {
            args: func_args![value: value!(Bytes::copy_from_slice(&b"foo,b\xFFar"[..]))],
            want: Ok(value!(vec!["foo".into(), value!(Bytes::copy_from_slice(&b"b\xFFar"[..]))])),
            tdef: TypeDef::array(inner_kind()).fallible(),
        }

        // ---- failures ----

        // A delimiter longer than one byte is rejected.
        invalid_delimiter {
            args: func_args![value: value!("foo bar"), delimiter: value!(",,")],
            want: Err("delimiter must be a single character"),
            tdef: TypeDef::array(inner_kind()).fallible(),
        }
    ];
}