vector/config/
format.rs

1//! Support for loading configs from multiple formats.
2
3#![deny(missing_docs, missing_debug_implementations)]
4
5use std::fmt;
6use std::path::Path;
7use std::str::FromStr;
8
9use serde::{de, Deserialize, Serialize};
10use vector_config_macros::Configurable;
11
/// A type alias to better capture the semantics.
///
/// A hint is either a concrete [`Format`] (`Some`) or absent (`None`).
pub type FormatHint = Option<Format>;
14
/// The format used to represent the configuration data.
// NOTE(review): the `///` text on this enum and its variants is left verbatim;
// the `Configurable` derive presumably consumes doc comments, so confirm before
// rewording them.
#[derive(
    Debug,
    Default,
    Copy,
    Clone,
    Eq,
    PartialEq,
    Ord,
    PartialOrd,
    Hash,
    Serialize,
    Deserialize,
    Configurable,
)]
// With `rename_all = "snake_case"`, serde reads and writes the variants as
// `toml`, `json` and `yaml`.
#[serde(rename_all = "snake_case")]
pub enum Format {
    /// TOML format is used.
    // `#[default]` makes this the value returned by `Format::default()`.
    #[default]
    Toml,
    /// JSON format is used.
    Json,
    /// YAML format is used.
    Yaml,
}
40
41impl FromStr for Format {
42    type Err = String;
43
44    fn from_str(s: &str) -> Result<Self, Self::Err> {
45        match s.to_lowercase().as_str() {
46            "toml" => Ok(Format::Toml),
47            "yaml" => Ok(Format::Yaml),
48            "json" => Ok(Format::Json),
49            _ => Err(format!("Invalid format: {s}")),
50        }
51    }
52}
53
54impl fmt::Display for Format {
55    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
56        let format = match self {
57            Format::Toml => "toml",
58            Format::Json => "json",
59            Format::Yaml => "yaml",
60        };
61        write!(f, "{format}")
62    }
63}
64
65impl Format {
66    /// Obtain the format from the file path using extension as a hint.
67    pub fn from_path<T: AsRef<Path>>(path: T) -> Result<Self, T> {
68        match path.as_ref().extension().and_then(|ext| ext.to_str()) {
69            Some("toml") => Ok(Format::Toml),
70            Some("yaml") | Some("yml") => Ok(Format::Yaml),
71            Some("json") => Ok(Format::Json),
72            _ => Err(path),
73        }
74    }
75}
76
77/// Parse the string represented in the specified format.
78pub fn deserialize<T>(content: &str, format: Format) -> Result<T, Vec<String>>
79where
80    T: de::DeserializeOwned,
81{
82    match format {
83        Format::Toml => toml::from_str(content).map_err(|e| vec![e.to_string()]),
84        Format::Yaml => serde_yaml::from_str::<serde_yaml::Value>(content)
85            .and_then(|mut v| {
86                v.apply_merge()?;
87                serde_yaml::from_value(v)
88            })
89            .map_err(|e| vec![e.to_string()]),
90        Format::Json => serde_json::from_str(content).map_err(|e| vec![e.to_string()]),
91    }
92}
93
94/// Serialize the specified `value` into a string.
95pub fn serialize<T>(value: &T, format: Format) -> Result<String, String>
96where
97    T: serde::ser::Serialize,
98{
99    match format {
100        Format::Toml => toml::to_string(value).map_err(|e| e.to_string()),
101        Format::Yaml => serde_yaml::to_string(value).map_err(|e| e.to_string()),
102        Format::Json => serde_json::to_string_pretty(value).map_err(|e| e.to_string()),
103    }
104}
105
// Unit tests for extension-based format detection and for checking that the
// same configuration deserializes identically from TOML, YAML and JSON.
#[cfg(test)]
mod tests {
    use super::*;

    /// This test ensures the logic to guess file format from the file path
    /// works correctly.
    /// Like all other tests, it also demonstrates various cases and how our
    /// code behaves when it encounters them.
    #[test]
    fn test_from_path() {
        // Each case is `(path, expected format)`; `None` means the format
        // cannot be determined from the path.
        let cases = vec![
            // Unknown - odd variants.
            ("", None),
            (".", None),
            // Unknown - no ext.
            ("myfile", None),
            ("mydir/myfile", None),
            ("/mydir/myfile", None),
            // Unknown - some unknown ext.
            ("myfile.myext", None),
            ("mydir/myfile.myext", None),
            ("/mydir/myfile.myext", None),
            // Unknown - some unknown ext after known ext.
            ("myfile.toml.myext", None),
            ("myfile.yaml.myext", None),
            ("myfile.yml.myext", None),
            ("myfile.json.myext", None),
            // Unknown - invalid case.
            ("myfile.TOML", None),
            ("myfile.YAML", None),
            ("myfile.YML", None),
            ("myfile.JSON", None),
            // Unknown - nothing but extension.
            (".toml", None),
            (".yaml", None),
            (".yml", None),
            (".json", None),
            // TOML
            ("config.toml", Some(Format::Toml)),
            ("/config.toml", Some(Format::Toml)),
            ("/dir/config.toml", Some(Format::Toml)),
            ("config.qq.toml", Some(Format::Toml)),
            // YAML
            ("config.yaml", Some(Format::Yaml)),
            ("/config.yaml", Some(Format::Yaml)),
            ("/dir/config.yaml", Some(Format::Yaml)),
            ("config.qq.yaml", Some(Format::Yaml)),
            ("config.yml", Some(Format::Yaml)),
            ("/config.yml", Some(Format::Yaml)),
            ("/dir/config.yml", Some(Format::Yaml)),
            ("config.qq.yml", Some(Format::Yaml)),
            // JSON
            ("config.json", Some(Format::Json)),
            ("/config.json", Some(Format::Json)),
            ("/dir/config.json", Some(Format::Json)),
            ("config.qq.json", Some(Format::Json)),
        ];

        for (input, expected) in cases {
            // `from_path` hands the path back on failure; `.ok()` discards it
            // so the result can be compared against `Option<Format>`.
            let output = Format::from_path(std::path::PathBuf::from(input));
            assert_eq!(expected, output.ok(), "{input}")
        }
    }

    // Here we test that the deserializations from various formats match
    // the TOML format.
    // The sample configuration below uses the socket source, the sample
    // transform and the socket sink, hence the feature gate.
    #[cfg(all(
        feature = "sources-socket",
        feature = "transforms-sample",
        feature = "sinks-socket"
    ))]
    #[test]
    fn test_deserialize_matches_toml() {
        use crate::config::ConfigBuilder;

        // Joins the given string literals, appending a newline after each one.
        macro_rules! concat_with_newlines {
            ($($e:expr_2021,)*) => { concat!( $($e, "\n"),+ ) };
        }

        // Reference configuration; the YAML and JSON samples below describe
        // the same configuration.
        const SAMPLE_TOML: &str = r#"
            [enrichment_tables.csv]
            type = "file"
            file.path = "/tmp/file.csv"
            file.encoding.type = "csv"
            [sources.in]
            type = "socket"
            mode = "tcp"
            address = "127.0.0.1:1235"
            [sources.in2]
            type = "socket"
            mode = "tcp"
            address = "127.0.0.1:1234"
            [transforms.sample]
            type = "sample"
            inputs = ["in"]
            rate = 10
            [sinks.out]
            type = "socket"
            mode = "tcp"
            inputs = ["sample"]
            encoding.codec = "text"
            address = "127.0.0.1:9999"
        "#;

        // Each case is `(input, input format, expectation)`. An `Ok`
        // expectation holds TOML that must deserialize to the same value as
        // the input; an `Err` expectation holds the expected error messages.
        let cases = vec![
            // Valid empty inputs should resolve to an empty, default value.
            ("", Format::Toml, Ok("")),
            ("{}", Format::Yaml, Ok("")),
            ("{}", Format::Json, Ok("")),
            ("", Format::Yaml, Ok("")),
            // Invalid "empty" inputs should resolve to an error.
            (
                "",
                Format::Json,
                Err(vec!["EOF while parsing a value at line 1 column 0"]),
            ),
            // Sample config.
            (SAMPLE_TOML, Format::Toml, Ok(SAMPLE_TOML)),
            (
                // YAML is sensitive to leading whitespace and linebreaks.
                // This sample uses anchors (`&a`, `&b`), an alias (`*b`) and
                // a merge key (`<<: *a`), so it also exercises the merge-key
                // handling in `deserialize`.
                concat_with_newlines!(
                    r#"enrichment_tables:"#,
                    r#"  csv:"#,
                    r#"    type: "file""#,
                    r#"    file:"#,
                    r#"      path: "/tmp/file.csv""#,
                    r#"      encoding:"#,
                    r#"        type: "csv""#,
                    r#"sources:"#,
                    r#"  in: &a"#,
                    r#"    type: "socket""#,
                    r#"    mode: &b "tcp""#,
                    r#"    address: "127.0.0.1:1235""#,
                    r#"  in2:"#,
                    r#"    <<: *a"#,
                    r#"    address: "127.0.0.1:1234""#,
                    r#"transforms:"#,
                    r#"  sample:"#,
                    r#"    type: "sample""#,
                    r#"    inputs: ["in"]"#,
                    r#"    rate: 10"#,
                    r#"sinks:"#,
                    r#"  out:"#,
                    r#"    type: "socket""#,
                    r#"    mode: *b"#,
                    r#"    inputs: ["sample"]"#,
                    r#"    encoding:"#,
                    r#"      codec: "text""#,
                    r#"    address: "127.0.0.1:9999""#,
                ),
                Format::Yaml,
                Ok(SAMPLE_TOML),
            ),
            (
                r#"
                {
                    "enrichment_tables": {
                        "csv": {
                            "type": "file",
                            "file": {
                              "path": "/tmp/file.csv",
                              "encoding": {
                                "type": "csv"
                              }
                            }
                        }
                    },
                    "sources": {
                        "in": {
                            "type": "socket",
                            "mode": "tcp",
                            "address": "127.0.0.1:1235"
                        },
                        "in2": {
                            "type": "socket",
                            "mode": "tcp",
                            "address": "127.0.0.1:1234"
                        }
                    },
                    "transforms": {
                        "sample": {
                            "type": "sample",
                            "inputs": ["in"],
                            "rate": 10
                        }
                    },
                    "sinks": {
                        "out": {
                            "type": "socket",
                            "mode": "tcp",
                            "inputs": ["sample"],
                            "encoding": {
                                "codec": "text"
                            },
                            "address": "127.0.0.1:9999"
                        }
                    }
                }
                "#,
                Format::Json,
                Ok(SAMPLE_TOML),
            ),
        ];

        for (input, format, expected) in cases {
            // Here we use the same trick as at ConfigBuilder::clone impl to
            // compare the results: both sides are converted to
            // `serde_json::Value` and those values are compared.

            let output = deserialize(input, format);
            match expected {
                Ok(expected) => {
                    #[allow(clippy::expect_fun_call)] // false positive
                    let output: ConfigBuilder = output.expect(&format!(
                        "expected Ok, got Err with format {format:?} and input {input:?}"
                    ));
                    let output_json = serde_json::to_value(output).unwrap();
                    // The expectation is always written as TOML.
                    let expected_output: ConfigBuilder = deserialize(expected, Format::Toml)
                        .expect("Invalid TOML passed as an expectation");
                    let expected_json = serde_json::to_value(expected_output).unwrap();
                    assert_eq!(expected_json, output_json, "{input}")
                }
                Err(expected) => assert_eq!(
                    expected,
                    output.expect_err(&format!(
                        "expected Err, got Ok with format {format:?} and input {input:?}"
                    )),
                    "{input}"
                ),
            }
        }
    }
}