vector/config/
format.rs

1//! Support for loading configs from multiple formats.
2
3#![deny(missing_docs, missing_debug_implementations)]
4
5use std::{fmt, path::Path, str::FromStr};
6
7use serde::{Deserialize, Serialize, de};
8use vector_config_macros::Configurable;
9
/// A type alias for an optional [`Format`], used to better capture the
/// semantics of a format *hint*: `Some(format)` carries a hint, while `None`
/// carries none.
pub type FormatHint = Option<Format>;
12
/// The format used to represent the configuration data.
///
/// Through `serde`, the variants are (de)serialized under their snake_case
/// names (`toml`, `json` and `yaml`). [`Format::Toml`] is the [`Default`]
/// variant, and the derived ordering follows the declaration order below.
#[derive(
    Debug,
    Default,
    Copy,
    Clone,
    Eq,
    PartialEq,
    Ord,
    PartialOrd,
    Hash,
    Serialize,
    Deserialize,
    Configurable,
)]
#[serde(rename_all = "snake_case")]
pub enum Format {
    /// TOML format is used.
    ///
    /// This is the default variant.
    #[default]
    Toml,
    /// JSON format is used.
    Json,
    /// YAML format is used.
    Yaml,
}
38
39impl FromStr for Format {
40    type Err = String;
41
42    fn from_str(s: &str) -> Result<Self, Self::Err> {
43        match s.to_lowercase().as_str() {
44            "toml" => Ok(Format::Toml),
45            "yaml" => Ok(Format::Yaml),
46            "json" => Ok(Format::Json),
47            _ => Err(format!("Invalid format: {s}")),
48        }
49    }
50}
51
52impl fmt::Display for Format {
53    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
54        let format = match self {
55            Format::Toml => "toml",
56            Format::Json => "json",
57            Format::Yaml => "yaml",
58        };
59        write!(f, "{format}")
60    }
61}
62
63impl Format {
64    /// Obtain the format from the file path using extension as a hint.
65    pub fn from_path<T: AsRef<Path>>(path: T) -> Result<Self, T> {
66        match path.as_ref().extension().and_then(|ext| ext.to_str()) {
67            Some("toml") => Ok(Format::Toml),
68            Some("yaml") | Some("yml") => Ok(Format::Yaml),
69            Some("json") => Ok(Format::Json),
70            _ => Err(path),
71        }
72    }
73}
74
75/// Parse the string represented in the specified format.
76pub fn deserialize<T>(content: &str, format: Format) -> Result<T, Vec<String>>
77where
78    T: de::DeserializeOwned,
79{
80    match format {
81        Format::Toml => toml::from_str(content).map_err(|e| vec![e.to_string()]),
82        Format::Yaml => serde_yaml::from_str::<serde_yaml::Value>(content)
83            .and_then(|mut v| {
84                v.apply_merge()?;
85                serde_yaml::from_value(v)
86            })
87            .map_err(|e| vec![e.to_string()]),
88        Format::Json => serde_json::from_str(content).map_err(|e| vec![e.to_string()]),
89    }
90}
91
92/// Serialize the specified `value` into a string.
93pub fn serialize<T>(value: &T, format: Format) -> Result<String, String>
94where
95    T: serde::ser::Serialize,
96{
97    match format {
98        Format::Toml => toml::to_string(value).map_err(|e| e.to_string()),
99        Format::Yaml => serde_yaml::to_string(value).map_err(|e| e.to_string()),
100        Format::Json => serde_json::to_string_pretty(value).map_err(|e| e.to_string()),
101    }
102}
103
104#[cfg(test)]
105mod tests {
106    use super::*;
107
108    /// This test ensures the logic to guess file format from the file path
109    /// works correctly.
110    /// Like all other tests, it also demonstrates various cases and how our
111    /// code behaves when it encounters them.
112    #[test]
113    fn test_from_path() {
114        let cases = vec![
115            // Unknown - odd variants.
116            ("", None),
117            (".", None),
118            // Unknown - no ext.
119            ("myfile", None),
120            ("mydir/myfile", None),
121            ("/mydir/myfile", None),
122            // Unknown - some unknown ext.
123            ("myfile.myext", None),
124            ("mydir/myfile.myext", None),
125            ("/mydir/myfile.myext", None),
126            // Unknown - some unknown ext after known ext.
127            ("myfile.toml.myext", None),
128            ("myfile.yaml.myext", None),
129            ("myfile.yml.myext", None),
130            ("myfile.json.myext", None),
131            // Unknown - invalid case.
132            ("myfile.TOML", None),
133            ("myfile.YAML", None),
134            ("myfile.YML", None),
135            ("myfile.JSON", None),
136            // Unknown - nothing but extension.
137            (".toml", None),
138            (".yaml", None),
139            (".yml", None),
140            (".json", None),
141            // TOML
142            ("config.toml", Some(Format::Toml)),
143            ("/config.toml", Some(Format::Toml)),
144            ("/dir/config.toml", Some(Format::Toml)),
145            ("config.qq.toml", Some(Format::Toml)),
146            // YAML
147            ("config.yaml", Some(Format::Yaml)),
148            ("/config.yaml", Some(Format::Yaml)),
149            ("/dir/config.yaml", Some(Format::Yaml)),
150            ("config.qq.yaml", Some(Format::Yaml)),
151            ("config.yml", Some(Format::Yaml)),
152            ("/config.yml", Some(Format::Yaml)),
153            ("/dir/config.yml", Some(Format::Yaml)),
154            ("config.qq.yml", Some(Format::Yaml)),
155            // JSON
156            ("config.json", Some(Format::Json)),
157            ("/config.json", Some(Format::Json)),
158            ("/dir/config.json", Some(Format::Json)),
159            ("config.qq.json", Some(Format::Json)),
160        ];
161
162        for (input, expected) in cases {
163            let output = Format::from_path(std::path::PathBuf::from(input));
164            assert_eq!(expected, output.ok(), "{input}")
165        }
166    }
167
168    // Here we test that the deserializations from various formats match
169    // the TOML format.
170    #[cfg(all(
171        feature = "sources-socket",
172        feature = "transforms-sample",
173        feature = "sinks-socket"
174    ))]
175    #[test]
176    fn test_deserialize_matches_toml() {
177        use crate::config::ConfigBuilder;
178
179        macro_rules! concat_with_newlines {
180            ($($e:expr_2021,)*) => { concat!( $($e, "\n"),+ ) };
181        }
182
183        const SAMPLE_TOML: &str = r#"
184            [enrichment_tables.csv]
185            type = "file"
186            file.path = "/tmp/file.csv"
187            file.encoding.type = "csv"
188            [sources.in]
189            type = "socket"
190            mode = "tcp"
191            address = "127.0.0.1:1235"
192            [sources.in2]
193            type = "socket"
194            mode = "tcp"
195            address = "127.0.0.1:1234"
196            [transforms.sample]
197            type = "sample"
198            inputs = ["in"]
199            rate = 10
200            [sinks.out]
201            type = "socket"
202            mode = "tcp"
203            inputs = ["sample"]
204            encoding.codec = "text"
205            address = "127.0.0.1:9999"
206        "#;
207
208        let cases = vec![
209            // Valid empty inputs should resolve to an empty, default value.
210            ("", Format::Toml, Ok("")),
211            ("{}", Format::Yaml, Ok("")),
212            ("{}", Format::Json, Ok("")),
213            ("", Format::Yaml, Ok("")),
214            // Invalid "empty" inputs should resolve to an error.
215            (
216                "",
217                Format::Json,
218                Err(vec!["EOF while parsing a value at line 1 column 0"]),
219            ),
220            // Sample config.
221            (SAMPLE_TOML, Format::Toml, Ok(SAMPLE_TOML)),
222            (
223                // YAML is sensitive to leading whitespace and linebreaks.
224                concat_with_newlines!(
225                    r#"enrichment_tables:"#,
226                    r#"  csv:"#,
227                    r#"    type: "file""#,
228                    r#"    file:"#,
229                    r#"      path: "/tmp/file.csv""#,
230                    r#"      encoding:"#,
231                    r#"        type: "csv""#,
232                    r#"sources:"#,
233                    r#"  in: &a"#,
234                    r#"    type: "socket""#,
235                    r#"    mode: &b "tcp""#,
236                    r#"    address: "127.0.0.1:1235""#,
237                    r#"  in2:"#,
238                    r#"    <<: *a"#,
239                    r#"    address: "127.0.0.1:1234""#,
240                    r#"transforms:"#,
241                    r#"  sample:"#,
242                    r#"    type: "sample""#,
243                    r#"    inputs: ["in"]"#,
244                    r#"    rate: 10"#,
245                    r#"sinks:"#,
246                    r#"  out:"#,
247                    r#"    type: "socket""#,
248                    r#"    mode: *b"#,
249                    r#"    inputs: ["sample"]"#,
250                    r#"    encoding:"#,
251                    r#"      codec: "text""#,
252                    r#"    address: "127.0.0.1:9999""#,
253                ),
254                Format::Yaml,
255                Ok(SAMPLE_TOML),
256            ),
257            (
258                r#"
259                {
260                    "enrichment_tables": {
261                        "csv": {
262                            "type": "file",
263                            "file": {
264                              "path": "/tmp/file.csv",
265                              "encoding": {
266                                "type": "csv"
267                              }
268                            }
269                        }
270                    },
271                    "sources": {
272                        "in": {
273                            "type": "socket",
274                            "mode": "tcp",
275                            "address": "127.0.0.1:1235"
276                        },
277                        "in2": {
278                            "type": "socket",
279                            "mode": "tcp",
280                            "address": "127.0.0.1:1234"
281                        }
282                    },
283                    "transforms": {
284                        "sample": {
285                            "type": "sample",
286                            "inputs": ["in"],
287                            "rate": 10
288                        }
289                    },
290                    "sinks": {
291                        "out": {
292                            "type": "socket",
293                            "mode": "tcp",
294                            "inputs": ["sample"],
295                            "encoding": {
296                                "codec": "text"
297                            },
298                            "address": "127.0.0.1:9999"
299                        }
300                    }
301                }
302                "#,
303                Format::Json,
304                Ok(SAMPLE_TOML),
305            ),
306        ];
307
308        for (input, format, expected) in cases {
309            // Here we use the same trick as at ConfigBuilder::clone impl to
310            // compare the results.
311
312            let output = deserialize(input, format);
313            match expected {
314                Ok(expected) => {
315                    #[allow(clippy::expect_fun_call)] // false positive
316                    let output: ConfigBuilder = output.expect(&format!(
317                        "expected Ok, got Err with format {format:?} and input {input:?}"
318                    ));
319                    let output_json = serde_json::to_value(output).unwrap();
320                    let expected_output: ConfigBuilder = deserialize(expected, Format::Toml)
321                        .expect("Invalid TOML passed as an expectation");
322                    let expected_json = serde_json::to_value(expected_output).unwrap();
323                    assert_eq!(expected_json, output_json, "{input}")
324                }
325                Err(expected) => assert_eq!(
326                    expected,
327                    output.expect_err(&format!(
328                        "expected Err, got Ok with format {format:?} and input {input:?}"
329                    )),
330                    "{input}"
331                ),
332            }
333        }
334    }
335}