vector/config/
cmd.rs

1use std::path::PathBuf;
2
3use clap::Parser;
4use serde_json::Value;
5
6use super::{ConfigBuilder, load_builder_from_paths, load_source_from_paths, process_paths};
7use crate::{cli::handle_config_errors, config};
8
// Command-line options consumed by `cmd` in this file (the `vector config` subcommand).
//
// NOTE(review): the explanatory notes inside this struct are deliberately plain `//`
// comments. clap's derive uses `///` doc comments as user-facing help text, so the
// `///` lines below are effectively runtime strings, not free-form documentation.
#[derive(Parser, Debug, Clone)]
#[command(rename_all = "kebab-case")]
pub struct Opts {
    /// Pretty print JSON
    // Forwarded to `serialize_to_json` as its `pretty_print` argument.
    #[arg(short, long)]
    pretty: bool,

    /// Include default values where missing from config
    // Forwarded to `serialize_to_json` as its `include_defaults` argument.
    #[arg(short, long)]
    include_defaults: bool,

    /// Read configuration from one or more files. Wildcard paths are supported.
    /// File format is detected from the file name.
    /// If zero files are specified, the deprecated default config path
    /// `/etc/vector/vector.yaml` is targeted.
    // The clap argument id is `config` rather than the field name `paths`
    // (presumably so the flag is spelled `--config`; confirm against the clap derive docs).
    // `paths_with_formats` passes these paths on with no format hint (`None`).
    #[arg(
        id = "config",
        short,
        long,
        env = "VECTOR_CONFIG",
        value_delimiter(',')
    )]
    paths: Vec<PathBuf>,

    /// Vector config files in TOML format.
    // Paired with `config::Format::Toml` in `paths_with_formats`.
    #[arg(id = "config-toml", long, value_delimiter(','))]
    paths_toml: Vec<PathBuf>,

    /// Vector config files in JSON format.
    // Paired with `config::Format::Json` in `paths_with_formats`.
    #[arg(id = "config-json", long, value_delimiter(','))]
    paths_json: Vec<PathBuf>,

    /// Vector config files in YAML format.
    // Paired with `config::Format::Yaml` in `paths_with_formats`.
    #[arg(id = "config-yaml", long, value_delimiter(','))]
    paths_yaml: Vec<PathBuf>,

    /// Read configuration from files in one or more directories.
    /// File format is detected from the file name.
    ///
    /// Files not ending in .toml, .json, .yaml, or .yml will be ignored.
    // The only `pub` field of this struct. Each entry becomes a
    // `config::ConfigPath::Dir` in `paths_with_formats`.
    #[arg(
        id = "config-dir",
        short = 'C',
        long,
        env = "VECTOR_CONFIG_DIR",
        value_delimiter(',')
    )]
    pub config_dirs: Vec<PathBuf>,
}
58
59impl Opts {
60    fn paths_with_formats(&self) -> Vec<config::ConfigPath> {
61        config::merge_path_lists(vec![
62            (&self.paths, None),
63            (&self.paths_toml, Some(config::Format::Toml)),
64            (&self.paths_json, Some(config::Format::Json)),
65            (&self.paths_yaml, Some(config::Format::Yaml)),
66        ])
67        .map(|(path, hint)| config::ConfigPath::File(path, hint))
68        .chain(
69            self.config_dirs
70                .iter()
71                .map(|dir| config::ConfigPath::Dir(dir.to_path_buf())),
72        )
73        .collect()
74    }
75}
76
77/// Helper to merge JSON. Handles objects and array concatenation.
78fn merge_json(a: &mut Value, b: Value) {
79    match (a, b) {
80        (Value::Object(a), Value::Object(b)) => {
81            for (k, v) in b {
82                merge_json(a.entry(k).or_insert(Value::Null), v);
83            }
84        }
85        (a, b) => {
86            *a = b;
87        }
88    }
89}
90
91/// Helper to sort array values.
92fn sort_json_array_values(json: &mut Value) {
93    match json {
94        Value::Array(arr) => {
95            for v in arr.iter_mut() {
96                sort_json_array_values(v);
97            }
98
99            // Since `Value` does not have a native ordering, we first convert
100            // to string, sort, and then convert back to `Value`.
101            //
102            // Practically speaking, there should not be config options that mix
103            // many JSON types in a single array. This is mainly to sort fields
104            // like component inputs.
105            let mut a = arr
106                .iter()
107                .map(|v| serde_json::to_string(v).unwrap())
108                .collect::<Vec<_>>();
109            a.sort();
110            *arr = a
111                .iter()
112                .map(|v| serde_json::from_str(v.as_str()).unwrap())
113                .collect::<Vec<_>>();
114        }
115        Value::Object(json) => {
116            for (_, v) in json {
117                sort_json_array_values(v);
118            }
119        }
120        _ => {}
121    }
122}
123
124/// Convert a raw user config to a JSON string
125fn serialize_to_json(
126    source: toml::value::Table,
127    source_builder: &ConfigBuilder,
128    include_defaults: bool,
129    pretty_print: bool,
130) -> serde_json::Result<String> {
131    // Convert table to JSON
132    let mut source_json = serde_json::to_value(source)
133        .expect("should serialize config source to JSON. Please report.");
134
135    // If a user has requested default fields, we'll serialize a `ConfigBuilder`. Otherwise,
136    // we'll serialize the raw user provided config (without interpolated env vars, to preserve
137    // the original source).
138    if include_defaults {
139        // For security, we don't want environment variables to be interpolated in the final
140        // output, but we *do* want defaults. To work around this, we'll serialize `ConfigBuilder`
141        // to JSON, and merge in the raw config which will contain the pre-interpolated strings.
142        let mut builder = serde_json::to_value(source_builder)
143            .expect("should serialize ConfigBuilder to JSON. Please report.");
144
145        merge_json(&mut builder, source_json);
146
147        source_json = builder
148    }
149
150    sort_json_array_values(&mut source_json);
151
152    // Get a JSON string. This will either be pretty printed or (default) minified.
153    if pretty_print {
154        serde_json::to_string_pretty(&source_json)
155    } else {
156        serde_json::to_string(&source_json)
157    }
158}
159
160/// Function used by the `vector config` subcommand for outputting a normalized configuration.
161/// The purpose of this func is to combine user configuration after processing all paths,
162/// Pipelines expansions, etc. The JSON result of this serialization can itself be used as a config,
163/// which also makes it useful for version control or treating as a singular unit of configuration.
164pub fn cmd(opts: &Opts) -> exitcode::ExitCode {
165    let paths = opts.paths_with_formats();
166    // Start by serializing to a `ConfigBuilder`. This will leverage validation in config
167    // builder fields which we'll use to error out if required.
168    let (paths, builder) = match process_paths(&paths) {
169        Some(paths) => match load_builder_from_paths(&paths) {
170            Ok(builder) => (paths, builder),
171            Err(errs) => return handle_config_errors(errs),
172        },
173        None => return exitcode::CONFIG,
174    };
175
176    // Load source TOML.
177    let source = match load_source_from_paths(&paths) {
178        Ok(map) => map,
179        Err(errs) => return handle_config_errors(errs),
180    };
181
182    let json = serialize_to_json(source, &builder, opts.include_defaults, opts.pretty);
183
184    #[allow(clippy::print_stdout)]
185    {
186        println!("{}", json.expect("config should be serializable"));
187    }
188
189    exitcode::OK
190}
191
192#[cfg(all(test, feature = "sources", feature = "transforms", feature = "sinks"))]
193mod tests {
194    use std::collections::HashMap;
195
196    use proptest::{num, prelude::*, sample};
197    use rand::{
198        SeedableRng,
199        prelude::{SliceRandom, StdRng},
200    };
201    use serde_json::json;
202    use similar_asserts::assert_eq;
203    use vector_lib::configurable::component::{
204        SinkDescription, SourceDescription, TransformDescription,
205    };
206
207    use super::merge_json;
208    use crate::{
209        config::{ConfigBuilder, Format, cmd::serialize_to_json, vars},
210        generate,
211        generate::{TransformInputsStrategy, generate_example},
212    };
213
214    #[test]
215    fn test_array_override() {
216        let mut json = json!({
217            "arr": [
218                "value1", "value2"
219            ]
220        });
221
222        let to_override = json!({
223            "arr": [
224                "value3", "value4"
225            ]
226        });
227
228        merge_json(&mut json, to_override);
229
230        assert_eq!(*json.get("arr").unwrap(), json!(["value3", "value4"]))
231    }
232
233    #[test]
234    fn include_defaults_does_not_include_env_vars() {
235        let env_var = "VECTOR_CONFIG_INCLUDE_DEFAULTS_TEST";
236        let env_var_in_arr = "VECTOR_CONFIG_INCLUDE_DEFAULTS_TEST_IN_ARR";
237
238        let config_source = format!(
239            r#"
240            [sources.in]
241            type = "demo_logs"
242            format = "${{{env_var}}}"
243
244            [sinks.out]
245            type = "blackhole"
246            inputs = ["${{{env_var_in_arr}}}"]
247        "#
248        );
249        let interpolated_config_source = vars::interpolate(
250            config_source.as_ref(),
251            &HashMap::from([
252                (env_var.to_string(), "syslog".to_string()),
253                (env_var_in_arr.to_string(), "in".to_string()),
254            ]),
255        )
256        .unwrap();
257
258        let json: serde_json::Value = serde_json::from_str(
259            serialize_to_json(
260                toml::from_str(config_source.as_ref()).unwrap(),
261                &ConfigBuilder::from_toml(interpolated_config_source.as_ref()),
262                true,
263                false,
264            )
265            .unwrap()
266            .as_ref(),
267        )
268        .unwrap();
269
270        assert_eq!(
271            json["sources"]["in"]["format"],
272            json!(format!("${{{}}}", env_var))
273        );
274        assert_eq!(
275            json["sinks"]["out"]["inputs"],
276            json!(vec![format!("${{{}}}", env_var_in_arr)])
277        );
278    }
279
280    /// Select any 2-4 sources
281    fn arb_sources() -> impl Strategy<Value = Vec<&'static str>> {
282        let mut types = SourceDescription::types();
283        // The `file_descriptor` source produces different defaults each time it is used, and so
284        // will never compare equal below.
285        types.retain(|t| *t != "file_descriptor");
286        sample::subsequence(types, 2..=4)
287    }
288
289    /// Select any 2-4 transforms
290    fn arb_transforms() -> impl Strategy<Value = Vec<&'static str>> {
291        sample::subsequence(TransformDescription::types(), 2..=4)
292    }
293
294    /// Select any 2-4 sinks
295    fn arb_sinks() -> impl Strategy<Value = Vec<&'static str>> {
296        sample::subsequence(SinkDescription::types(), 2..=4)
297    }
298
299    fn create_config_source(sources: &[&str], transforms: &[&str], sinks: &[&str]) -> String {
300        // This creates a string in the syntax expected by the `vector generate`
301        // command whose internal mechanics we are using to create valid Vector
302        // configurations.
303        //
304        // Importantly, we have to name the components (in this case, simply by
305        // their type as each type of component is guaranteed to only appear
306        // once), because (in some tests) we'd like to shuffle the configuration
307        // later in a way that does not change its actual semantics. Otherwise,
308        // an autogenerated ID like `source0` could correspond to different
309        // sources depending on the ordering of the `vector generate` input.
310        //
311        // We also append a fixed `remap` transform to the transforms list. This
312        // ensures sink inputs are consistent since `generate` uses the last
313        // transform the input for each sink.
314        let generate_config_str = format!(
315            "{}/{}/{}",
316            sources
317                .iter()
318                .map(|source| format!("{source}:{source}"))
319                .collect::<Vec<_>>()
320                .join(","),
321            transforms
322                .iter()
323                .map(|transform| format!("{transform}:{transform}"))
324                .chain(vec!["manually-added-remap:remap".to_string()])
325                .collect::<Vec<_>>()
326                .join(","),
327            sinks
328                .iter()
329                .map(|sink| format!("{sink}:{sink}"))
330                .collect::<Vec<_>>()
331                .join(","),
332        );
333        let opts = generate::Opts {
334            fragment: true,
335            expression: generate_config_str.to_string(),
336            file: None,
337            format: Format::Toml,
338        };
339        generate_example(&opts, TransformInputsStrategy::All).expect("invalid config generated")
340    }
341
342    proptest! {
343        #[test]
344        /// Output should be the same regardless of input config ordering
345        fn output_has_consistent_ordering(mut sources in arb_sources(), mut transforms in arb_transforms(), mut sinks in arb_sinks(), seed in num::u64::ANY) {
346            let config_source = create_config_source(sources.as_ref(), transforms.as_ref(), sinks.as_ref());
347
348            // Shuffle the ordering of components which shuffles the order in
349            // which items appear in the TOML config
350            let mut rng = StdRng::seed_from_u64(seed);
351            sources.shuffle(&mut rng);
352            transforms.shuffle(&mut rng);
353            sinks.shuffle(&mut rng);
354            let shuffled_config_source = create_config_source(sources.as_ref(), transforms.as_ref(), sinks.as_ref());
355
356            let json = serialize_to_json(
357                toml::from_str(config_source.as_ref()).unwrap(),
358                &ConfigBuilder::from_toml(config_source.as_ref()),
359                false,
360                false
361            )
362            .unwrap();
363            let shuffled_json = serialize_to_json(
364                toml::from_str(shuffled_config_source.as_ref()).unwrap(),
365                &ConfigBuilder::from_toml(shuffled_config_source.as_ref()),
366                false,
367                false
368            )
369            .unwrap();
370
371            assert_eq!(json, shuffled_json);
372        }
373    }
374
375    proptest! {
376        #[test]
377        /// Output is a valid configuration
378        fn output_is_a_valid_config(sources in arb_sources(), transforms in arb_transforms(), sinks in arb_sinks()) {
379            let config_source = create_config_source(sources.as_ref(), transforms.as_ref(), sinks.as_ref());
380            let json = serialize_to_json(
381                toml::from_str(config_source.as_ref()).unwrap(),
382                &ConfigBuilder::from_toml(config_source.as_ref()),
383                false,
384                false
385            )
386            .unwrap();
387            assert!(serde_json::from_str::<ConfigBuilder>(json.as_ref()).is_ok());
388        }
389    }
390}