vector/config/
cmd.rs

1use std::path::PathBuf;
2
3use clap::Parser;
4use serde_json::Value;
5
6use super::{ConfigBuilder, load_source_from_paths, loading::ConfigBuilderLoader, process_paths};
7use crate::{cli::handle_config_errors, config};
8
// Command-line options consumed by `cmd` in this file.
//
// NOTE: maintainer notes in this struct are written as plain `//` comments on purpose.
// clap's derive macro turns `///` doc comments on the struct and its fields into
// user-visible help text, so adding or editing `///` comments changes `--help` output.
#[derive(Parser, Debug, Clone)]
#[command(rename_all = "kebab-case")]
pub struct Opts {
    // Forwarded to `serialize_to_json` as `pretty_print`.
    /// Pretty print JSON
    #[arg(short, long)]
    pretty: bool,

    // Forwarded to `serialize_to_json` as `include_defaults`; when set, the serialized
    // `ConfigBuilder` is used as the base and the raw config is merged on top of it.
    /// Include default values where missing from config
    #[arg(short, long)]
    include_defaults: bool,

    // Paths with no explicit format: `paths_with_formats` pairs them with a `None` hint.
    /// Read configuration from one or more files. Wildcard paths are supported.
    /// File format is detected from the file name.
    /// If zero files are specified, the deprecated default config path
    /// `/etc/vector/vector.yaml` is targeted.
    #[arg(
        id = "config",
        short,
        long,
        env = "VECTOR_CONFIG",
        value_delimiter(',')
    )]
    paths: Vec<PathBuf>,

    // Paired with `Some(config::Format::Toml)` in `paths_with_formats`.
    /// Vector config files in TOML format.
    #[arg(id = "config-toml", long, value_delimiter(','))]
    paths_toml: Vec<PathBuf>,

    // Paired with `Some(config::Format::Json)` in `paths_with_formats`.
    /// Vector config files in JSON format.
    #[arg(id = "config-json", long, value_delimiter(','))]
    paths_json: Vec<PathBuf>,

    // Paired with `Some(config::Format::Yaml)` in `paths_with_formats`.
    /// Vector config files in YAML format.
    #[arg(id = "config-yaml", long, value_delimiter(','))]
    paths_yaml: Vec<PathBuf>,

    // Each entry becomes a `config::ConfigPath::Dir` in `paths_with_formats`, appended after
    // all file paths.
    /// Read configuration from files in one or more directories.
    /// File format is detected from the file name.
    ///
    /// Files not ending in .toml, .json, .yaml, or .yml will be ignored.
    #[arg(
        id = "config-dir",
        short = 'C',
        long,
        env = "VECTOR_CONFIG_DIR",
        value_delimiter(',')
    )]
    pub config_dirs: Vec<PathBuf>,

    // `cmd` negates this flag and passes the result to `ConfigBuilderLoader::interpolate_env`.
    /// Disable interpolation of environment variables in configuration files.
    #[arg(
        long,
        env = "VECTOR_DISABLE_ENV_VAR_INTERPOLATION",
        default_value = "false"
    )]
    pub disable_env_var_interpolation: bool,
}
66
67impl Opts {
68    fn paths_with_formats(&self) -> Vec<config::ConfigPath> {
69        config::merge_path_lists(vec![
70            (&self.paths, None),
71            (&self.paths_toml, Some(config::Format::Toml)),
72            (&self.paths_json, Some(config::Format::Json)),
73            (&self.paths_yaml, Some(config::Format::Yaml)),
74        ])
75        .map(|(path, hint)| config::ConfigPath::File(path, hint))
76        .chain(
77            self.config_dirs
78                .iter()
79                .map(|dir| config::ConfigPath::Dir(dir.to_path_buf())),
80        )
81        .collect()
82    }
83}
84
85/// Helper to merge JSON. Handles objects and array concatenation.
86fn merge_json(a: &mut Value, b: Value) {
87    match (a, b) {
88        (Value::Object(a), Value::Object(b)) => {
89            for (k, v) in b {
90                merge_json(a.entry(k).or_insert(Value::Null), v);
91            }
92        }
93        (a, b) => {
94            *a = b;
95        }
96    }
97}
98
99/// Helper to sort array values.
100fn sort_json_array_values(json: &mut Value) {
101    match json {
102        Value::Array(arr) => {
103            for v in arr.iter_mut() {
104                sort_json_array_values(v);
105            }
106
107            // Since `Value` does not have a native ordering, we first convert
108            // to string, sort, and then convert back to `Value`.
109            //
110            // Practically speaking, there should not be config options that mix
111            // many JSON types in a single array. This is mainly to sort fields
112            // like component inputs.
113            let mut a = arr
114                .iter()
115                .map(|v| serde_json::to_string(v).unwrap())
116                .collect::<Vec<_>>();
117            a.sort();
118            *arr = a
119                .iter()
120                .map(|v| serde_json::from_str(v.as_str()).unwrap())
121                .collect::<Vec<_>>();
122        }
123        Value::Object(json) => {
124            for (_, v) in json {
125                sort_json_array_values(v);
126            }
127        }
128        _ => {}
129    }
130}
131
132/// Convert a raw user config to a JSON string
133fn serialize_to_json(
134    source: toml::value::Table,
135    source_builder: &ConfigBuilder,
136    include_defaults: bool,
137    pretty_print: bool,
138) -> serde_json::Result<String> {
139    // Convert table to JSON
140    let mut source_json = serde_json::to_value(source)
141        .expect("should serialize config source to JSON. Please report.");
142
143    // If a user has requested default fields, we'll serialize a `ConfigBuilder`. Otherwise,
144    // we'll serialize the raw user provided config (without interpolated env vars, to preserve
145    // the original source).
146    if include_defaults {
147        // For security, we don't want environment variables to be interpolated in the final
148        // output, but we *do* want defaults. To work around this, we'll serialize `ConfigBuilder`
149        // to JSON, and merge in the raw config which will contain the pre-interpolated strings.
150        let mut builder = serde_json::to_value(source_builder)
151            .expect("should serialize ConfigBuilder to JSON. Please report.");
152
153        merge_json(&mut builder, source_json);
154
155        source_json = builder
156    }
157
158    sort_json_array_values(&mut source_json);
159
160    // Get a JSON string. This will either be pretty printed or (default) minified.
161    if pretty_print {
162        serde_json::to_string_pretty(&source_json)
163    } else {
164        serde_json::to_string(&source_json)
165    }
166}
167
168/// Function used by the `vector config` subcommand for outputting a normalized configuration.
169/// The purpose of this func is to combine user configuration after processing all paths,
170/// Pipelines expansions, etc. The JSON result of this serialization can itself be used as a config,
171/// which also makes it useful for version control or treating as a singular unit of configuration.
172pub fn cmd(opts: &Opts) -> exitcode::ExitCode {
173    let paths = opts.paths_with_formats();
174    // Start by serializing to a `ConfigBuilder`. This will leverage validation in config
175    // builder fields which we'll use to error out if required.
176    let (paths, builder) = match process_paths(&paths) {
177        Some(paths) => {
178            match ConfigBuilderLoader::default()
179                .interpolate_env(!opts.disable_env_var_interpolation)
180                .load_from_paths(&paths)
181            {
182                Ok(builder) => (paths, builder),
183                Err(errs) => return handle_config_errors(errs),
184            }
185        }
186        None => return exitcode::CONFIG,
187    };
188
189    // Load source TOML.
190    let source = match load_source_from_paths(&paths) {
191        Ok(map) => map,
192        Err(errs) => return handle_config_errors(errs),
193    };
194
195    let json = serialize_to_json(source, &builder, opts.include_defaults, opts.pretty);
196
197    #[allow(clippy::print_stdout)]
198    {
199        println!("{}", json.expect("config should be serializable"));
200    }
201
202    exitcode::OK
203}
204
#[cfg(all(test, feature = "sources", feature = "transforms", feature = "sinks"))]
mod tests {
    use std::collections::HashMap;

    use proptest::{num, prelude::*, sample};
    use rand::{
        SeedableRng,
        prelude::{SliceRandom, StdRng},
    };
    use serde_json::json;
    use similar_asserts::assert_eq;
    use vector_lib::configurable::component::{
        SinkDescription, SourceDescription, TransformDescription,
    };

    use super::merge_json;
    use crate::{
        config::{ConfigBuilder, Format, cmd::serialize_to_json, vars},
        generate,
        generate::{TransformInputsStrategy, generate_example},
    };

    /// Merging replaces an array wholesale instead of concatenating it.
    #[test]
    fn test_array_override() {
        let mut json = json!({
            "arr": [
                "value1", "value2"
            ]
        });

        let to_override = json!({
            "arr": [
                "value3", "value4"
            ]
        });

        merge_json(&mut json, to_override);

        assert_eq!(*json.get("arr").unwrap(), json!(["value3", "value4"]))
    }

    /// With defaults included, the output must still show the raw `${VAR}` placeholders rather
    /// than the interpolated values, both for plain strings and for strings inside arrays.
    #[test]
    fn include_defaults_does_not_include_env_vars() {
        let env_var = "VECTOR_CONFIG_INCLUDE_DEFAULTS_TEST";
        let env_var_in_arr = "VECTOR_CONFIG_INCLUDE_DEFAULTS_TEST_IN_ARR";

        let config_source = format!(
            r#"
            [sources.in]
            type = "demo_logs"
            format = "${{{env_var}}}"

            [sinks.out]
            type = "blackhole"
            inputs = ["${{{env_var_in_arr}}}"]
        "#
        );
        let interpolated_config_source = vars::interpolate(
            config_source.as_ref(),
            &HashMap::from([
                (env_var.to_string(), "syslog".to_string()),
                (env_var_in_arr.to_string(), "in".to_string()),
            ]),
        )
        .unwrap();

        // The raw source goes in as the config table, while the builder is created from the
        // interpolated source.
        let json: serde_json::Value = serde_json::from_str(
            serialize_to_json(
                toml::from_str(config_source.as_ref()).unwrap(),
                &ConfigBuilder::from_toml(interpolated_config_source.as_ref()),
                true,
                false,
            )
            .unwrap()
            .as_ref(),
        )
        .unwrap();

        assert_eq!(
            json["sources"]["in"]["format"],
            json!(format!("${{{env_var}}}"))
        );
        assert_eq!(
            json["sinks"]["out"]["inputs"],
            json!(vec![format!("${{{env_var_in_arr}}}")])
        );
    }

    /// Select any 2-4 sources
    fn arb_sources() -> impl Strategy<Value = Vec<&'static str>> {
        let mut types = SourceDescription::types();
        // The `file_descriptor` source produces different defaults each time it is used, and so
        // will never compare equal below.
        types.retain(|t| *t != "file_descriptor");
        sample::subsequence(types, 2..=4)
    }

    /// Select any 2-4 transforms
    fn arb_transforms() -> impl Strategy<Value = Vec<&'static str>> {
        sample::subsequence(TransformDescription::types(), 2..=4)
    }

    /// Select any 2-4 sinks
    fn arb_sinks() -> impl Strategy<Value = Vec<&'static str>> {
        sample::subsequence(SinkDescription::types(), 2..=4)
    }

    /// Builds a TOML config containing the given components by driving the `vector generate`
    /// machinery.
    fn create_config_source(sources: &[&str], transforms: &[&str], sinks: &[&str]) -> String {
        // This creates a string in the syntax expected by the `vector generate`
        // command whose internal mechanics we are using to create valid Vector
        // configurations.
        //
        // Importantly, we have to name the components (in this case, simply by
        // their type as each type of component is guaranteed to only appear
        // once), because (in some tests) we'd like to shuffle the configuration
        // later in a way that does not change its actual semantics. Otherwise,
        // an autogenerated ID like `source0` could correspond to different
        // sources depending on the ordering of the `vector generate` input.
        //
        // We also append a fixed `remap` transform to the transforms list. This
        // ensures sink inputs are consistent since `generate` uses the last
        // transform as the input for each sink.
        let generate_config_str = format!(
            "{}/{}/{}",
            sources
                .iter()
                .map(|source| format!("{source}:{source}"))
                .collect::<Vec<_>>()
                .join(","),
            transforms
                .iter()
                .map(|transform| format!("{transform}:{transform}"))
                .chain(std::iter::once("manually-added-remap:remap".to_string()))
                .collect::<Vec<_>>()
                .join(","),
            sinks
                .iter()
                .map(|sink| format!("{sink}:{sink}"))
                .collect::<Vec<_>>()
                .join(","),
        );
        let opts = generate::Opts {
            fragment: true,
            // The expression string is not needed afterwards, so it is moved, not cloned.
            expression: generate_config_str,
            file: None,
            format: Format::Toml,
        };
        generate_example(&opts, TransformInputsStrategy::All).expect("invalid config generated")
    }

    proptest! {
        #[test]
        /// Output should be the same regardless of input config ordering
        fn output_has_consistent_ordering(mut sources in arb_sources(), mut transforms in arb_transforms(), mut sinks in arb_sinks(), seed in num::u64::ANY) {
            let config_source = create_config_source(sources.as_ref(), transforms.as_ref(), sinks.as_ref());

            // Shuffle the ordering of components which shuffles the order in
            // which items appear in the TOML config
            let mut rng = StdRng::seed_from_u64(seed);
            sources.shuffle(&mut rng);
            transforms.shuffle(&mut rng);
            sinks.shuffle(&mut rng);
            let shuffled_config_source = create_config_source(sources.as_ref(), transforms.as_ref(), sinks.as_ref());

            let json = serialize_to_json(
                toml::from_str(config_source.as_ref()).unwrap(),
                &ConfigBuilder::from_toml(config_source.as_ref()),
                false,
                false
            )
            .unwrap();
            let shuffled_json = serialize_to_json(
                toml::from_str(shuffled_config_source.as_ref()).unwrap(),
                &ConfigBuilder::from_toml(shuffled_config_source.as_ref()),
                false,
                false
            )
            .unwrap();

            assert_eq!(json, shuffled_json);
        }
    }

    proptest! {
        #[test]
        /// Output is a valid configuration
        fn output_is_a_valid_config(sources in arb_sources(), transforms in arb_transforms(), sinks in arb_sinks()) {
            let config_source = create_config_source(sources.as_ref(), transforms.as_ref(), sinks.as_ref());
            let json = serialize_to_json(
                toml::from_str(config_source.as_ref()).unwrap(),
                &ConfigBuilder::from_toml(config_source.as_ref()),
                false,
                false
            )
            .unwrap();
            assert!(serde_json::from_str::<ConfigBuilder>(json.as_ref()).is_ok());
        }
    }
}