vector/config/
cmd.rs

1use std::path::PathBuf;
2
3use clap::Parser;
4use serde_json::Value;
5
6use super::{load_builder_from_paths, load_source_from_paths, process_paths, ConfigBuilder};
7use crate::cli::handle_config_errors;
8use crate::config;
9
// Command-line options for the `vector config` subcommand; consumed by `cmd` below.
//
// NOTE: the `///` doc comments on the fields are picked up by clap's derive macro and shown as
// `--help` text, so editing them changes user-visible output. Maintainer notes are therefore
// written as plain `//` comments.
#[derive(Parser, Debug, Clone)]
#[command(rename_all = "kebab-case")]
pub struct Opts {
    /// Pretty print JSON
    #[arg(short, long)]
    pretty: bool,

    /// Include default values where missing from config
    #[arg(short, long)]
    include_defaults: bool,

    /// Read configuration from one or more files. Wildcard paths are supported.
    /// File format is detected from the file name.
    /// If zero files are specified, the deprecated default config path
    /// `/etc/vector/vector.yaml` is targeted.
    // Registered under the id `config` rather than the field name; may also be supplied through
    // the `VECTOR_CONFIG` environment variable. Values are split on ','.
    #[arg(
        id = "config",
        short,
        long,
        env = "VECTOR_CONFIG",
        value_delimiter(',')
    )]
    paths: Vec<PathBuf>,

    /// Vector config files in TOML format.
    // Paths given here are paired with an explicit `Format::Toml` hint in `paths_with_formats`.
    #[arg(id = "config-toml", long, value_delimiter(','))]
    paths_toml: Vec<PathBuf>,

    /// Vector config files in JSON format.
    // Paths given here are paired with an explicit `Format::Json` hint in `paths_with_formats`.
    #[arg(id = "config-json", long, value_delimiter(','))]
    paths_json: Vec<PathBuf>,

    /// Vector config files in YAML format.
    // Paths given here are paired with an explicit `Format::Yaml` hint in `paths_with_formats`.
    #[arg(id = "config-yaml", long, value_delimiter(','))]
    paths_yaml: Vec<PathBuf>,

    /// Read configuration from files in one or more directories.
    /// File format is detected from the file name.
    ///
    /// Files not ending in .toml, .json, .yaml, or .yml will be ignored.
    // The only public field. Registered under the id `config-dir` with the short flag `-C`; may
    // also be supplied through the `VECTOR_CONFIG_DIR` environment variable. Values are split
    // on ','.
    #[arg(
        id = "config-dir",
        short = 'C',
        long,
        env = "VECTOR_CONFIG_DIR",
        value_delimiter(',')
    )]
    pub config_dirs: Vec<PathBuf>,
}
59
60impl Opts {
61    fn paths_with_formats(&self) -> Vec<config::ConfigPath> {
62        config::merge_path_lists(vec![
63            (&self.paths, None),
64            (&self.paths_toml, Some(config::Format::Toml)),
65            (&self.paths_json, Some(config::Format::Json)),
66            (&self.paths_yaml, Some(config::Format::Yaml)),
67        ])
68        .map(|(path, hint)| config::ConfigPath::File(path, hint))
69        .chain(
70            self.config_dirs
71                .iter()
72                .map(|dir| config::ConfigPath::Dir(dir.to_path_buf())),
73        )
74        .collect()
75    }
76}
77
78/// Helper to merge JSON. Handles objects and array concatenation.
79fn merge_json(a: &mut Value, b: Value) {
80    match (a, b) {
81        (Value::Object(a), Value::Object(b)) => {
82            for (k, v) in b {
83                merge_json(a.entry(k).or_insert(Value::Null), v);
84            }
85        }
86        (a, b) => {
87            *a = b;
88        }
89    }
90}
91
92/// Helper to sort array values.
93fn sort_json_array_values(json: &mut Value) {
94    match json {
95        Value::Array(arr) => {
96            for v in arr.iter_mut() {
97                sort_json_array_values(v);
98            }
99
100            // Since `Value` does not have a native ordering, we first convert
101            // to string, sort, and then convert back to `Value`.
102            //
103            // Practically speaking, there should not be config options that mix
104            // many JSON types in a single array. This is mainly to sort fields
105            // like component inputs.
106            let mut a = arr
107                .iter()
108                .map(|v| serde_json::to_string(v).unwrap())
109                .collect::<Vec<_>>();
110            a.sort();
111            *arr = a
112                .iter()
113                .map(|v| serde_json::from_str(v.as_str()).unwrap())
114                .collect::<Vec<_>>();
115        }
116        Value::Object(json) => {
117            for (_, v) in json {
118                sort_json_array_values(v);
119            }
120        }
121        _ => {}
122    }
123}
124
125/// Convert a raw user config to a JSON string
126fn serialize_to_json(
127    source: toml::value::Table,
128    source_builder: &ConfigBuilder,
129    include_defaults: bool,
130    pretty_print: bool,
131) -> serde_json::Result<String> {
132    // Convert table to JSON
133    let mut source_json = serde_json::to_value(source)
134        .expect("should serialize config source to JSON. Please report.");
135
136    // If a user has requested default fields, we'll serialize a `ConfigBuilder`. Otherwise,
137    // we'll serialize the raw user provided config (without interpolated env vars, to preserve
138    // the original source).
139    if include_defaults {
140        // For security, we don't want environment variables to be interpolated in the final
141        // output, but we *do* want defaults. To work around this, we'll serialize `ConfigBuilder`
142        // to JSON, and merge in the raw config which will contain the pre-interpolated strings.
143        let mut builder = serde_json::to_value(source_builder)
144            .expect("should serialize ConfigBuilder to JSON. Please report.");
145
146        merge_json(&mut builder, source_json);
147
148        source_json = builder
149    }
150
151    sort_json_array_values(&mut source_json);
152
153    // Get a JSON string. This will either be pretty printed or (default) minified.
154    if pretty_print {
155        serde_json::to_string_pretty(&source_json)
156    } else {
157        serde_json::to_string(&source_json)
158    }
159}
160
161/// Function used by the `vector config` subcommand for outputting a normalized configuration.
162/// The purpose of this func is to combine user configuration after processing all paths,
163/// Pipelines expansions, etc. The JSON result of this serialization can itself be used as a config,
164/// which also makes it useful for version control or treating as a singular unit of configuration.
165pub fn cmd(opts: &Opts) -> exitcode::ExitCode {
166    let paths = opts.paths_with_formats();
167    // Start by serializing to a `ConfigBuilder`. This will leverage validation in config
168    // builder fields which we'll use to error out if required.
169    let (paths, builder) = match process_paths(&paths) {
170        Some(paths) => match load_builder_from_paths(&paths) {
171            Ok(builder) => (paths, builder),
172            Err(errs) => return handle_config_errors(errs),
173        },
174        None => return exitcode::CONFIG,
175    };
176
177    // Load source TOML.
178    let source = match load_source_from_paths(&paths) {
179        Ok(map) => map,
180        Err(errs) => return handle_config_errors(errs),
181    };
182
183    let json = serialize_to_json(source, &builder, opts.include_defaults, opts.pretty);
184
185    #[allow(clippy::print_stdout)]
186    {
187        println!("{}", json.expect("config should be serializable"));
188    }
189
190    exitcode::OK
191}
192
#[cfg(all(test, feature = "sources", feature = "transforms", feature = "sinks"))]
mod tests {
    use std::collections::HashMap;

    use proptest::{num, prelude::*, sample};
    use rand::{
        prelude::{SliceRandom, StdRng},
        SeedableRng,
    };
    use serde_json::json;
    use similar_asserts::assert_eq;
    use vector_lib::configurable::component::{
        SinkDescription, SourceDescription, TransformDescription,
    };

    use crate::config::Format;
    use crate::{
        config::{cmd::serialize_to_json, vars, ConfigBuilder},
        generate,
        generate::{generate_example, TransformInputsStrategy},
    };

    use super::merge_json;

    /// Merging an array over an array replaces it rather than concatenating.
    #[test]
    fn test_array_override() {
        let mut json = json!({ "arr": ["value1", "value2"] });
        let to_override = json!({ "arr": ["value3", "value4"] });

        merge_json(&mut json, to_override);

        assert_eq!(*json.get("arr").unwrap(), json!(["value3", "value4"]));
    }

    /// With defaults included, the output must still contain the raw `${VAR}` placeholders
    /// instead of the interpolated environment values.
    #[test]
    fn include_defaults_does_not_include_env_vars() {
        let env_var = "VECTOR_CONFIG_INCLUDE_DEFAULTS_TEST";
        let env_var_in_arr = "VECTOR_CONFIG_INCLUDE_DEFAULTS_TEST_IN_ARR";

        let config_source = format!(
            r#"
            [sources.in]
            type = "demo_logs"
            format = "${{{env_var}}}"

            [sinks.out]
            type = "blackhole"
            inputs = ["${{{env_var_in_arr}}}"]
        "#
        );
        let interpolated_config_source = vars::interpolate(
            config_source.as_str(),
            &HashMap::from([
                (env_var.to_string(), "syslog".to_string()),
                (env_var_in_arr.to_string(), "in".to_string()),
            ]),
        )
        .unwrap();

        // The raw source is what gets printed; the builder is fed the interpolated variant.
        let serialized = serialize_to_json(
            toml::from_str(config_source.as_str()).unwrap(),
            &ConfigBuilder::from_toml(interpolated_config_source.as_ref()),
            true,
            false,
        )
        .unwrap();
        let json: serde_json::Value = serde_json::from_str(&serialized).unwrap();

        assert_eq!(
            json["sources"]["in"]["format"],
            json!(format!("${{{env_var}}}"))
        );
        assert_eq!(
            json["sinks"]["out"]["inputs"],
            json!(vec![format!("${{{env_var_in_arr}}}")])
        );
    }

    /// Select any 2-4 sources
    fn arb_sources() -> impl Strategy<Value = Vec<&'static str>> {
        let mut types = SourceDescription::types();
        // The `file_descriptor` source produces different defaults each time it is used, and so
        // will never compare equal below.
        types.retain(|ty| *ty != "file_descriptor");
        sample::subsequence(types, 2..=4)
    }

    /// Select any 2-4 transforms
    fn arb_transforms() -> impl Strategy<Value = Vec<&'static str>> {
        sample::subsequence(TransformDescription::types(), 2..=4)
    }

    /// Select any 2-4 sinks
    fn arb_sinks() -> impl Strategy<Value = Vec<&'static str>> {
        sample::subsequence(SinkDescription::types(), 2..=4)
    }

    /// Builds a TOML config containing the given component types by driving the
    /// `vector generate` machinery.
    fn create_config_source(sources: &[&str], transforms: &[&str], sinks: &[&str]) -> String {
        // This creates a string in the syntax expected by the `vector generate`
        // command whose internal mechanics we are using to create valid Vector
        // configurations.
        //
        // Importantly, we have to name the components (in this case, simply by
        // their type as each type of component is guaranteed to only appear
        // once), because (in some tests) we'd like to shuffle the configuration
        // later in a way that does not change its actual semantics. Otherwise,
        // an autogenerated ID like `source0` could correspond to different
        // sources depending on the ordering of the `vector generate` input.
        let named = |types: &[&str]| -> Vec<String> {
            types.iter().map(|ty| format!("{ty}:{ty}")).collect()
        };

        // We also append a fixed `remap` transform to the transforms list. This
        // ensures sink inputs are consistent since `generate` uses the last
        // transform as the input for each sink.
        let mut transform_exprs = named(transforms);
        transform_exprs.push("manually-added-remap:remap".to_string());

        let expression = format!(
            "{}/{}/{}",
            named(sources).join(","),
            transform_exprs.join(","),
            named(sinks).join(","),
        );
        let opts = generate::Opts {
            fragment: true,
            expression,
            file: None,
            format: Format::Toml,
        };
        generate_example(&opts, TransformInputsStrategy::All).expect("invalid config generated")
    }

    /// Serializes a TOML config to minified JSON without defaults, panicking on any failure.
    fn serialize_without_defaults(config_source: &str) -> String {
        serialize_to_json(
            toml::from_str(config_source).unwrap(),
            &ConfigBuilder::from_toml(config_source),
            false,
            false,
        )
        .unwrap()
    }

    proptest! {
        #[test]
        /// Output should be the same regardless of input config ordering
        fn output_has_consistent_ordering(mut sources in arb_sources(), mut transforms in arb_transforms(), mut sinks in arb_sinks(), seed in num::u64::ANY) {
            let config_source = create_config_source(&sources, &transforms, &sinks);

            // Shuffle the ordering of components which shuffles the order in
            // which items appear in the TOML config
            let mut rng = StdRng::seed_from_u64(seed);
            sources.shuffle(&mut rng);
            transforms.shuffle(&mut rng);
            sinks.shuffle(&mut rng);
            let shuffled_config_source = create_config_source(&sources, &transforms, &sinks);

            let json = serialize_without_defaults(&config_source);
            let shuffled_json = serialize_without_defaults(&shuffled_config_source);

            assert_eq!(json, shuffled_json);
        }
    }

    proptest! {
        #[test]
        /// Output is a valid configuration
        fn output_is_a_valid_config(sources in arb_sources(), transforms in arb_transforms(), sinks in arb_sinks()) {
            let config_source = create_config_source(&sources, &transforms, &sinks);
            let json = serialize_without_defaults(&config_source);
            assert!(serde_json::from_str::<ConfigBuilder>(&json).is_ok());
        }
    }
}