vector/config/
vars.rs

1use std::{collections::HashMap, sync::LazyLock};
2
3use regex::{Captures, Regex};
4
5// Environment variable names can have any characters from the Portable Character Set other
6// than NUL.  However, for Vector's interpolation, we are closer to what a shell supports which
7// is solely of uppercase letters, digits, and the '_' (that is, the `[:word:]` regex class).
8// In addition to these characters, we allow `.` as this commonly appears in environment
9// variable names when they come from a Java properties file.
10//
11// https://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
12pub static ENVIRONMENT_VARIABLE_INTERPOLATION_REGEX: LazyLock<Regex> = LazyLock::new(|| {
13    Regex::new(
14        r"(?x)
15        \$\$|
16        \$([[:word:].]+)|
17        \$\{([[:word:].]+)(?:(:?-|:?\?)([^}]*))?\}",
18    )
19    .unwrap()
20});
21
22/// Result<interpolated config, errors>
23pub fn interpolate(input: &str, vars: &HashMap<String, String>) -> Result<String, Vec<String>> {
24    let mut errors = Vec::new();
25
26    let interpolated = ENVIRONMENT_VARIABLE_INTERPOLATION_REGEX
27        .replace_all(input, |caps: &Captures<'_>| {
28            let flags = caps.get(3).map(|m| m.as_str()).unwrap_or_default();
29            let def_or_err = caps.get(4).map(|m| m.as_str()).unwrap_or_default();
30            caps.get(1)
31                .or_else(|| caps.get(2))
32                .map(|m| m.as_str())
33                .map(|name| {
34                    // Get the value and check for newlines (LF or CR)
35                    let val = vars.get(name).and_then(|v| {
36                        if v.contains(['\n', '\r']) {
37                            errors.push(format!(
38                                "Environment variable contains newline character. name = {name:?}",
39                            ));
40                            None
41                        } else {
42                            Some(v.as_str())
43                        }
44                    });
45
46                    match flags {
47                        ":-" => match val {
48                            Some(v) if !v.is_empty() => v,
49                            _ => def_or_err,
50                        },
51                        "-" => val.unwrap_or(def_or_err),
52                        ":?" => match val {
53                            Some(v) if !v.is_empty() => v,
54                            _ => {
55                                errors.push(format!(
56                                    "Non-empty environment variable required in config. name = {name:?}, error = {def_or_err:?}",
57                                ));
58                                ""
59                            },
60                        }
61                        "?" => val.unwrap_or_else(|| {
62                            errors.push(format!(
63                                "Missing environment variable required in config. name = {name:?}, error = {def_or_err:?}",
64                            ));
65                            ""
66                        }),
67                        _ => val.unwrap_or_else(|| {
68                            errors.push(format!(
69                                "Missing environment variable in config. name = {name:?}",
70                            ));
71                            ""
72                        }),
73                    }
74                })
75                .unwrap_or("$")
76                .to_string()
77        })
78        .into_owned();
79
80    if errors.is_empty() {
81        Ok(interpolated)
82    } else {
83        Err(errors)
84    }
85}
86
#[cfg(test)]
mod test {
    use std::collections::HashMap;

    use super::interpolate;

    /// Builds an owned variable map from borrowed `(name, value)` pairs.
    fn vars_from(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|&(name, value)| (name.to_owned(), value.to_owned()))
            .collect()
    }

    #[test]
    fn interpolation() {
        let vars = vars_from(&[
            ("FOO", "dogs"),
            ("FOOBAR", "cats"),
            // Java commonly uses `.` in env var names
            ("FOO.BAR", "turtles"),
            ("EMPTY", ""),
        ]);

        // Inputs that must interpolate successfully, paired with the expected output.
        let resolves = [
            ("$FOO", "dogs"),
            ("${FOO}", "dogs"),
            ("${FOOBAR}", "cats"),
            ("x${FOOBAR}y", "xcatsy"),
            ("$ x", "$ x"),
            ("$$FOO", "$FOO"),
            ("$FOO=bar", "dogs=bar"),
            ("$FOO.BAR", "turtles"),
            ("${FOO x", "${FOO x"),
            ("${}", "${}"),
            ("${FOO:-cats}", "dogs"),
            ("${NOT:-dogcats}", "dogcats"),
            ("${NOT:-dogs and cats}", "dogs and cats"),
            ("${:-cats}", "${:-cats}"),
            ("${NOT:-}", ""),
            ("${NOT-cats}", "cats"),
            ("${EMPTY-cats}", ""),
            ("${FOO:?error cats}", "dogs"),
            ("${FOO?error cats}", "dogs"),
            ("${EMPTY?error cats}", ""),
        ];
        for (input, expected) in resolves {
            assert_eq!(expected, interpolate(input, &vars).unwrap());
        }

        // Inputs that must be reported as errors.
        let rejected = [
            "x$FOOBARy",
            "$NOT_FOO",
            "$NOT-FOO",
            "${NOT:?error cats}",
            "${NOT?error cats}",
            "${EMPTY:?error cats}",
        ];
        for input in rejected {
            assert!(interpolate(input, &vars).is_err());
        }
    }

    #[test]
    fn test_multiline_expansion_prevented() {
        let vars = vars_from(&[
            ("SAFE_VAR", "single line value"),
            ("MULTILINE_VAR", "line1\nline2\nline3"),
            ("WITH_NEWLINE", "before\nafter"),
            ("WITH_CR", "before\rafter"),
            ("WITH_CRLF", "before\r\nafter"),
        ]);

        // Values containing LF and/or CR must never be expanded.
        let rejected = [
            ("$MULTILINE_VAR", "Multiline var should be rejected"),
            ("$WITH_NEWLINE", "Newline var should be rejected"),
            ("$WITH_CR", "CR var should be rejected"),
            ("$WITH_CRLF", "CRLF var should be rejected"),
        ];
        for (input, why) in rejected {
            assert!(interpolate(input, &vars).is_err(), "{why}");
        }

        // Single-line values are unaffected.
        assert_eq!(
            "single line value",
            interpolate("$SAFE_VAR", &vars).unwrap()
        );

        // A default does not rescue a variable whose value contains a newline.
        assert!(
            interpolate("${MULTILINE_VAR:-safe default}", &vars).is_err(),
            "Should error even with default"
        );

        // The reported errors name both the problem and the offending variable.
        let messages = interpolate("$MULTILINE_VAR", &vars).unwrap_err();
        assert!(messages.iter().any(|msg| msg.contains("newline character")));
        assert!(messages.iter().any(|msg| msg.contains("MULTILINE_VAR")));
    }
}