vrl/stdlib/
unnest.rs

1use crate::compiler::prelude::*;
2use crate::path::{OwnedTargetPath, OwnedValuePath};
3
4fn unnest(path: &expression::Query, ctx: &mut Context) -> Resolved {
5    let lookup_buf = path.path();
6
7    match path.target() {
8        expression::Target::External(prefix) => {
9            let root = ctx
10                .target()
11                .target_get(&OwnedTargetPath::root(*prefix))
12                .expect("must never fail")
13                .expect("always a value");
14            unnest_root(root, lookup_buf)
15        }
16        expression::Target::Internal(v) => {
17            let value = ctx.state().variable(v.ident()).unwrap_or(&Value::Null);
18            let root = value.get(&OwnedValuePath::root()).expect("always a value");
19            unnest_root(root, lookup_buf)
20        }
21        expression::Target::Container(expr) => {
22            let value = expr.resolve(ctx)?;
23            let root = value.get(&OwnedValuePath::root()).expect("always a value");
24            unnest_root(root, lookup_buf)
25        }
26        expression::Target::FunctionCall(expr) => {
27            let value = expr.resolve(ctx)?;
28            let root = value.get(&OwnedValuePath::root()).expect("always a value");
29            unnest_root(root, lookup_buf)
30        }
31    }
32}
33
34fn unnest_root(root: &Value, path: &OwnedValuePath) -> Resolved {
35    let mut trimmed = root.clone();
36    let values = trimmed
37        .remove(path, true)
38        .ok_or(ValueError::Expected {
39            got: Kind::null(),
40            expected: Kind::array(Collection::any()),
41        })?
42        .try_array()?;
43
44    let events = values
45        .into_iter()
46        .map(|value| {
47            let mut event = trimmed.clone();
48            event.insert(path, value);
49            event
50        })
51        .collect::<Vec<_>>();
52
53    Ok(Value::Array(events))
54}
55
/// Marker type for the `unnest` function; it carries no state of its own.
#[derive(Debug, Clone, Copy)]
pub struct Unnest;
58
59impl Function for Unnest {
60    fn identifier(&self) -> &'static str {
61        "unnest"
62    }
63
64    fn usage(&self) -> &'static str {
65        indoc! {"
66            Unnest an array field from an object to create an array of objects using that field; keeping all other fields.
67
68            Assigning the array result of this to `.` results in multiple events being emitted from `remap`. See the
69            [`remap` transform docs](/docs/reference/configuration/transforms/remap/#emitting-multiple-log-events) for more details.
70
71            This is also referred to as `explode` in some languages.
72        "}
73    }
74
75    fn category(&self) -> &'static str {
76        Category::Object.as_ref()
77    }
78
79    fn internal_failure_reasons(&self) -> &'static [&'static str] {
80        &["The field path referred to is not an array."]
81    }
82
83    fn return_kind(&self) -> u16 {
84        kind::ARRAY
85    }
86
87    fn return_rules(&self) -> &'static [&'static str] {
88        &[
89            "Returns an array of objects that matches the original object, but each with the specified path replaced with a single element from the original path.",
90        ]
91    }
92
93    fn parameters(&self) -> &'static [Parameter] {
94        const PARAMETERS: &[Parameter] = &[Parameter::required(
95            "path",
96            kind::ARRAY,
97            "The path of the field to unnest.",
98        )];
99        PARAMETERS
100    }
101
102    fn examples(&self) -> &'static [Example] {
103        &[
104            example! {
105                title: "Unnest an array field",
106                source: indoc! {r#"
107                    . = {"hostname": "localhost", "messages": ["message 1", "message 2"]}
108                    . = unnest(.messages)
109                "#},
110                result: Ok(
111                    r#"[{"hostname": "localhost", "messages": "message 1"}, {"hostname": "localhost", "messages": "message 2"}]"#,
112                ),
113            },
114            example! {
115                title: "Unnest a nested array field",
116                source: indoc! {r#"
117                    . = {"hostname": "localhost", "event": {"messages": ["message 1", "message 2"]}}
118                    . = unnest(.event.messages)
119                "#},
120                result: Ok(
121                    r#"[{"hostname": "localhost", "event": {"messages": "message 1"}}, {"hostname": "localhost", "event": {"messages": "message 2"}}]"#,
122                ),
123            },
124        ]
125    }
126
127    fn compile(
128        &self,
129        _state: &state::TypeState,
130        _ctx: &mut FunctionCompileContext,
131        arguments: ArgumentList,
132    ) -> Compiled {
133        let path = arguments.required_query("path")?;
134        Ok(UnnestFn { path }.as_expr())
135    }
136}
137
138#[derive(Debug, Clone)]
139struct UnnestFn {
140    path: expression::Query,
141}
142
143impl UnnestFn {
144    #[cfg(test)]
145    fn new(path: &str) -> Self {
146        use crate::path::{PathPrefix, parse_value_path};
147
148        Self {
149            path: expression::Query::new(
150                expression::Target::External(PathPrefix::Event),
151                parse_value_path(path).unwrap(),
152            ),
153        }
154    }
155}
156
157impl FunctionExpression for UnnestFn {
158    fn resolve(&self, ctx: &mut Context) -> Resolved {
159        unnest(&self.path, ctx)
160    }
161
162    fn type_def(&self, state: &state::TypeState) -> TypeDef {
163        use expression::Target;
164
165        match self.path.target() {
166            Target::External(prefix) => invert_array_at_path(
167                &TypeDef::from(state.external.kind(*prefix)),
168                self.path.path(),
169            ),
170            Target::Internal(v) => invert_array_at_path(&v.type_def(state), self.path.path()),
171            Target::FunctionCall(f) => invert_array_at_path(&f.type_def(state), self.path.path()),
172            Target::Container(c) => invert_array_at_path(&c.type_def(state), self.path.path()),
173        }
174    }
175}
176
177/// Assuming path points at an Array, this will take the typedefs for that array,
178/// And will remove it returning a set of it's elements.
179///
180/// For example the typedef for this object:
181/// `{ "a" => { "b" => [ { "c" => 2 }, { "c" => 3 } ] } }`
182///
183/// Is converted to a typedef for this array:
184/// `[ { "a" => { "b" => { "c" => 2 } } },
185///    { "a" => { "b" => { "c" => 3 } } },
186///  ]`
187///
188pub(crate) fn invert_array_at_path(typedef: &TypeDef, path: &OwnedValuePath) -> TypeDef {
189    let kind = typedef.kind().at_path(path);
190
191    let Some(mut array) = kind.into_array() else {
192        // Guaranteed fallible.
193        // This can't actually be set to "fallible", or it will cause problems due to
194        // https://github.com/vectordotdev/vector/issues/13527
195        return TypeDef::never();
196    };
197
198    array.known_mut().values_mut().for_each(|kind| {
199        let mut tdkind = typedef.kind().clone();
200        tdkind.insert(path, kind.clone());
201
202        *kind = tdkind.clone();
203    });
204
205    let unknown = array.unknown_kind();
206    if unknown.contains_any_defined() {
207        let mut tdkind = typedef.kind().clone();
208        tdkind.insert(path, unknown.without_undefined());
209        array.set_unknown(tdkind);
210    }
211
212    TypeDef::array(array).infallible()
213}
214
#[cfg(test)]
mod tests {
    use super::*;
    use crate::path::parse_value_path;
    use crate::{btreemap, type_def, value};

    /// Checks `invert_array_at_path` against hand-written type definitions.
    #[test]
    #[allow(clippy::too_many_lines)]
    fn type_def() {
        // Each case is (input type definition, path to unnest, expected type definition).
        let cases: Vec<(TypeDef, &'static str, TypeDef)> = vec![
            // Simple case
            (
                type_def! { object {
                    "nonk" => type_def! { array [
                        type_def! { object {
                            "noog" => type_def! { bytes },
                            "nork" => type_def! { bytes },
                        } },
                    ] },
                } },
                ".nonk",
                type_def! { array [
                    type_def! { object {
                        "nonk" => type_def! { object {
                            "noog" => type_def! { bytes },
                            "nork" => type_def! { bytes },
                        } },
                    } },
                ] },
            ),
            // Provided example
            (
                type_def! { object {
                    "nonk" => type_def! { object {
                        "shnoog" => type_def! { array [
                            type_def! { object {
                                "noog" => type_def! { bytes },
                            } },
                        ] },
                    } },
                } },
                "nonk.shnoog",
                type_def! { array [
                    type_def! { object {
                        "nonk" => type_def! { object {
                            "shnoog" => type_def! { object {
                                "noog" => type_def! { bytes },
                            } },
                        } },
                    } },
                ] },
            ),
            // Same field in different branches
            (
                type_def! { object {
                    "nonk" => type_def! { object {
                        "shnoog" => type_def! { array [
                            type_def! { object {
                                "noog" => type_def! { bytes },
                            } },
                        ] },
                    } },
                    "nink" => type_def! { object {
                        "shnoog" => type_def! { array [
                            type_def! { object {
                                "noog" => type_def! { bytes },
                            } },
                        ] },
                    } },
                } },
                "nonk.shnoog",
                type_def! { array [
                    type_def! { object {
                        "nonk" => type_def! { object {
                            "shnoog" => type_def! { object {
                                "noog" => type_def! { bytes },
                            } },
                        } },
                        "nink" => type_def! { object {
                            "shnoog" => type_def! { array [
                                type_def! { object {
                                    "noog" => type_def! { bytes },
                                } },
                            ] },
                        } },
                    } },
                ] },
            ),
            // Indexed specific
            (
                type_def! { object {
                    "nonk" => type_def! { array {
                        0 => type_def! { object {
                            "noog" => type_def! { array [
                                type_def! { bytes },
                            ] },
                            "nork" => type_def! { bytes },
                        } },
                    } },
                } },
                ".nonk[0].noog",
                type_def! { array [
                    type_def! { object {
                        "nonk" => type_def! { array {
                            // The index is added on top of the Any entry.
                            0 => type_def! { object {
                                "noog" => type_def! { bytes },
                                "nork" => type_def! { bytes },
                            } },
                        } },
                    } },
                ] },
            ),
            // More nested
            (
                type_def! { object {
                    "nonk" => type_def! { object {
                        "shnoog" => type_def! { array [
                            type_def! { object {
                                "noog" => type_def! { bytes },
                                "nork" => type_def! { bytes },
                            } },
                        ] },
                    } },
                } },
                ".nonk.shnoog",
                type_def! { array [
                    type_def! { object {
                        "nonk" => type_def! { object {
                            "shnoog" => type_def! { object {
                                "noog" => type_def! { bytes },
                                "nork" => type_def! { bytes },
                            } },
                        } },
                    } },
                ] },
            ),
            // Nonexistent path: there is no array to invert, so the expected
            // type definition is `never`.
            (
                type_def! { object {
                    "nonk" => type_def! { bytes },
                } },
                ".norg",
                // guaranteed to fail at runtime
                TypeDef::never(),
            ),
        ];

        for (old, raw_path, expected) in cases {
            let path = parse_value_path(raw_path).unwrap();
            let actual = invert_array_at_path(&old, &path);
            assert_eq!(expected, actual, "{path}");
        }
    }

    /// Checks both the runtime result and the computed type definition of
    /// `UnnestFn` for a successful call and two failing calls.
    #[test]
    fn unnest() {
        struct Case {
            input: Value,
            expected: Result<Value, String>,
            func: UnnestFn,
            expected_typedef: TypeDef,
        }

        let cases = vec![
            // The path points at an array: one output object per element.
            Case {
                input: value!({"hostname": "localhost", "events": [{"message": "hello"}, {"message": "world"}]}),
                expected: Ok(
                    value!([{"hostname": "localhost", "events": {"message": "hello"}}, {"hostname": "localhost", "events": {"message": "world"}}]),
                ),
                func: UnnestFn::new("events"),
                expected_typedef: type_def! { array [
                    type_def! { object {
                        "hostname" => type_def! { bytes },
                        "events" => type_def! { object {
                            "message" => type_def! { bytes },
                        } },
                    } },
                ] },
            },
            // The path does not exist in the input.
            Case {
                input: value!({"hostname": "localhost", "events": [{"message": "hello"}, {"message": "world"}]}),
                expected: Err("expected array, got null".to_owned()),
                func: UnnestFn::new("unknown"),
                // guaranteed to always fail
                expected_typedef: TypeDef::never(),
            },
            // The path points at a string rather than an array.
            Case {
                input: value!({"hostname": "localhost", "events": [{"message": "hello"}, {"message": "world"}]}),
                expected: Err("expected array, got string".to_owned()),
                func: UnnestFn::new("hostname"),
                // guaranteed to always fail
                expected_typedef: TypeDef::never(),
            },
        ];

        // Type state describing the external event shared by all cases.
        let event_kind = Kind::object(btreemap! {
            "hostname" => Kind::bytes(),
            "events" => Kind::array(Collection::from_unknown(Kind::object(btreemap! {
                Field::from("message") => Kind::bytes(),
            }))),
        });
        let metadata_kind = Kind::object(Collection::empty());
        let type_state = TypeState {
            local: state::LocalEnv::default(),
            external: state::ExternalEnv::new_with_kind(event_kind, metadata_kind),
        };

        let tz = TimeZone::default();
        for Case {
            input: mut object,
            expected,
            func,
            expected_typedef,
        } in cases
        {
            let mut runtime_state = state::RuntimeState::default();
            let mut ctx = Context::new(&mut object, &mut runtime_state, &tz);

            let got_typedef = func.type_def(&type_state);

            let got = func
                .resolve(&mut ctx)
                .map_err(|e| format!("{:#}", anyhow::anyhow!(e)));

            assert_eq!(got, expected);
            assert_eq!(got_typedef, expected_typedef);
        }
    }
}