vrl/value/kind/collection/
unknown.rs

1use crate::path::OwnedValuePath;
2use std::collections::BTreeMap;
3
4use super::Collection;
5use crate::value::Kind;
6
/// The type-state of "unknown" values in a collection.
///
/// That is, given a collection, it can have a set of "known" value types (e.g. we know the object
/// collection has a field `.foo` with a type `integer`), but also a singular "unknown" value type
/// (e.g. the array collection has an integer value at index 0, and is 3 values in size. We don't
/// know the exact values for indices 1 and 2, but we do know that it has to be the type defined by
/// `Unknown`).
///
/// "unknown" values can either be "undefined" or the "unknown" type.
/// For example, an array with an infinite unknown of "integer" doesn't imply that _every_
/// index contains an integer. Rather, it says every index contains an "integer" or is
/// "undefined".
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd)]
pub struct Unknown(pub(super) Inner);
20
/// The two internal representations an [`Unknown`] can take.
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd)]
pub(super) enum Inner {
    /// The unknown values are described by one explicitly stored (boxed) `Kind`.
    Exact(Box<Kind>),

    /// The `Infinite` unknown kind stores non-recursive types, with the invariant that the same
    /// states set on this type also apply to its nested collection types.
    ///
    /// That is, if we have an infinite type with the `bytes` and `array` state set, then the
    /// assumption is that the array of this type also has the bytes and array state, and its array
    /// has the bytes and array state, ad infinitum.
    Infinite(Infinite),
}
33
34impl Unknown {
35    /// Returns a standard representation of an "unknown" type.
36    #[must_use]
37    pub(crate) fn canonicalize(&self) -> Self {
38        self.to_kind().or_undefined().into()
39    }
40
41    /// Get the `any` state for `Unknown`.
42    #[must_use]
43    pub(crate) fn any() -> Self {
44        Self::infinite(Infinite::any())
45    }
46
47    /// Get the `exact` state for `Unknown`.
48    #[must_use]
49    pub(crate) fn exact(kind: impl Into<Kind>) -> Self {
50        Self(Inner::Exact(Box::new(kind.into())))
51    }
52
53    /// Get the `exact` state for `Unknown`.
54    #[must_use]
55    pub(super) fn infinite(infinite: impl Into<Infinite>) -> Self {
56        Self(Inner::Infinite(infinite.into()))
57    }
58
59    /// Get the `json` state for `Unknown`.
60    ///
61    /// See [`Unknown::exact`] for details on the [`Option`] return value.
62    #[must_use]
63    pub(crate) fn json() -> Self {
64        Self::infinite(Infinite::json())
65    }
66
67    /// Check if the state of `Unknown` is "any".
68    #[must_use]
69    pub const fn is_any(&self) -> bool {
70        matches!(self.0, Inner::Infinite(infinite) if infinite.is_any())
71    }
72
73    /// Check if the state of `Unknown` is "any".
74    #[must_use]
75    pub const fn is_json(&self) -> bool {
76        matches!(self.0, Inner::Infinite(infinite) if infinite.is_json())
77    }
78
79    /// Check if the state of `Unknown` is "exact".
80    #[must_use]
81    pub const fn is_exact(&self) -> bool {
82        matches!(self.0, Inner::Exact(_))
83    }
84
85    /// Get the `Kind` stored in this `Unknown`.
86    /// This represents the kind of any type not "known".
87    /// It will always include "undefined", since unknown
88    /// values are not guaranteed to exist.
89    #[must_use]
90    pub fn to_kind(&self) -> Kind {
91        self.to_existing_kind().or_undefined()
92    }
93
94    /// Get the `Kind` stored in this `Unknown`.
95    ///
96    /// This represents the kind of any _EXISTING_ type not "known".
97    /// This function assumes the type you are accessing actually exists.
98    /// If it's an optional field, `to_kind` should be used instead.
99    ///
100    /// This will never have "undefined" as part of the type
101    #[must_use]
102    pub fn to_existing_kind(&self) -> Kind {
103        let mut result = match &self.0 {
104            Inner::Infinite(infinite) => (*infinite).into(),
105            Inner::Exact(kind) => kind.as_ref().clone(),
106        };
107        result.remove_undefined();
108        result
109    }
110
111    /// Check if `self` is a superset of `other`.
112    ///
113    /// Meaning, if `self` is `Any`, then it's always a superset of `other`, otherwise its
114    /// accumulative types need to be a superset of `other`.
115    pub(crate) fn is_superset(&self, other: &Self) -> Result<(), OwnedValuePath> {
116        match (&self.0, &other.0) {
117            (Inner::Infinite(infinite), _) if infinite.is_any() => Ok(()),
118            (Inner::Infinite(infinite), Inner::Exact(rhs)) => {
119                Kind::from(*infinite).is_superset(rhs)
120            }
121            (Inner::Exact(lhs), Inner::Exact(rhs)) => lhs
122                .clone()
123                .without_undefined()
124                .is_superset(&rhs.clone().without_undefined()),
125            (Inner::Exact(lhs), Inner::Infinite(..)) => {
126                if lhs.is_any() {
127                    Ok(())
128                } else {
129                    Err(OwnedValuePath::root())
130                }
131            }
132            (Inner::Infinite(lhs), Inner::Infinite(rhs)) => {
133                if lhs.is_superset(rhs) {
134                    Ok(())
135                } else {
136                    Err(OwnedValuePath::root())
137                }
138            }
139        }
140    }
141
142    /// Merge `other` into `self`, using the provided `Strategy`.
143    ///
144    /// If any of the two `Unknown`s is marked as "infinite", it will overwrite the finite variant.
145    pub(crate) fn merge(&mut self, other: Self, overwrite: bool) {
146        match (&mut self.0, other.0) {
147            (Inner::Exact(lhs), Inner::Exact(rhs)) => lhs.merge_keep(*rhs, overwrite),
148            (Inner::Infinite(lhs), Inner::Infinite(rhs)) => lhs.merge(rhs),
149            (_, rhs @ Inner::Infinite(_)) => self.0 = rhs,
150            (Inner::Infinite(_), _) => {}
151        }
152    }
153}
154
155impl From<Kind> for Unknown {
156    fn from(kind: Kind) -> Self {
157        (&kind).into()
158    }
159}
160
161impl From<&Kind> for Unknown {
162    fn from(kind: &Kind) -> Self {
163        if kind.is_any() {
164            return Self::any();
165        }
166
167        if kind.is_json() {
168            return Self::json();
169        }
170
171        Self::exact(kind.clone())
172    }
173}
174
/// The set of type states carried by an "infinite" unknown.
///
/// Every field is a presence flag: `Some(())` means the corresponding state is set, `None`
/// means it is not. When converted into a `Kind`, the `array` and `object` states produce
/// collections whose own unknown is this very same `Infinite`.
#[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd)]
pub(super) struct Infinite {
    bytes: Option<()>,
    integer: Option<()>,
    float: Option<()>,
    boolean: Option<()>,
    timestamp: Option<()>,
    regex: Option<()>,
    null: Option<()>,
    array: Option<()>,
    object: Option<()>,
}
187
188impl Infinite {
189    const fn any() -> Self {
190        Self {
191            bytes: Some(()),
192            integer: Some(()),
193            float: Some(()),
194            boolean: Some(()),
195            timestamp: Some(()),
196            regex: Some(()),
197            null: Some(()),
198            array: Some(()),
199            object: Some(()),
200        }
201    }
202
203    const fn json() -> Self {
204        Self {
205            bytes: Some(()),
206            integer: Some(()),
207            float: Some(()),
208            boolean: Some(()),
209            timestamp: None,
210            regex: None,
211            null: Some(()),
212            array: Some(()),
213            object: Some(()),
214        }
215    }
216
217    #[must_use]
218    pub const fn is_any(&self) -> bool {
219        self.bytes.is_some()
220            && self.integer.is_some()
221            && self.float.is_some()
222            && self.boolean.is_some()
223            && self.timestamp.is_some()
224            && self.regex.is_some()
225            && self.null.is_some()
226            && self.array.is_some()
227            && self.object.is_some()
228    }
229
230    /// Returns `true` if the JSON type states are valid.
231    #[must_use]
232    pub const fn is_json(&self) -> bool {
233        self.bytes.is_some()
234            && self.integer.is_some()
235            && self.float.is_some()
236            && self.boolean.is_some()
237            && self.timestamp.is_none()
238            && self.regex.is_none()
239            && self.null.is_some()
240            && self.array.is_some()
241            && self.object.is_some()
242    }
243
244    /// Check if `self` is a superset of `other`.
245    ///
246    /// Meaning, if `self` is `Any`, then it's always a superset of `other`, otherwise its
247    /// accumulative types need to be a superset of `other`.
248    pub(super) const fn is_superset(&self, other: &Self) -> bool {
249        if let (None, Some(())) = (self.bytes, other.bytes) {
250            return false;
251        }
252
253        if let (None, Some(())) = (self.integer, other.integer) {
254            return false;
255        }
256
257        if let (None, Some(())) = (self.float, other.float) {
258            return false;
259        }
260
261        if let (None, Some(())) = (self.boolean, other.boolean) {
262            return false;
263        }
264
265        if let (None, Some(())) = (self.timestamp, other.timestamp) {
266            return false;
267        }
268
269        if let (None, Some(())) = (self.regex, other.regex) {
270            return false;
271        }
272
273        if let (None, Some(())) = (self.null, other.null) {
274            return false;
275        }
276
277        if let (None, Some(())) = (self.array, other.array) {
278            return false;
279        }
280
281        if let (None, Some(())) = (self.object, other.object) {
282            return false;
283        }
284
285        true
286    }
287
288    /// Merge `other` into `self`.
289    pub(super) fn merge(&mut self, other: Self) {
290        self.bytes = self.bytes.or(other.bytes);
291        self.integer = self.integer.or(other.integer);
292        self.float = self.float.or(other.float);
293        self.boolean = self.boolean.or(other.boolean);
294        self.timestamp = self.timestamp.or(other.timestamp);
295        self.regex = self.regex.or(other.regex);
296        self.null = self.null.or(other.null);
297        self.array = self.array.or(other.array);
298        self.object = self.object.or(other.object);
299    }
300}
301
302impl From<Infinite> for Kind {
303    fn from(infinite: Infinite) -> Self {
304        let mut kind = Self::never();
305
306        if infinite.bytes.is_some() {
307            kind.add_bytes();
308        }
309
310        if infinite.integer.is_some() {
311            kind.add_integer();
312        }
313
314        if infinite.float.is_some() {
315            kind.add_float();
316        }
317
318        if infinite.boolean.is_some() {
319            kind.add_boolean();
320        }
321
322        if infinite.timestamp.is_some() {
323            kind.add_timestamp();
324        }
325
326        if infinite.regex.is_some() {
327            kind.add_regex();
328        }
329
330        if infinite.null.is_some() {
331            kind.add_null();
332        }
333
334        if infinite.array.is_some() {
335            kind.add_array(Collection::from(infinite));
336        }
337
338        if infinite.object.is_some() {
339            kind.add_object(Collection::from(infinite));
340        }
341
342        kind
343    }
344}
345
346impl<T: Ord> From<Infinite> for Collection<T> {
347    fn from(infinite: Infinite) -> Self {
348        Self {
349            known: BTreeMap::default(),
350            unknown: Unknown::infinite(infinite),
351        }
352    }
353}
354
#[cfg(test)]
mod tests {
    use super::*;

    /// Table-driven check of `Unknown::is_superset`.
    ///
    /// The cases live in a plain array so they run in declaration order and a repeated title
    /// cannot silently drop a case.
    #[test]
    fn test_is_superset() {
        struct TestCase {
            title: &'static str,
            this: Unknown,
            other: Unknown,
            want: bool,
        }

        let cases = [
            TestCase {
                title: "any comparison",
                this: Unknown::any(),
                other: Unknown::any(),
                want: true,
            },
            TestCase {
                title: "json/any mismatch",
                this: Unknown::json(),
                other: Unknown::any(),
                want: false,
            },
            TestCase {
                title: "any/json match",
                this: Unknown::any(),
                other: Unknown::json(),
                want: true,
            },
            TestCase {
                title: "json matching comparison",
                this: Unknown::json(),
                other: Unknown::json(),
                want: true,
            },
            TestCase {
                title: "exact mismatch comparison",
                this: Unknown::exact(Kind::bytes()),
                other: Unknown::exact(Kind::integer()),
                want: false,
            },
        ];

        for TestCase {
            title,
            this,
            other,
            want,
        } in cases
        {
            assert_eq!(this.is_superset(&other).is_ok(), want, "{title}");
        }
    }
}
415}