1#![allow(clippy::module_name_repetitions)]
2mod exact;
3mod field;
4mod index;
5mod unknown;
6
7use std::collections::BTreeMap;
8
9use crate::path::OwnedSegment;
10use crate::path::OwnedValuePath;
11pub use field::Field;
12pub use index::Index;
13pub use unknown::Unknown;
14
15use super::Kind;
16
/// A key type that can be used to address elements of a [`Collection`].
///
/// The segment produced by [`CollectionKey::to_segment`] is used by
/// `Collection::is_superset` to prefix the path that is reported on failure.
pub trait CollectionKey {
    /// Returns the path segment that represents this key.
    fn to_segment(&self) -> OwnedSegment;
}
20
21#[derive(Debug, Clone, Eq, PartialEq, PartialOrd)]
26pub struct Collection<T: Ord> {
27 known: BTreeMap<T, Kind>,
28
29 unknown: Unknown,
35}
36
37impl<T: Ord + Clone> Collection<T> {
38 #[must_use]
40 pub fn from_parts(known: BTreeMap<T, Kind>, unknown: impl Into<Kind>) -> Self {
41 Self {
42 known,
43 unknown: unknown.into().into(),
44 }
45 }
46
47 pub(super) fn canonicalize(&self) -> Self {
48 let mut output = (*self).clone();
49
50 output.unknown = self.unknown.canonicalize();
51
52 let unknown_kind = self.unknown_kind();
53 output
54 .known_mut()
55 .retain(|_, i_kind| *i_kind != unknown_kind);
56 output
57 }
58
59 #[must_use]
61 pub fn from_unknown(unknown: impl Into<Kind>) -> Self {
62 Self {
63 known: BTreeMap::default(),
64 unknown: unknown.into().into(),
65 }
66 }
67
68 #[must_use]
70 pub fn empty() -> Self {
71 Self {
72 known: BTreeMap::default(),
73 unknown: Kind::undefined().into(),
74 }
75 }
76
77 #[must_use]
79 pub fn any() -> Self {
80 Self {
81 known: BTreeMap::default(),
82 unknown: Unknown::any(),
83 }
84 }
85
86 #[must_use]
88 pub fn json() -> Self {
89 Self {
90 known: BTreeMap::default(),
91 unknown: Unknown::json(),
92 }
93 }
94
95 #[must_use]
99 pub fn is_any(&self) -> bool {
100 self.known.values().all(Kind::is_any) && self.unknown_kind().is_any()
101 }
102
103 #[must_use]
105 pub fn known(&self) -> &BTreeMap<T, Kind> {
106 &self.known
107 }
108
109 #[must_use]
111 pub fn known_mut(&mut self) -> &mut BTreeMap<T, Kind> {
112 &mut self.known
113 }
114
115 #[must_use]
118 pub fn unknown_kind(&self) -> Kind {
119 self.unknown.to_kind()
120 }
121
122 #[must_use]
127 pub fn is_unknown_exact(&self) -> bool {
128 self.unknown.is_exact()
129 }
130
131 #[must_use]
133 pub fn is_empty(&self) -> EmptyState {
134 if self.known.is_empty() {
135 if self.unknown_kind().contains_any_defined() {
136 EmptyState::Maybe
137 } else {
138 EmptyState::Always
139 }
140 } else {
141 EmptyState::Never
142 }
143 }
144
145 pub fn set_unknown(&mut self, unknown: impl Into<Kind>) {
147 self.unknown = unknown.into().into();
148 }
149
150 #[must_use]
152 pub fn with_unknown(mut self, unknown: impl Into<Kind>) -> Self {
153 self.set_unknown(unknown);
154 self
155 }
156
157 #[must_use]
159 pub fn with_known(mut self, key: impl Into<T>, kind: Kind) -> Self {
160 self.known_mut().insert(key.into(), kind);
161 self
162 }
163
164 pub fn anonymize(&mut self) {
175 let known_unknown = self
176 .known
177 .values_mut()
178 .reduce(|lhs, rhs| {
179 lhs.merge_keep(rhs.clone(), false);
180 lhs
181 })
182 .cloned()
183 .unwrap_or(Kind::never());
184
185 self.known.clear();
186 self.unknown = self.unknown.to_kind().union(known_unknown).into();
187 }
188
189 pub fn merge(&mut self, mut other: Self, overwrite: bool) {
205 for (key, self_kind) in &mut self.known {
206 if let Some(other_kind) = other.known.remove(key) {
207 if overwrite {
208 *self_kind = other_kind;
209 } else {
210 self_kind.merge_keep(other_kind, overwrite);
211 }
212 } else if other.unknown_kind().contains_any_defined() {
213 if overwrite {
214 *self_kind = other
216 .unknown_kind()
217 .without_undefined()
218 .union(self_kind.clone());
219 } else {
220 self_kind.merge_keep(other.unknown_kind(), overwrite);
221 }
222 } else if !overwrite {
223 self_kind.add_undefined();
225 }
226 }
227
228 let self_unknown_kind = self.unknown_kind();
229 if self_unknown_kind.contains_any_defined() {
230 for (key, mut other_kind) in other.known {
231 if !overwrite {
232 other_kind.merge_keep(self_unknown_kind.clone(), overwrite);
233 }
234 self.known_mut().insert(key, other_kind);
235 }
236 } else if overwrite {
237 self.known.extend(other.known);
238 } else {
239 for (key, other_kind) in other.known {
240 self.known.insert(key, other_kind.or_undefined());
242 }
243 }
244 self.unknown.merge(other.unknown, overwrite);
245 }
246
247 #[must_use]
251 pub fn reduced_kind(&self) -> Kind {
252 self.known
253 .values()
254 .cloned()
255 .reduce(|lhs, rhs| lhs.union(rhs))
256 .unwrap_or_else(Kind::never)
257 .union(self.unknown_kind().without_undefined())
258 }
259}
260
261impl<T: Ord + Clone + CollectionKey> Collection<T> {
262 pub fn is_superset(&self, other: &Self) -> Result<(), OwnedValuePath> {
277 self.unknown
279 .is_superset(&other.unknown)
280 .map_err(|path| path.with_field_prefix("<unknown>"))?;
281
282 for (key, other_kind) in &other.known {
285 match self.known.get(key) {
286 Some(self_kind) => {
287 self_kind
288 .is_superset(other_kind)
289 .map_err(|path| path.with_segment_prefix(key.to_segment()))?;
290 }
291 None => {
292 self.unknown_kind()
293 .is_superset(other_kind)
294 .map_err(|path| path.with_segment_prefix(key.to_segment()))?;
295 }
296 }
297 }
298
299 for (key, self_kind) in &self.known {
302 if !other.known.contains_key(key) {
303 self_kind
304 .is_superset(&other.unknown_kind())
305 .map_err(|path| path.with_segment_prefix(key.to_segment()))?;
306 }
307 }
308
309 Ok(())
310 }
311}
312
/// Removal of known elements from a collection.
pub trait CollectionRemove {
    /// The key type used to address known elements.
    type Key: Ord;

    /// Removes the known element stored under `key`.
    ///
    /// NOTE(review): implementors are defined outside this file; presumably a missing key is a
    /// no-op, since nothing is returned — confirm against the implementations.
    fn remove_known(&mut self, key: &Self::Key);
}
318
/// Describes whether a collection is empty, as reported by `Collection::is_empty`.
///
/// Three states are needed because a collection without known elements can still have unknown
/// elements of a defined kind, in which case emptiness cannot be decided.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EmptyState {
    /// There are no known elements and the unknown kind contains no defined kind.
    Always,
    /// There are no known elements, but the unknown kind contains a defined kind.
    Maybe,
    /// There is at least one known element.
    Never,
}
331
332impl<T: Ord> From<BTreeMap<T, Kind>> for Collection<T> {
333 fn from(known: BTreeMap<T, Kind>) -> Self {
334 Self {
335 known,
336 unknown: Kind::undefined().into(),
337 }
338 }
339}
340
341impl std::fmt::Display for Collection<Field> {
342 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
343 if self.unknown_kind().contains_any_defined() || self.known.is_empty() {
344 return f.write_str("object");
346 }
347
348 f.write_str("{ ")?;
349
350 let mut known = self.known.iter().peekable();
351 while let Some((key, kind)) = known.next() {
352 write!(f, "{key}: {kind}")?;
353 if known.peek().is_some() {
354 f.write_str(", ")?;
355 }
356 }
357
358 f.write_str(" }")?;
359
360 Ok(())
361 }
362}
363
364impl std::fmt::Display for Collection<Index> {
365 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
366 if self.unknown_kind().contains_any_defined() || self.known.is_empty() {
367 return f.write_str("array");
369 }
370
371 f.write_str("[")?;
372
373 let mut known = self.known.iter().peekable();
374
375 while let Some((_, kind)) = known.next() {
380 kind.fmt(f)?;
381 if known.peek().is_some() {
382 f.write_str(", ")?;
383 }
384 }
385
386 f.write_str("]")?;
387
388 Ok(())
389 }
390}
391
#[cfg(test)]
mod tests {
    use super::*;

    // Lets the tests use plain string literals as collection keys.
    impl CollectionKey for &'static str {
        fn to_segment(&self) -> OwnedSegment {
            OwnedSegment::Field((*self).into())
        }
    }

    // All case tables below are plain arrays: the cases run in the order they are written, and
    // two cases that accidentally share a title are both executed.

    /// `is_superset` succeeds exactly for the cases marked with `want: true`.
    #[test]
    #[allow(clippy::too_many_lines)]
    fn test_is_superset() {
        struct TestCase {
            this: Collection<&'static str>,
            other: Collection<&'static str>,
            want: bool,
        }

        for (title, TestCase { this, other, want }) in [
            (
                "any comparison",
                TestCase {
                    this: Collection::any(),
                    other: Collection::any(),
                    want: true,
                },
            ),
            (
                "exact/any mismatch",
                TestCase {
                    this: Collection::json(),
                    other: Collection::any(),
                    want: false,
                },
            ),
            (
                "unknown match",
                TestCase {
                    this: Collection::from_unknown(Kind::regex().or_null()),
                    other: Collection::from_unknown(Kind::regex()),
                    want: true,
                },
            ),
            (
                "unknown mis-match",
                TestCase {
                    this: Collection::from_unknown(Kind::regex().or_null()),
                    other: Collection::from_unknown(Kind::bytes()),
                    want: false,
                },
            ),
            (
                "other-known match",
                TestCase {
                    this: Collection::from_parts(
                        BTreeMap::from([("bar", Kind::bytes())]),
                        Kind::regex().or_null(),
                    ),
                    other: Collection::from_parts(
                        BTreeMap::from([("foo", Kind::regex()), ("bar", Kind::bytes())]),
                        Kind::regex(),
                    ),
                    want: true,
                },
            ),
            (
                "other-known mis-match",
                TestCase {
                    this: Collection::from_parts(
                        BTreeMap::from([("foo", Kind::integer()), ("bar", Kind::bytes())]),
                        Kind::regex().or_null(),
                    ),
                    other: Collection::from_parts(
                        BTreeMap::from([("foo", Kind::regex()), ("bar", Kind::bytes())]),
                        Kind::regex(),
                    ),
                    want: false,
                },
            ),
            (
                "self-known match",
                TestCase {
                    this: Collection::from_parts(
                        BTreeMap::from([
                            ("foo", Kind::bytes().or_integer()),
                            ("bar", Kind::bytes().or_integer()),
                        ]),
                        Kind::bytes().or_integer(),
                    ),
                    other: Collection::from_unknown(Kind::bytes().or_integer()),
                    want: false,
                },
            ),
            (
                "self-known mis-match",
                TestCase {
                    this: Collection::from_parts(
                        BTreeMap::from([("foo", Kind::integer()), ("bar", Kind::bytes())]),
                        Kind::bytes().or_integer(),
                    ),
                    other: Collection::from_unknown(Kind::bytes().or_integer()),
                    want: false,
                },
            ),
            (
                "unknown superset of known",
                TestCase {
                    this: Collection::from_parts(BTreeMap::new(), Kind::bytes().or_integer()),
                    other: Collection::empty()
                        .with_known("foo", Kind::integer())
                        .with_known("bar", Kind::bytes()),
                    want: true,
                },
            ),
            (
                "unknown not superset of known",
                TestCase {
                    this: Collection::from_parts(BTreeMap::new(), Kind::bytes().or_integer()),
                    other: Collection::empty().with_known("foo", Kind::float()),
                    want: false,
                },
            ),
        ] {
            assert_eq!(this.is_superset(&other).is_ok(), want, "{title}");
        }
    }

    /// `merge` produces the expected collection for both the deep (`overwrite: false`) and the
    /// shallow (`overwrite: true`) strategy.
    #[test]
    #[allow(clippy::too_many_lines)]
    fn test_merge() {
        struct TestCase {
            this: Collection<&'static str>,
            other: Collection<&'static str>,
            overwrite: bool,
            want: Collection<&'static str>,
        }

        for (
            title,
            TestCase {
                mut this,
                other,
                overwrite,
                want,
            },
        ) in [
            (
                "any merge (deep)",
                TestCase {
                    this: Collection::any(),
                    other: Collection::any(),
                    overwrite: false,
                    want: Collection::any(),
                },
            ),
            (
                "any merge (shallow)",
                TestCase {
                    this: Collection::any(),
                    other: Collection::any(),
                    overwrite: true,
                    want: Collection::any(),
                },
            ),
            (
                "json merge (deep)",
                TestCase {
                    this: Collection::json(),
                    other: Collection::json(),
                    overwrite: false,
                    want: Collection::json(),
                },
            ),
            (
                "json merge (shallow)",
                TestCase {
                    this: Collection::json(),
                    other: Collection::json(),
                    overwrite: true,
                    want: Collection::json(),
                },
            ),
            (
                "any w/ json merge (deep)",
                TestCase {
                    this: Collection::any(),
                    other: Collection::json(),
                    overwrite: false,
                    want: Collection::any(),
                },
            ),
            (
                "any w/ json merge (shallow)",
                TestCase {
                    this: Collection::any(),
                    other: Collection::json(),
                    overwrite: true,
                    want: Collection::any(),
                },
            ),
            (
                "merge same knowns (deep)",
                TestCase {
                    this: Collection::from(BTreeMap::from([("foo", Kind::integer())])),
                    other: Collection::from(BTreeMap::from([("foo", Kind::bytes())])),
                    overwrite: false,
                    want: Collection::from(BTreeMap::from([("foo", Kind::integer().or_bytes())])),
                },
            ),
            (
                "merge same knowns (shallow)",
                TestCase {
                    this: Collection::from(BTreeMap::from([("foo", Kind::integer())])),
                    other: Collection::from(BTreeMap::from([("foo", Kind::bytes())])),
                    overwrite: true,
                    want: Collection::from(BTreeMap::from([("foo", Kind::bytes())])),
                },
            ),
            (
                "append different knowns (deep)",
                TestCase {
                    this: Collection::from(BTreeMap::from([("foo", Kind::integer())])),
                    other: Collection::from(BTreeMap::from([("bar", Kind::bytes())])),
                    overwrite: false,
                    want: Collection::from(BTreeMap::from([
                        ("foo", Kind::integer().or_undefined()),
                        ("bar", Kind::bytes().or_undefined()),
                    ])),
                },
            ),
            (
                "append different knowns (shallow)",
                TestCase {
                    this: Collection::from(BTreeMap::from([("foo", Kind::integer())])),
                    other: Collection::from(BTreeMap::from([("bar", Kind::bytes())])),
                    overwrite: true,
                    want: Collection::from(BTreeMap::from([
                        ("foo", Kind::integer()),
                        ("bar", Kind::bytes()),
                    ])),
                },
            ),
            (
                "merge/append same/different knowns (deep)",
                TestCase {
                    this: Collection::from(BTreeMap::from([("foo", Kind::integer())])),
                    other: Collection::from(BTreeMap::from([
                        ("foo", Kind::bytes()),
                        ("bar", Kind::boolean()),
                    ])),
                    overwrite: false,
                    want: Collection::from(BTreeMap::from([
                        ("foo", Kind::integer().or_bytes()),
                        ("bar", Kind::boolean().or_undefined()),
                    ])),
                },
            ),
            (
                "merge/append same/different knowns (shallow)",
                TestCase {
                    this: Collection::from(BTreeMap::from([("foo", Kind::integer())])),
                    other: Collection::from(BTreeMap::from([
                        ("foo", Kind::bytes()),
                        ("bar", Kind::boolean()),
                    ])),
                    overwrite: true,
                    want: Collection::from(BTreeMap::from([
                        ("foo", Kind::bytes()),
                        ("bar", Kind::boolean()),
                    ])),
                },
            ),
            (
                "merge unknowns (deep)",
                TestCase {
                    this: Collection::from_unknown(Kind::bytes()),
                    other: Collection::from_unknown(Kind::integer()),
                    overwrite: false,
                    want: Collection::from_unknown(Kind::bytes().or_integer()),
                },
            ),
            (
                "merge unknowns (shallow)",
                TestCase {
                    this: Collection::from_unknown(Kind::bytes()),
                    other: Collection::from_unknown(Kind::integer()),
                    overwrite: true,
                    want: Collection::from_unknown(Kind::bytes().or_integer()),
                },
            ),
            (
                "merge known with specific unknown",
                TestCase {
                    this: Collection::from(BTreeMap::from([("a", Kind::integer())])),
                    other: Collection::from_unknown(Kind::float()),
                    overwrite: true,
                    want: Collection::from(BTreeMap::from([("a", Kind::integer().or_float())]))
                        .with_unknown(Kind::float().or_undefined()),
                },
            ),
        ] {
            this.merge(other, overwrite);
            assert_eq!(this, want, "{title}");
        }
    }

    /// `anonymize` moves all known kinds into the unknown state.
    #[test]
    #[allow(clippy::too_many_lines)]
    fn test_anonymize() {
        struct TestCase {
            this: Collection<&'static str>,
            want: Collection<&'static str>,
        }

        for (title, TestCase { mut this, want }) in [
            (
                "no knowns / any unknown",
                TestCase {
                    this: Collection::any(),
                    want: Collection::any(),
                },
            ),
            (
                "no knowns / json unknown",
                TestCase {
                    this: Collection::json(),
                    want: Collection::json(),
                },
            ),
            (
                "integer known / no unknown",
                TestCase {
                    this: Collection::from(BTreeMap::from([("foo", Kind::integer())])),
                    want: Collection::from_unknown(Kind::integer().or_undefined()),
                },
            ),
            (
                "integer known / any unknown",
                TestCase {
                    this: {
                        let mut v = Collection::from(BTreeMap::from([("foo", Kind::integer())]));
                        v.set_unknown(Kind::any());
                        v
                    },
                    want: Collection::from_unknown(Kind::any()),
                },
            ),
            (
                "integer known / byte unknown",
                TestCase {
                    this: {
                        let mut v = Collection::from(BTreeMap::from([("foo", Kind::integer())]));
                        v.set_unknown(Kind::bytes());
                        v
                    },
                    want: Collection::from_unknown(Kind::integer().or_bytes().or_undefined()),
                },
            ),
            (
                "boolean/array known / byte/object unknown",
                TestCase {
                    this: {
                        let mut v = Collection::from(BTreeMap::from([
                            ("foo", Kind::boolean()),
                            (
                                "bar",
                                Kind::array(BTreeMap::from([(0.into(), Kind::timestamp())])),
                            ),
                        ]));
                        v.set_unknown(
                            Kind::bytes()
                                .or_object(BTreeMap::from([("baz".into(), Kind::regex())])),
                        );
                        v
                    },
                    want: Collection::from_unknown(
                        Kind::boolean()
                            .or_array(BTreeMap::from([(0.into(), Kind::timestamp())]))
                            .or_bytes()
                            .or_object(BTreeMap::from([("baz".into(), Kind::regex())]))
                            .or_undefined(),
                    ),
                },
            ),
        ] {
            this.anonymize();

            assert_eq!(this, want, "{title}");
        }
    }

    /// `Display` for field collections prints `object` or the list of known fields.
    #[test]
    fn test_display_field() {
        struct TestCase {
            this: Collection<Field>,
            want: &'static str,
        }

        for (title, TestCase { this, want }) in [
            (
                "any",
                TestCase {
                    this: Collection::any(),
                    want: "object",
                },
            ),
            (
                "unknown",
                TestCase {
                    this: Collection::from_unknown(Kind::null()),
                    want: "object",
                },
            ),
            (
                "known single",
                TestCase {
                    this: BTreeMap::from([("foo".into(), Kind::null())]).into(),
                    want: "{ foo: null }",
                },
            ),
            (
                "known multiple",
                TestCase {
                    this: BTreeMap::from([
                        ("1".into(), Kind::null()),
                        ("2".into(), Kind::boolean()),
                    ])
                    .into(),
                    want: r#"{ "1": null, "2": boolean }"#,
                },
            ),
            (
                "known multiple, nested",
                TestCase {
                    this: BTreeMap::from([
                        ("1".into(), Kind::null()),
                        (
                            "2".into(),
                            Kind::object(BTreeMap::from([("3".into(), Kind::integer())])),
                        ),
                    ])
                    .into(),
                    want: r#"{ "1": null, "2": { "3": integer } }"#,
                },
            ),
        ] {
            assert_eq!(this.to_string(), want, "{title}");
        }
    }

    /// `Display` for index collections prints `array` or the list of known kinds.
    #[test]
    fn test_display_index() {
        struct TestCase {
            this: Collection<Index>,
            want: &'static str,
        }

        for (title, TestCase { this, want }) in [
            (
                "any",
                TestCase {
                    this: Collection::any(),
                    want: "array",
                },
            ),
            (
                "unknown",
                TestCase {
                    this: Collection::from_unknown(Kind::null()),
                    want: "array",
                },
            ),
            (
                "known single",
                TestCase {
                    this: BTreeMap::from([(0.into(), Kind::null())]).into(),
                    want: "[null]",
                },
            ),
            (
                "known multiple",
                TestCase {
                    this: BTreeMap::from([(0.into(), Kind::null()), (1.into(), Kind::boolean())])
                        .into(),
                    want: "[null, boolean]",
                },
            ),
            (
                "known multiple, nested",
                TestCase {
                    this: BTreeMap::from([
                        (0.into(), Kind::null()),
                        (
                            1.into(),
                            Kind::object(BTreeMap::from([("0".into(), Kind::integer())])),
                        ),
                    ])
                    .into(),
                    want: r#"[null, { "0": integer }]"#,
                },
            ),
        ] {
            assert_eq!(this.to_string(), want, "{title}");
        }
    }

    /// `reduced_kind` unites the known kinds with the defined part of the unknown kind.
    #[test]
    #[allow(clippy::too_many_lines)]
    fn test_reduced_kind() {
        struct TestCase {
            this: Collection<&'static str>,
            want: Kind,
        }

        for (title, TestCase { this, want }) in [
            (
                "any",
                TestCase {
                    this: Collection::any(),
                    want: Kind::any().without_undefined(),
                },
            ),
            (
                "known bytes",
                TestCase {
                    this: BTreeMap::from([("foo", Kind::bytes())]).into(),
                    want: Kind::bytes(),
                },
            ),
            (
                "multiple known",
                TestCase {
                    this: BTreeMap::from([("foo", Kind::bytes()), ("bar", Kind::boolean())]).into(),
                    want: Kind::bytes().or_boolean(),
                },
            ),
            (
                "known bytes, unknown any",
                TestCase {
                    this: Collection::from_parts(
                        BTreeMap::from([("foo", Kind::bytes())]),
                        Kind::any(),
                    ),
                    want: Kind::any().without_undefined(),
                },
            ),
            (
                "known bytes, unknown timestamp",
                TestCase {
                    this: Collection::from_parts(
                        BTreeMap::from([("foo", Kind::bytes())]),
                        Kind::timestamp(),
                    ),
                    want: Kind::bytes().or_timestamp(),
                },
            ),
        ] {
            assert_eq!(this.reduced_kind(), want, "{title}");
        }
    }
}