1use std::collections::{HashMap, HashSet};
2
3use tracing::debug;
4use vector_config_common::schema::{
5 visit::{with_resolved_schema_reference, Visitor},
6 *,
7};
8
9use crate::schema::visitors::merge::Mergeable;
10
11use super::scoped_visit::{
12 visit_schema_object_scoped, SchemaReference, SchemaScopeStack, ScopedVisitor,
13};
14
/// A schema visitor that closes object schemas by setting `unevaluatedProperties` to `false`.
///
/// While visiting, any schema that is an object schema, or that has object subschemas under
/// `allOf`/`oneOf`/`anyOf`, is marked closed. The object subschemas themselves get
/// `unevaluatedProperties` forced to `true` instead, so the closing applies only at the level of
/// the schema that holds them.
///
/// Because one schema definition can be referenced both from a property (where it may stay
/// closed) and from a subschema list (where it must be opened), some references are replaced by
/// an inline copy of the definition ("flattened"). The set of references to flatten is computed
/// once per root schema in `visit_root_schema`.
#[derive(Debug, Default)]
pub struct DisallowUnevaluatedPropertiesVisitor {
    // Stack of schema scopes maintained through the `ScopedVisitor` implementation; the current
    // scope identifies the referrer when deciding whether to flatten a reference.
    scope_stack: SchemaScopeStack,
    // Maps a schema reference string (looked up with the value of a schema's `reference` field)
    // to the set of referrers in which that reference is replaced by an inline copy.
    eligible_to_flatten: HashMap<String, HashSet<SchemaReference>>,
}
31
32impl DisallowUnevaluatedPropertiesVisitor {
33 pub fn from_settings(_: &SchemaSettings) -> Self {
34 Self {
35 scope_stack: SchemaScopeStack::default(),
36 eligible_to_flatten: HashMap::new(),
37 }
38 }
39}
40
41impl Visitor for DisallowUnevaluatedPropertiesVisitor {
42 fn visit_root_schema(&mut self, root: &mut RootSchema) {
43 let eligible_to_flatten = build_closed_schema_flatten_eligibility_mappings(root);
44
45 debug!(
46 "Found {} referents eligible for flattening: {:?}",
47 eligible_to_flatten.len(),
48 eligible_to_flatten,
49 );
50
51 self.eligible_to_flatten = eligible_to_flatten;
52
53 visit::visit_root_schema(self, root);
54 }
55
56 fn visit_schema_object(
57 &mut self,
58 definitions: &mut Map<String, Schema>,
59 schema: &mut SchemaObject,
60 ) {
61 if let Some(reference) = schema.reference.as_ref() {
73 let current_parent_schema_ref = self.get_current_schema_scope();
74
75 if let Some(referrers) = self.eligible_to_flatten.get(reference) {
76 if referrers.contains(current_parent_schema_ref) {
77 let current_schema_ref = get_cleaned_schema_reference(reference);
78 let referenced_schema = definitions
79 .get(current_schema_ref)
80 .expect("schema definition must exist");
81
82 debug!(
83 referent = current_schema_ref,
84 referrer = current_parent_schema_ref.as_ref(),
85 "Found eligible referent/referrer mapping."
86 );
87
88 if let Schema::Object(referenced_schema) = referenced_schema {
89 debug!(
90 referent = current_schema_ref,
91 referrer = current_parent_schema_ref.as_ref(),
92 "Flattening referent into referrer."
93 );
94
95 schema.reference = None;
96 schema.merge(referenced_schema);
97 }
98 }
99 }
100 }
101
102 visit_schema_object_scoped(self, definitions, schema);
105
106 let mut had_relevant_subschemas = false;
115 if let Some(subschema) = schema.subschemas.as_mut() {
116 let subschemas = get_object_subschemas_from_parent_mut(subschema.as_mut());
117 for subschema in subschemas {
118 had_relevant_subschemas = true;
119
120 unmark_or_flatten_schema(definitions, subschema);
121 }
122 }
123
124 if had_relevant_subschemas || is_object_schema(schema) {
127 mark_schema_closed(schema);
128 }
129 }
130}
131
132impl ScopedVisitor for DisallowUnevaluatedPropertiesVisitor {
133 fn push_schema_scope<S: Into<SchemaReference>>(&mut self, scope: S) {
134 self.scope_stack.push(scope.into());
135 }
136
137 fn pop_schema_scope(&mut self) {
138 self.scope_stack.pop().expect("stack was empty during pop");
139 }
140
141 fn get_current_schema_scope(&self) -> &SchemaReference {
142 self.scope_stack.current().unwrap_or(&SchemaReference::Root)
143 }
144}
145
146fn unmark_or_flatten_schema(definitions: &mut Map<String, Schema>, schema: &mut SchemaObject) {
147 if let Some(object) = schema.object.as_mut() {
155 debug!("Unmarked object subschema directly.");
156
157 object.unevaluated_properties = Some(Box::new(Schema::Bool(true)));
158 } else {
159 with_resolved_schema_reference(definitions, schema, |_, schema_ref, resolved| {
160 if let Schema::Object(resolved) = resolved {
161 if let Some(object) = resolved.object.as_mut() {
162 debug!(
163 referent = schema_ref,
164 "Unmarked subschema by traversing schema reference."
165 );
166
167 object.unevaluated_properties = Some(Box::new(Schema::Bool(true)));
168 }
169 }
170 });
171 }
172}
173
/// A schema reference paired with whether the usage that produced it would unmark the target.
///
/// While collecting referents, `referent` names the referenced (child) schema. When the mapping
/// is inverted, `with_new_referent` swaps in the referring (parent) schema and keeps the flag.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct MarkableReferent {
    // `true` when the reference was found in an `allOf`/`oneOf`/`anyOf` subschema list, `false`
    // when it was found through `properties`, `patternProperties` or `additionalProperties`.
    would_unmark: bool,

    // The schema this entry points at.
    referent: SchemaReference,
}
183
184impl MarkableReferent {
185 fn would_unmark<R: Into<SchemaReference>>(referent: R) -> Self {
186 Self {
187 would_unmark: true,
188 referent: referent.into(),
189 }
190 }
191
192 fn would_not_unmark<R: Into<SchemaReference>>(referent: R) -> Self {
193 Self {
194 would_unmark: false,
195 referent: referent.into(),
196 }
197 }
198
199 fn with_new_referent<R: Into<SchemaReference>>(&self, new_referent: R) -> Self {
200 Self {
201 would_unmark: self.would_unmark,
202 referent: new_referent.into(),
203 }
204 }
205}
206
207fn build_closed_schema_flatten_eligibility_mappings(
208 root_schema: &RootSchema,
209) -> HashMap<String, HashSet<SchemaReference>> {
210 let mut parent_to_child = HashMap::new();
230 for (definition_name, definition) in &root_schema.definitions {
231 let parent_schema = match definition {
233 Schema::Bool(_) => continue,
234 Schema::Object(schema) => schema,
235 };
236
237 debug!(
238 "Evaluating schema definition '{}' for markability.",
239 definition_name
240 );
241
242 if !is_markable_schema(&root_schema.definitions, parent_schema) {
246 debug!("Schema definition '{}' not markable.", definition_name);
247 continue;
248 } else {
249 debug!(
250 "Schema definition '{}' markable. Collecting referents.",
251 definition_name
252 );
253 }
254
255 let mut referents = HashSet::new();
259 get_referents(parent_schema, &mut referents);
260
261 debug!(
262 "Collected {} referents for '{}': {:?}",
263 referents.len(),
264 definition_name,
265 referents
266 );
267
268 parent_to_child.insert(SchemaReference::from(definition_name), referents);
270 }
271
272 let mut root_referents = HashSet::new();
274 get_referents(&root_schema.schema, &mut root_referents);
275 parent_to_child.insert(SchemaReference::Root, root_referents);
276
277 let mut child_to_parent = HashMap::new();
281 for (parent_schema_ref, child_referents) in parent_to_child {
282 for child_referent in child_referents {
283 let entry = child_to_parent
284 .entry(child_referent.referent.as_ref().to_string())
285 .or_insert_with(HashSet::new);
286
287 entry.insert(child_referent.with_new_referent(parent_schema_ref.clone()));
291 }
292 }
293
294 let mut eligible_to_flatten = HashMap::new();
295 for (child_schema_ref, referrers) in child_to_parent {
296 if referrers.len() < 2 {
298 continue;
299 }
300
301 let would_unmark = referrers
302 .iter()
303 .filter(|r| r.would_unmark)
304 .map(|r| r.referent.clone())
305 .collect::<HashSet<_>>();
306 let would_not_unmark = referrers
307 .iter()
308 .filter(|r| !r.would_unmark)
309 .map(|r| r.referent.clone())
310 .collect::<HashSet<_>>();
311
312 if would_not_unmark.len() >= would_unmark.len() {
313 eligible_to_flatten.insert(child_schema_ref.to_string(), would_unmark);
314 } else {
315 eligible_to_flatten.insert(child_schema_ref.to_string(), would_not_unmark);
316 }
317 }
318
319 eligible_to_flatten
320}
321
322fn is_markable_schema(definitions: &Map<String, Schema>, schema: &SchemaObject) -> bool {
324 let has_additional_properties = schema
328 .object
329 .as_ref()
330 .and_then(|object| object.additional_properties.as_ref())
331 .map(|schema| matches!(schema.as_ref(), Schema::Object(_)))
332 .unwrap_or(false);
333
334 if is_object_schema(schema) && !has_additional_properties {
335 return true;
336 }
337
338 if let Some(subschema) = schema.subschemas.as_ref() {
346 let subschemas = get_object_subschemas_from_parent(subschema).collect::<Vec<_>>();
347
348 debug!("{} subschemas detected.", subschemas.len());
349
350 let has_object_subschema = subschemas
351 .iter()
352 .any(|schema| is_markable_schema(definitions, schema));
353 let has_referenced_object_subschema = subschemas.iter().any(|subschema| {
354 subschema
355 .reference
356 .as_ref()
357 .and_then(|reference| {
358 let reference = get_cleaned_schema_reference(reference);
359 definitions.get_key_value(reference)
360 })
361 .and_then(|(name, schema)| schema.as_object().map(|schema| (name, schema)))
362 .is_some_and(|(name, schema)| {
363 debug!(
364 "Following schema reference '{}' for subschema markability.",
365 name
366 );
367 is_markable_schema(definitions, schema)
368 })
369 });
370
371 debug!(
372 "Schema {} object subschema(s) and {} referenced subschemas.",
373 if has_object_subschema {
374 "has"
375 } else {
376 "does not have"
377 },
378 if has_referenced_object_subschema {
379 "has"
380 } else {
381 "does not have"
382 },
383 );
384
385 if has_object_subschema || has_referenced_object_subschema {
386 return true;
387 }
388 }
389
390 false
391}
392
393fn get_referents(parent_schema: &SchemaObject, referents: &mut HashSet<MarkableReferent>) {
409 if let Some(parent_object) = parent_schema.object.as_ref() {
410 for (_, property_schema) in parent_object
413 .properties
414 .iter()
415 .chain(parent_object.pattern_properties.iter())
416 {
417 if let Some(child_schema) = property_schema.as_object() {
418 if let Some(child_schema_ref) = child_schema.reference.as_ref() {
419 referents.insert(MarkableReferent::would_not_unmark(child_schema_ref));
420 } else {
421 get_referents(child_schema, referents);
422 }
423 }
424 }
425
426 if let Some(additional_properties) = parent_object.additional_properties.as_ref() {
429 if let Some(child_schema) = additional_properties.as_ref().as_object() {
430 if let Some(child_schema_ref) = child_schema.reference.as_ref() {
431 referents.insert(MarkableReferent::would_not_unmark(child_schema_ref));
432 } else {
433 get_referents(child_schema, referents);
434 }
435 }
436 }
437 }
438
439 if let Some(subschema) = parent_schema.subschemas.as_ref() {
440 for subschema in get_object_subschemas_from_parent(subschema) {
443 if let Some(child_schema_ref) = subschema.reference.as_ref() {
444 referents.insert(MarkableReferent::would_unmark(child_schema_ref));
445 } else {
446 get_referents(subschema, referents);
447 }
448 }
449 }
450}
451
452fn get_object_subschemas_from_parent(
453 subschema: &SubschemaValidation,
454) -> impl Iterator<Item = &SchemaObject> {
455 [
456 subschema.all_of.as_ref(),
457 subschema.one_of.as_ref(),
458 subschema.any_of.as_ref(),
459 ]
460 .into_iter()
461 .flatten()
462 .flatten()
463 .filter_map(Schema::as_object)
464}
465
466fn get_object_subschemas_from_parent_mut(
467 subschema: &mut SubschemaValidation,
468) -> impl Iterator<Item = &mut SchemaObject> {
469 [
470 subschema.all_of.as_mut(),
471 subschema.one_of.as_mut(),
472 subschema.any_of.as_mut(),
473 ]
474 .into_iter()
475 .flatten()
476 .flatten()
477 .filter_map(Schema::as_object_mut)
478}
479
480fn mark_schema_closed(schema: &mut SchemaObject) {
481 if let Some(Schema::Object(_)) = schema
492 .object()
493 .additional_properties
494 .as_ref()
495 .map(|v| v.as_ref())
496 {
497 return;
498 }
499
500 if schema
504 .object
505 .as_ref()
506 .and_then(|object| object.unevaluated_properties.as_ref())
507 .is_some()
508 {
509 return;
510 }
511
512 schema.object().unevaluated_properties = Some(Box::new(Schema::Bool(false)));
513}
514
515fn schema_type_matches(
516 schema: &SchemaObject,
517 instance_type: InstanceType,
518 allow_multiple: bool,
519) -> bool {
520 match schema.instance_type.as_ref() {
521 Some(sov) => match sov {
522 SingleOrVec::Single(inner) => inner.as_ref() == &instance_type,
523 SingleOrVec::Vec(inner) => inner.contains(&instance_type) && allow_multiple,
524 },
525 None => false,
526 }
527}
528
529fn is_object_schema(schema: &SchemaObject) -> bool {
530 schema_type_matches(schema, InstanceType::Object, true)
531}
532
#[cfg(test)]
mod tests {
    use serde_json::{json, Value};
    use vector_config_common::schema::visit::Visitor;

    use crate::schema::visitors::test::{as_schema, assert_schemas_eq};

    use super::DisallowUnevaluatedPropertiesVisitor;

    /// Runs a fresh visitor over `input` and asserts that the result equals `expected`.
    fn assert_visited_schema(input: Value, expected: Value) {
        let mut actual_schema = as_schema(input);

        let mut visitor = DisallowUnevaluatedPropertiesVisitor::default();
        visitor.visit_root_schema(&mut actual_schema);

        let expected_schema = as_schema(expected);

        assert_schemas_eq(expected_schema, actual_schema);
    }

    #[test]
    fn basic_object_schema() {
        assert_visited_schema(
            json!({
                "type": "object",
                "properties": {
                    "a": { "type": "string" }
                }
            }),
            json!({
                "type": "object",
                "properties": {
                    "a": { "type": "string" }
                },
                "unevaluatedProperties": false
            }),
        );
    }

    #[test]
    fn basic_object_schema_through_ref() {
        assert_visited_schema(
            json!({
                "$ref": "#/definitions/simple",
                "definitions": {
                    "simple": {
                        "type": "object",
                        "properties": {
                            "a": { "type": "string" }
                        }
                    }
                }
            }),
            json!({
                "$ref": "#/definitions/simple",
                "definitions": {
                    "simple": {
                        "type": "object",
                        "properties": {
                            "a": { "type": "string" }
                        },
                        "unevaluatedProperties": false
                    }
                }
            }),
        );
    }

    #[test]
    fn all_of_with_basic_object_schemas() {
        assert_visited_schema(
            json!({
                "type": "object",
                "allOf": [{
                    "type": "object",
                    "properties": {
                        "a": { "type": "string" }
                    }
                },
                {
                    "type": "object",
                    "properties": {
                        "b": { "type": "string" }
                    }
                }]
            }),
            json!({
                "type": "object",
                "allOf": [{
                    "type": "object",
                    "properties": {
                        "a": { "type": "string" }
                    }
                },
                {
                    "type": "object",
                    "properties": {
                        "b": { "type": "string" }
                    }
                }],
                "unevaluatedProperties": false
            }),
        );
    }

    #[test]
    fn one_of_with_basic_object_schemas() {
        assert_visited_schema(
            json!({
                "type": "object",
                "oneOf": [{
                    "type": "object",
                    "properties": {
                        "a": { "type": "string" }
                    }
                },
                {
                    "type": "object",
                    "properties": {
                        "b": { "type": "string" }
                    }
                }]
            }),
            json!({
                "type": "object",
                "oneOf": [{
                    "type": "object",
                    "properties": {
                        "a": { "type": "string" }
                    }
                },
                {
                    "type": "object",
                    "properties": {
                        "b": { "type": "string" }
                    }
                }],
                "unevaluatedProperties": false
            }),
        );
    }

    #[test]
    fn any_of_with_basic_object_schemas() {
        assert_visited_schema(
            json!({
                "type": "object",
                "anyOf": [{
                    "type": "object",
                    "properties": {
                        "a": { "type": "string" }
                    }
                },
                {
                    "type": "object",
                    "properties": {
                        "b": { "type": "string" }
                    }
                }]
            }),
            json!({
                "type": "object",
                "anyOf": [{
                    "type": "object",
                    "properties": {
                        "a": { "type": "string" }
                    }
                },
                {
                    "type": "object",
                    "properties": {
                        "b": { "type": "string" }
                    }
                }],
                "unevaluatedProperties": false
            }),
        );
    }

    #[test]
    fn ignores_object_schema_with_non_boolean_additional_properties() {
        // The visitor is expected to leave this schema exactly as it was given.
        let input = json!({
            "type": "object",
            "properties": {
                "a": { "type": "string" }
            },
            "additionalProperties": { "type": "number" }
        });

        assert_visited_schema(input.clone(), input);
    }

    #[test]
    fn object_schema_with_boolean_additional_properties() {
        assert_visited_schema(
            json!({
                "type": "object",
                "properties": {
                    "a": { "type": "string" }
                },
                "additionalProperties": false
            }),
            json!({
                "type": "object",
                "properties": {
                    "a": { "type": "string" }
                },
                "additionalProperties": false,
                "unevaluatedProperties": false
            }),
        );
    }

    #[test]
    fn all_of_with_object_props_using_schema_refs() {
        assert_visited_schema(
            json!({
                "type": "object",
                "allOf": [{
                    "type": "object",
                    "properties": {
                        "a": { "$ref": "#/definitions/subschema" }
                    }
                },
                {
                    "type": "object",
                    "properties": {
                        "aa": {
                            "type": "object",
                            "properties": {
                                "a": { "$ref": "#/definitions/subschema" }
                            }
                        }
                    }
                }],
                "definitions": {
                    "subschema": {
                        "type": "object",
                        "properties": {
                            "f": { "type": "string" }
                        }
                    }
                }
            }),
            json!({
                "type": "object",
                "allOf": [{
                    "type": "object",
                    "properties": {
                        "a": { "$ref": "#/definitions/subschema" }
                    }
                },
                {
                    "type": "object",
                    "properties": {
                        "aa": {
                            "type": "object",
                            "properties": {
                                "a": { "$ref": "#/definitions/subschema" }
                            },
                            "unevaluatedProperties": false
                        }
                    }
                }],
                "definitions": {
                    "subschema": {
                        "type": "object",
                        "properties": {
                            "f": { "type": "string" }
                        },
                        "unevaluatedProperties": false
                    }
                },
                "unevaluatedProperties": false
            }),
        );
    }

    #[test]
    fn conflicting_schema_usages_get_duplicated_and_flattened() {
        // `acks` is referenced from a root property (may stay closed) and from the `allOf` of
        // `custom_acks` (must be opened). The `allOf` usage is expected to receive an inline
        // copy while the shared definition stays closed.
        assert_visited_schema(
            json!({
                "type": "object",
                "properties": {
                    "acks": { "$ref": "#/definitions/acks" },
                    "custom_acks": { "$ref": "#/definitions/custom_acks" }
                },
                "definitions": {
                    "custom_acks": {
                        "allOf": [{ "type": "object", "properties": { "ack_count": { "type": "number" } } },
                                  { "$ref": "#/definitions/acks" }]
                    },
                    "acks": { "type": "object", "properties": { "enabled": { "type": "boolean" } } }
                }
            }),
            json!({
                "type": "object",
                "properties": {
                    "acks": { "$ref": "#/definitions/acks" },
                    "custom_acks": { "$ref": "#/definitions/custom_acks" }
                },
                "definitions": {
                    "custom_acks": {
                        "allOf": [
                            { "type": "object", "properties": { "ack_count": { "type": "number" } } },
                            { "type": "object", "properties": { "enabled": { "type": "boolean" } } }
                        ],
                        "unevaluatedProperties": false
                    },
                    "acks": {
                        "type": "object",
                        "properties": { "enabled": { "type": "boolean" } },
                        "unevaluatedProperties": false
                    }
                },
                "unevaluatedProperties": false
            }),
        );
    }

    #[test]
    fn multiple_mark_unmark_references_flattened_efficiently() {
        // Referrers per definition in the input below:
        //
        // - `a`: only the root property, so it is never a flattening candidate.
        // - `b`: the root property (would not unmark) and `two` (would unmark). On a tie the
        //   "would unmark" side is flattened, so `two` gets an inline copy of `b`.
        // - `c`: the root property (would not unmark) plus `one` and `two` (would unmark). The
        //   smaller side is flattened, so the root property gets an inline copy of `c`.
        assert_visited_schema(
            json!({
                "type": "object",
                "properties": {
                    "a": { "$ref": "#/definitions/a" },
                    "b": { "$ref": "#/definitions/b" },
                    "c": { "$ref": "#/definitions/c" },
                    "one": { "$ref": "#/definitions/one" },
                    "two": { "$ref": "#/definitions/two" }
                },
                "definitions": {
                    "one": {
                        "allOf": [{ "$ref": "#/definitions/c" }]
                    },
                    "two": {
                        "allOf": [{ "$ref": "#/definitions/b" }, { "$ref": "#/definitions/c" }]
                    },
                    "a": {
                        "type": "object",
                        "properties": { "a": { "type": "boolean" } }
                    },
                    "b": {
                        "type": "object",
                        "properties": { "b": { "type": "boolean" } }
                    },
                    "c": {
                        "type": "object",
                        "properties": { "c": { "type": "boolean" } }
                    }
                }
            }),
            json!({
                "type": "object",
                "properties": {
                    "a": { "$ref": "#/definitions/a" },
                    "b": { "$ref": "#/definitions/b" },
                    "c": {
                        "type": "object",
                        "properties": { "c": { "type": "boolean" } },
                        "unevaluatedProperties": false
                    },
                    "one": { "$ref": "#/definitions/one" },
                    "two": { "$ref": "#/definitions/two" }
                },
                "definitions": {
                    "one": {
                        "allOf": [{ "$ref": "#/definitions/c" }],
                        "unevaluatedProperties": false
                    },
                    "two": {
                        "allOf": [
                            {
                                "type": "object",
                                "properties": { "b": { "type": "boolean" } }
                            },
                            { "$ref": "#/definitions/c" }
                        ],
                        "unevaluatedProperties": false
                    },
                    "a": {
                        "type": "object",
                        "properties": { "a": { "type": "boolean" } },
                        "unevaluatedProperties": false
                    },
                    "b": {
                        "type": "object",
                        "properties": { "b": { "type": "boolean" } },
                        "unevaluatedProperties": false
                    },
                    "c": {
                        "type": "object",
                        "properties": { "c": { "type": "boolean" } }
                    }
                },
                "unevaluatedProperties": false
            }),
        );
    }
}