1use std::collections::{HashMap, HashSet};
2
3use tracing::debug;
4use vector_config_common::schema::{
5 visit::{Visitor, with_resolved_schema_reference},
6 *,
7};
8
9use super::scoped_visit::{
10 SchemaReference, SchemaScopeStack, ScopedVisitor, visit_schema_object_scoped,
11};
12use crate::schema::visitors::merge::Mergeable;
13
/// A visitor that closes schemas by setting `unevaluatedProperties` to `false`.
///
/// Object schemas, and schemas that carry `allOf`/`oneOf`/`anyOf` object subschemas, are marked
/// closed. The subschemas themselves are explicitly left open (`unevaluatedProperties: true`), so
/// the restriction applies at the level of the schema that owns the subschema validation.
///
/// When the same definition is referenced both from a subschema (which would leave it open) and
/// from somewhere that needs it closed, selected `$ref`s are replaced by an inline copy of the
/// definition so that the two usages no longer share one schema.
#[derive(Debug, Default)]
pub struct DisallowUnevaluatedPropertiesVisitor {
    /// Stack of schema scopes, driven through the `ScopedVisitor` implementation. The top of the
    /// stack (or the root, when empty) is treated as the referrer of the schema being visited.
    scope_stack: SchemaScopeStack,
    /// For each referent, looked up by the `$ref` value of the schema being visited, the set of
    /// referrers within which that reference is flattened (merged inline) instead of kept.
    eligible_to_flatten: HashMap<String, HashSet<SchemaReference>>,
}
30
31impl DisallowUnevaluatedPropertiesVisitor {
32 pub fn from_settings(_: &SchemaSettings) -> Self {
33 Self {
34 scope_stack: SchemaScopeStack::default(),
35 eligible_to_flatten: HashMap::new(),
36 }
37 }
38}
39
40impl Visitor for DisallowUnevaluatedPropertiesVisitor {
41 fn visit_root_schema(&mut self, root: &mut RootSchema) {
42 let eligible_to_flatten = build_closed_schema_flatten_eligibility_mappings(root);
43
44 debug!(
45 "Found {} referents eligible for flattening: {:?}",
46 eligible_to_flatten.len(),
47 eligible_to_flatten,
48 );
49
50 self.eligible_to_flatten = eligible_to_flatten;
51
52 visit::visit_root_schema(self, root);
53 }
54
55 fn visit_schema_object(
56 &mut self,
57 definitions: &mut Map<String, Schema>,
58 schema: &mut SchemaObject,
59 ) {
60 if let Some(reference) = schema.reference.as_ref() {
72 let current_parent_schema_ref = self.get_current_schema_scope();
73
74 if let Some(referrers) = self.eligible_to_flatten.get(reference)
75 && referrers.contains(current_parent_schema_ref)
76 {
77 let current_schema_ref = get_cleaned_schema_reference(reference);
78 let referenced_schema = definitions
79 .get(current_schema_ref)
80 .expect("schema definition must exist");
81
82 debug!(
83 referent = current_schema_ref,
84 referrer = current_parent_schema_ref.as_ref(),
85 "Found eligible referent/referrer mapping."
86 );
87
88 if let Schema::Object(referenced_schema) = referenced_schema {
89 debug!(
90 referent = current_schema_ref,
91 referrer = current_parent_schema_ref.as_ref(),
92 "Flattening referent into referrer."
93 );
94
95 schema.reference = None;
96 schema.merge(referenced_schema);
97 }
98 }
99 }
100
101 visit_schema_object_scoped(self, definitions, schema);
104
105 let mut had_relevant_subschemas = false;
114 if let Some(subschema) = schema.subschemas.as_mut() {
115 let subschemas = get_object_subschemas_from_parent_mut(subschema.as_mut());
116 for subschema in subschemas {
117 had_relevant_subschemas = true;
118
119 unmark_or_flatten_schema(definitions, subschema);
120 }
121 }
122
123 if had_relevant_subschemas || is_object_schema(schema) {
126 mark_schema_closed(schema);
127 }
128 }
129}
130
131impl ScopedVisitor for DisallowUnevaluatedPropertiesVisitor {
132 fn push_schema_scope<S: Into<SchemaReference>>(&mut self, scope: S) {
133 self.scope_stack.push(scope.into());
134 }
135
136 fn pop_schema_scope(&mut self) {
137 self.scope_stack.pop().expect("stack was empty during pop");
138 }
139
140 fn get_current_schema_scope(&self) -> &SchemaReference {
141 self.scope_stack.current().unwrap_or(&SchemaReference::Root)
142 }
143}
144
145fn unmark_or_flatten_schema(definitions: &mut Map<String, Schema>, schema: &mut SchemaObject) {
146 if let Some(object) = schema.object.as_mut() {
154 debug!("Unmarked object subschema directly.");
155
156 object.unevaluated_properties = Some(Box::new(Schema::Bool(true)));
157 } else {
158 with_resolved_schema_reference(definitions, schema, |_, schema_ref, resolved| {
159 if let Schema::Object(resolved) = resolved
160 && let Some(object) = resolved.object.as_mut()
161 {
162 debug!(
163 referent = schema_ref,
164 "Unmarked subschema by traversing schema reference."
165 );
166
167 object.unevaluated_properties = Some(Box::new(Schema::Bool(true)));
168 }
169 });
170 }
171}
172
/// One usage of a referenced schema, tagged with whether that usage would unmark the referent.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct MarkableReferent {
    /// `true` when the reference was found under `allOf`/`oneOf`/`anyOf`, where the visitor opens
    /// up the referenced schema; `false` when it was found under `properties`,
    /// `patternProperties` or `additionalProperties`.
    would_unmark: bool,

    /// The schema on the other end of the usage. Initially the referenced schema (the referent);
    /// once the mapping is inverted via `with_new_referent`, this holds the referrer instead.
    referent: SchemaReference,
}
182
183impl MarkableReferent {
184 fn would_unmark<R: Into<SchemaReference>>(referent: R) -> Self {
185 Self {
186 would_unmark: true,
187 referent: referent.into(),
188 }
189 }
190
191 fn would_not_unmark<R: Into<SchemaReference>>(referent: R) -> Self {
192 Self {
193 would_unmark: false,
194 referent: referent.into(),
195 }
196 }
197
198 fn with_new_referent<R: Into<SchemaReference>>(&self, new_referent: R) -> Self {
199 Self {
200 would_unmark: self.would_unmark,
201 referent: new_referent.into(),
202 }
203 }
204}
205
206fn build_closed_schema_flatten_eligibility_mappings(
207 root_schema: &RootSchema,
208) -> HashMap<String, HashSet<SchemaReference>> {
209 let mut parent_to_child = HashMap::new();
229 for (definition_name, definition) in &root_schema.definitions {
230 let parent_schema = match definition {
232 Schema::Bool(_) => continue,
233 Schema::Object(schema) => schema,
234 };
235
236 debug!(
237 "Evaluating schema definition '{}' for markability.",
238 definition_name
239 );
240
241 if !is_markable_schema(&root_schema.definitions, parent_schema) {
245 debug!("Schema definition '{}' not markable.", definition_name);
246 continue;
247 } else {
248 debug!(
249 "Schema definition '{}' markable. Collecting referents.",
250 definition_name
251 );
252 }
253
254 let mut referents = HashSet::new();
258 get_referents(parent_schema, &mut referents);
259
260 debug!(
261 "Collected {} referents for '{}': {:?}",
262 referents.len(),
263 definition_name,
264 referents
265 );
266
267 parent_to_child.insert(SchemaReference::from(definition_name), referents);
269 }
270
271 let mut root_referents = HashSet::new();
273 get_referents(&root_schema.schema, &mut root_referents);
274 parent_to_child.insert(SchemaReference::Root, root_referents);
275
276 let mut child_to_parent = HashMap::new();
280 for (parent_schema_ref, child_referents) in parent_to_child {
281 for child_referent in child_referents {
282 let entry = child_to_parent
283 .entry(child_referent.referent.as_ref().to_string())
284 .or_insert_with(HashSet::new);
285
286 entry.insert(child_referent.with_new_referent(parent_schema_ref.clone()));
290 }
291 }
292
293 let mut eligible_to_flatten = HashMap::new();
294 for (child_schema_ref, referrers) in child_to_parent {
295 if referrers.len() < 2 {
297 continue;
298 }
299
300 let would_unmark = referrers
301 .iter()
302 .filter(|r| r.would_unmark)
303 .map(|r| r.referent.clone())
304 .collect::<HashSet<_>>();
305 let would_not_unmark = referrers
306 .iter()
307 .filter(|r| !r.would_unmark)
308 .map(|r| r.referent.clone())
309 .collect::<HashSet<_>>();
310
311 if would_not_unmark.len() >= would_unmark.len() {
312 eligible_to_flatten.insert(child_schema_ref.to_string(), would_unmark);
313 } else {
314 eligible_to_flatten.insert(child_schema_ref.to_string(), would_not_unmark);
315 }
316 }
317
318 eligible_to_flatten
319}
320
321fn is_markable_schema(definitions: &Map<String, Schema>, schema: &SchemaObject) -> bool {
323 let has_additional_properties = schema
327 .object
328 .as_ref()
329 .and_then(|object| object.additional_properties.as_ref())
330 .map(|schema| matches!(schema.as_ref(), Schema::Object(_)))
331 .unwrap_or(false);
332
333 if is_object_schema(schema) && !has_additional_properties {
334 return true;
335 }
336
337 if let Some(subschema) = schema.subschemas.as_ref() {
345 let subschemas = get_object_subschemas_from_parent(subschema).collect::<Vec<_>>();
346
347 debug!("{} subschemas detected.", subschemas.len());
348
349 let has_object_subschema = subschemas
350 .iter()
351 .any(|schema| is_markable_schema(definitions, schema));
352 let has_referenced_object_subschema = subschemas.iter().any(|subschema| {
353 subschema
354 .reference
355 .as_ref()
356 .and_then(|reference| {
357 let reference = get_cleaned_schema_reference(reference);
358 definitions.get_key_value(reference)
359 })
360 .and_then(|(name, schema)| schema.as_object().map(|schema| (name, schema)))
361 .is_some_and(|(name, schema)| {
362 debug!(
363 "Following schema reference '{}' for subschema markability.",
364 name
365 );
366 is_markable_schema(definitions, schema)
367 })
368 });
369
370 debug!(
371 "Schema {} object subschema(s) and {} referenced subschemas.",
372 if has_object_subschema {
373 "has"
374 } else {
375 "does not have"
376 },
377 if has_referenced_object_subschema {
378 "has"
379 } else {
380 "does not have"
381 },
382 );
383
384 if has_object_subschema || has_referenced_object_subschema {
385 return true;
386 }
387 }
388
389 false
390}
391
392fn get_referents(parent_schema: &SchemaObject, referents: &mut HashSet<MarkableReferent>) {
408 if let Some(parent_object) = parent_schema.object.as_ref() {
409 for (_, property_schema) in parent_object
412 .properties
413 .iter()
414 .chain(parent_object.pattern_properties.iter())
415 {
416 if let Some(child_schema) = property_schema.as_object() {
417 if let Some(child_schema_ref) = child_schema.reference.as_ref() {
418 referents.insert(MarkableReferent::would_not_unmark(child_schema_ref));
419 } else {
420 get_referents(child_schema, referents);
421 }
422 }
423 }
424
425 if let Some(additional_properties) = parent_object.additional_properties.as_ref()
428 && let Some(child_schema) = additional_properties.as_ref().as_object()
429 {
430 if let Some(child_schema_ref) = child_schema.reference.as_ref() {
431 referents.insert(MarkableReferent::would_not_unmark(child_schema_ref));
432 } else {
433 get_referents(child_schema, referents);
434 }
435 }
436 }
437
438 if let Some(subschema) = parent_schema.subschemas.as_ref() {
439 for subschema in get_object_subschemas_from_parent(subschema) {
442 if let Some(child_schema_ref) = subschema.reference.as_ref() {
443 referents.insert(MarkableReferent::would_unmark(child_schema_ref));
444 } else {
445 get_referents(subschema, referents);
446 }
447 }
448 }
449}
450
451fn get_object_subschemas_from_parent(
452 subschema: &SubschemaValidation,
453) -> impl Iterator<Item = &SchemaObject> {
454 [
455 subschema.all_of.as_ref(),
456 subschema.one_of.as_ref(),
457 subschema.any_of.as_ref(),
458 ]
459 .into_iter()
460 .flatten()
461 .flatten()
462 .filter_map(Schema::as_object)
463}
464
465fn get_object_subschemas_from_parent_mut(
466 subschema: &mut SubschemaValidation,
467) -> impl Iterator<Item = &mut SchemaObject> {
468 [
469 subschema.all_of.as_mut(),
470 subschema.one_of.as_mut(),
471 subschema.any_of.as_mut(),
472 ]
473 .into_iter()
474 .flatten()
475 .flatten()
476 .filter_map(Schema::as_object_mut)
477}
478
479fn mark_schema_closed(schema: &mut SchemaObject) {
480 if let Some(Schema::Object(_)) = schema
491 .object()
492 .additional_properties
493 .as_ref()
494 .map(|v| v.as_ref())
495 {
496 return;
497 }
498
499 if schema
503 .object
504 .as_ref()
505 .and_then(|object| object.unevaluated_properties.as_ref())
506 .is_some()
507 {
508 return;
509 }
510
511 schema.object().unevaluated_properties = Some(Box::new(Schema::Bool(false)));
512}
513
514fn schema_type_matches(
515 schema: &SchemaObject,
516 instance_type: InstanceType,
517 allow_multiple: bool,
518) -> bool {
519 match schema.instance_type.as_ref() {
520 Some(sov) => match sov {
521 SingleOrVec::Single(inner) => inner.as_ref() == &instance_type,
522 SingleOrVec::Vec(inner) => inner.contains(&instance_type) && allow_multiple,
523 },
524 None => false,
525 }
526}
527
528fn is_object_schema(schema: &SchemaObject) -> bool {
529 schema_type_matches(schema, InstanceType::Object, true)
530}
531
#[cfg(test)]
mod tests {
    use serde_json::{Value, json};
    use vector_config_common::schema::visit::Visitor;

    use super::DisallowUnevaluatedPropertiesVisitor;
    use crate::schema::visitors::test::{as_schema, assert_schemas_eq};

    /// Runs a fresh visitor over `input` and asserts that the result equals `expected`.
    fn assert_visited_schema_eq(input: Value, expected: Value) {
        let mut actual_schema = as_schema(input);

        let mut visitor = DisallowUnevaluatedPropertiesVisitor::default();
        visitor.visit_root_schema(&mut actual_schema);

        assert_schemas_eq(as_schema(expected), actual_schema);
    }

    #[test]
    fn basic_object_schema() {
        assert_visited_schema_eq(
            json!({
                "type": "object",
                "properties": {
                    "a": { "type": "string" }
                }
            }),
            json!({
                "type": "object",
                "properties": {
                    "a": { "type": "string" }
                },
                "unevaluatedProperties": false
            }),
        );
    }

    #[test]
    fn basic_object_schema_through_ref() {
        assert_visited_schema_eq(
            json!({
                "$ref": "#/definitions/simple",
                "definitions": {
                    "simple": {
                        "type": "object",
                        "properties": {
                            "a": { "type": "string" }
                        }
                    }
                }
            }),
            json!({
                "$ref": "#/definitions/simple",
                "definitions": {
                    "simple": {
                        "type": "object",
                        "properties": {
                            "a": { "type": "string" }
                        },
                        "unevaluatedProperties": false
                    }
                }
            }),
        );
    }

    #[test]
    fn all_of_with_basic_object_schemas() {
        assert_visited_schema_eq(
            json!({
                "type": "object",
                "allOf": [{
                    "type": "object",
                    "properties": {
                        "a": { "type": "string" }
                    }
                },
                {
                    "type": "object",
                    "properties": {
                        "b": { "type": "string" }
                    }
                }]
            }),
            json!({
                "type": "object",
                "allOf": [{
                    "type": "object",
                    "properties": {
                        "a": { "type": "string" }
                    }
                },
                {
                    "type": "object",
                    "properties": {
                        "b": { "type": "string" }
                    }
                }],
                "unevaluatedProperties": false
            }),
        );
    }

    #[test]
    fn one_of_with_basic_object_schemas() {
        assert_visited_schema_eq(
            json!({
                "type": "object",
                "oneOf": [{
                    "type": "object",
                    "properties": {
                        "a": { "type": "string" }
                    }
                },
                {
                    "type": "object",
                    "properties": {
                        "b": { "type": "string" }
                    }
                }]
            }),
            json!({
                "type": "object",
                "oneOf": [{
                    "type": "object",
                    "properties": {
                        "a": { "type": "string" }
                    }
                },
                {
                    "type": "object",
                    "properties": {
                        "b": { "type": "string" }
                    }
                }],
                "unevaluatedProperties": false
            }),
        );
    }

    #[test]
    fn any_of_with_basic_object_schemas() {
        assert_visited_schema_eq(
            json!({
                "type": "object",
                "anyOf": [{
                    "type": "object",
                    "properties": {
                        "a": { "type": "string" }
                    }
                },
                {
                    "type": "object",
                    "properties": {
                        "b": { "type": "string" }
                    }
                }]
            }),
            json!({
                "type": "object",
                "anyOf": [{
                    "type": "object",
                    "properties": {
                        "a": { "type": "string" }
                    }
                },
                {
                    "type": "object",
                    "properties": {
                        "b": { "type": "string" }
                    }
                }],
                "unevaluatedProperties": false
            }),
        );
    }

    #[test]
    fn ignores_object_schema_with_non_boolean_additional_properties() {
        let input = json!({
            "type": "object",
            "properties": {
                "a": { "type": "string" }
            },
            "additionalProperties": { "type": "number" }
        });

        // The visitor must leave this schema untouched, so the input is also the expectation.
        assert_visited_schema_eq(input.clone(), input);
    }

    #[test]
    fn object_schema_with_boolean_additional_properties() {
        assert_visited_schema_eq(
            json!({
                "type": "object",
                "properties": {
                    "a": { "type": "string" }
                },
                "additionalProperties": false
            }),
            json!({
                "type": "object",
                "properties": {
                    "a": { "type": "string" }
                },
                "additionalProperties": false,
                "unevaluatedProperties": false
            }),
        );
    }

    #[test]
    fn all_of_with_object_props_using_schema_refs() {
        assert_visited_schema_eq(
            json!({
                "type": "object",
                "allOf": [{
                    "type": "object",
                    "properties": {
                        "a": { "$ref": "#/definitions/subschema" }
                    }
                },
                {
                    "type": "object",
                    "properties": {
                        "aa": {
                            "type": "object",
                            "properties": {
                                "a": { "$ref": "#/definitions/subschema" }
                            }
                        }
                    }
                }],
                "definitions": {
                    "subschema": {
                        "type": "object",
                        "properties": {
                            "f": { "type": "string" }
                        }
                    }
                }
            }),
            json!({
                "type": "object",
                "allOf": [{
                    "type": "object",
                    "properties": {
                        "a": { "$ref": "#/definitions/subschema" }
                    }
                },
                {
                    "type": "object",
                    "properties": {
                        "aa": {
                            "type": "object",
                            "properties": {
                                "a": { "$ref": "#/definitions/subschema" }
                            },
                            "unevaluatedProperties": false
                        }
                    }
                }],
                "definitions": {
                    "subschema": {
                        "type": "object",
                        "properties": {
                            "f": { "type": "string" }
                        },
                        "unevaluatedProperties": false
                    }
                },
                "unevaluatedProperties": false
            }),
        );
    }

    #[test]
    fn conflicting_schema_usages_get_duplicated_and_flattened() {
        // `acks` is used once as a plain property (must stay closed) and once inside the `allOf`
        // of `custom_acks` (would be opened up), so the `allOf` usage is expected to be replaced
        // by an inline copy.
        assert_visited_schema_eq(
            json!({
                "type": "object",
                "properties": {
                    "acks": { "$ref": "#/definitions/acks" },
                    "custom_acks": { "$ref": "#/definitions/custom_acks" }
                },
                "definitions": {
                    "custom_acks": {
                        "allOf": [{ "type": "object", "properties": { "ack_count": { "type": "number" } } },
                                  { "$ref": "#/definitions/acks" }]
                    },
                    "acks": { "type": "object", "properties": { "enabled": { "type": "boolean" } } }
                }
            }),
            json!({
                "type": "object",
                "properties": {
                    "acks": { "$ref": "#/definitions/acks" },
                    "custom_acks": { "$ref": "#/definitions/custom_acks" }
                },
                "definitions": {
                    "custom_acks": {
                        "allOf": [
                            { "type": "object", "properties": { "ack_count": { "type": "number" } } },
                            { "type": "object", "properties": { "enabled": { "type": "boolean" } } }
                        ],
                        "unevaluatedProperties": false
                    },
                    "acks": {
                        "type": "object",
                        "properties": { "enabled": { "type": "boolean" } },
                        "unevaluatedProperties": false
                    }
                },
                "unevaluatedProperties": false
            }),
        );
    }

    #[test]
    fn multiple_mark_unmark_references_flattened_efficiently() {
        // `b` has one plain usage (root) and one `allOf` usage (`two`): a tie, so the `allOf`
        // usage is the one expected to be flattened.
        //
        // `c` has one plain usage (root) and two `allOf` usages (`one`, `two`): the plain usage
        // is the smaller group, so the root property is the one expected to be flattened.
        assert_visited_schema_eq(
            json!({
                "type": "object",
                "properties": {
                    "a": { "$ref": "#/definitions/a" },
                    "b": { "$ref": "#/definitions/b" },
                    "c": { "$ref": "#/definitions/c" },
                    "one": { "$ref": "#/definitions/one" },
                    "two": { "$ref": "#/definitions/two" }
                },
                "definitions": {
                    "one": {
                        "allOf": [{ "$ref": "#/definitions/c" }]
                    },
                    "two": {
                        "allOf": [{ "$ref": "#/definitions/b" }, { "$ref": "#/definitions/c" }]
                    },
                    "a": {
                        "type": "object",
                        "properties": { "a": { "type": "boolean" } }
                    },
                    "b": {
                        "type": "object",
                        "properties": { "b": { "type": "boolean" } }
                    },
                    "c": {
                        "type": "object",
                        "properties": { "c": { "type": "boolean" } }
                    }
                }
            }),
            json!({
                "type": "object",
                "properties": {
                    "a": { "$ref": "#/definitions/a" },
                    "b": { "$ref": "#/definitions/b" },
                    "c": {
                        "type": "object",
                        "properties": { "c": { "type": "boolean" } },
                        "unevaluatedProperties": false
                    },
                    "one": { "$ref": "#/definitions/one" },
                    "two": { "$ref": "#/definitions/two" }
                },
                "definitions": {
                    "one": {
                        "allOf": [{ "$ref": "#/definitions/c" }],
                        "unevaluatedProperties": false
                    },
                    "two": {
                        "allOf": [
                            {
                                "type": "object",
                                "properties": { "b": { "type": "boolean" } }
                            },
                            { "$ref": "#/definitions/c" }
                        ],
                        "unevaluatedProperties": false
                    },
                    "a": {
                        "type": "object",
                        "properties": { "a": { "type": "boolean" } },
                        "unevaluatedProperties": false
                    },
                    "b": {
                        "type": "object",
                        "properties": { "b": { "type": "boolean" } },
                        "unevaluatedProperties": false
                    },
                    "c": {
                        "type": "object",
                        "properties": { "c": { "type": "boolean" } }
                    }
                },
                "unevaluatedProperties": false
            }),
        );
    }
}