vector_config/schema/visitors/
inline_single.rs

1use std::collections::{HashMap, HashSet};
2
3use serde_json::Value;
4use tracing::debug;
5use vector_config_common::schema::{visit::Visitor, *};
6
7use crate::schema::visitors::merge::Mergeable;
8
9use super::scoped_visit::{
10    visit_schema_object_scoped, SchemaReference, SchemaScopeStack, ScopedVisitor,
11};
12
/// A schema visitor that inlines definitions which are referenced exactly once.
///
/// Schema generation frequently emits definitions -- typically where generics or flattening are
/// involved -- that are pointed to by only a single other schema definition, which may itself
/// only be pointed to by one other definition, and so on.
///
/// Resolving such references means needless "pointer chasing": nothing inherently requires those
/// schemas to be defined in a way that allows them to be referenced.
///
/// This visitor gathers every schema reference, and wherever a schema turns out to be referenced
/// a single time, substitutes the referenced schema in place of the reference and deletes the
/// schema definition from the root definitions.
#[derive(Debug, Default)]
pub struct InlineSingleUseReferencesVisitor {
    /// Names of the schema definitions currently selected for inlining.
    eligible_to_inline: HashSet<String>,
}
29
30impl InlineSingleUseReferencesVisitor {
31    pub fn from_settings(_: &SchemaSettings) -> Self {
32        Self {
33            eligible_to_inline: HashSet::new(),
34        }
35    }
36}
37
38impl Visitor for InlineSingleUseReferencesVisitor {
39    fn visit_root_schema(&mut self, root: &mut RootSchema) {
40        // Build a map of schema references and the number of times they're referenced through the
41        // entire schema, by visiting the root schema in a recursive fashion, using a helper visitor.
42        let mut occurrence_visitor = OccurrenceVisitor::default();
43        occurrence_visitor.visit_root_schema(root);
44        let occurrence_map = occurrence_visitor.occurrence_map;
45
46        self.eligible_to_inline = occurrence_map
47            .into_iter()
48            // Filter out any schemas which have more than one occurrence, as naturally, we're
49            // trying to inline single-use schema references. :)
50            .filter_map(|(def_name, occurrences)| (occurrences == 1).then_some(def_name))
51            // However, we'll also filter out some specific schema definitions which are only
52            // referenced once, specifically: component base types and component types themselves.
53            //
54            // We do this as a lot of the tooling that parses the schema to generate documentation,
55            // and the like, depends on these schemas existing in the top-level definitions for easy
56            // lookup.
57            .filter(|def_name| {
58                let schema = root
59                    .definitions
60                    .get(def_name.as_ref())
61                    .and_then(Schema::as_object)
62                    .expect("schema definition must exist");
63
64                is_inlineable_schema(def_name.as_ref(), schema)
65            })
66            .map(|s| s.as_ref().to_string())
67            .collect::<HashSet<_>>();
68
69        // Now run our own visitor logic, which will use the inline eligibility to determine if a
70        // schema reference in a being-visited schema should be replaced inline with the original
71        // referenced schema, in turn removing the schema definition.
72        visit::visit_root_schema(self, root);
73
74        // Now remove all of the definitions for schemas that were eligible for inlining.
75        for schema_def_name in self.eligible_to_inline.drain() {
76            debug!(
77                referent = schema_def_name,
78                "Removing schema definition from root schema."
79            );
80
81            root.definitions
82                .remove(&schema_def_name)
83                .expect("referenced schema must exist in definitions");
84        }
85    }
86
87    fn visit_schema_object(
88        &mut self,
89        definitions: &mut Map<String, Schema>,
90        schema: &mut SchemaObject,
91    ) {
92        // Recursively visit this schema first.
93        visit::visit_schema_object(self, definitions, schema);
94
95        // If this schema has a schema reference, see if it's in our inline eligibility map. If so,
96        // we remove the referenced schema from the definitions, and then merge it into the current
97        // schema, after removing the `$ref` field.
98        if let Some(schema_ref) = schema.reference.as_ref().cloned() {
99            let schema_ref = get_cleaned_schema_reference(&schema_ref);
100            if self.eligible_to_inline.contains(schema_ref) {
101                let referenced_schema = definitions
102                    .get(schema_ref)
103                    .expect("referenced schema must exist in definitions");
104
105                if let Schema::Object(referenced_schema) = referenced_schema {
106                    debug!(
107                        referent = schema_ref,
108                        "Inlining eligible schema reference into current schema."
109                    );
110
111                    schema.reference = None;
112                    schema.merge(referenced_schema);
113                }
114            }
115        }
116    }
117}
118
119fn is_inlineable_schema(definition_name: &str, schema: &SchemaObject) -> bool {
120    static DISALLOWED_SCHEMAS: &[&str] = &[
121        "vector::sources::Sources",
122        "vector::transforms::Transforms",
123        "vector::sinks::Sinks",
124    ];
125
126    // We want to avoid inlining all of the relevant top-level types used for defining components:
127    // the "outer" types (i.e. `SinkOuter<T>`), the enum/collection types (i.e. the big `Sources`
128    // enum), and the component configuration types themselves (i.e. `AmqpSinkConfig`).
129    //
130    // There's nothing _technically_ wrong with doing so, but it would break downstream consumers of
131    // the schema that parse it in order to extract the individual components and other
132    // component-specific metadata.
133    let is_component_base = get_schema_metadata_attr(schema, "docs::component_base_type").is_some();
134    let is_component = get_schema_metadata_attr(schema, "docs::component_type").is_some();
135
136    let is_allowed_schema = !DISALLOWED_SCHEMAS.contains(&definition_name);
137
138    !is_component_base && !is_component && is_allowed_schema
139}
140
141#[derive(Debug, Default)]
142struct OccurrenceVisitor {
143    scope_stack: SchemaScopeStack,
144    occurrence_map: HashMap<SchemaReference, usize>,
145}
146
147impl Visitor for OccurrenceVisitor {
148    fn visit_schema_object(
149        &mut self,
150        definitions: &mut Map<String, Schema>,
151        schema: &mut SchemaObject,
152    ) {
153        visit_schema_object_scoped(self, definitions, schema);
154
155        if let Some(current_schema_ref) = schema.reference.as_ref() {
156            let current_schema_ref = get_cleaned_schema_reference(current_schema_ref);
157            *self
158                .occurrence_map
159                .entry(current_schema_ref.into())
160                .or_default() += 1;
161        }
162    }
163}
164
165impl ScopedVisitor for OccurrenceVisitor {
166    fn push_schema_scope<S: Into<SchemaReference>>(&mut self, scope: S) {
167        self.scope_stack.push(scope.into());
168    }
169
170    fn pop_schema_scope(&mut self) {
171        self.scope_stack.pop().expect("stack was empty during pop");
172    }
173
174    fn get_current_schema_scope(&self) -> &SchemaReference {
175        self.scope_stack.current().unwrap_or(&SchemaReference::Root)
176    }
177}
178
179fn get_schema_metadata_attr<'a>(schema: &'a SchemaObject, key: &str) -> Option<&'a Value> {
180    schema
181        .extensions
182        .get("_metadata")
183        .and_then(|metadata| metadata.get(key))
184}
185
#[cfg(test)]
mod tests {
    use serde_json::{json, Value};
    use vector_config_common::schema::visit::Visitor;

    use crate::schema::visitors::test::{as_schema, assert_schemas_eq};

    use super::InlineSingleUseReferencesVisitor;

    /// Runs the visitor over `input` and asserts that the resulting schema matches `expected`.
    fn assert_inlined(input: Value, expected: Value) {
        let mut actual_schema = as_schema(input);

        let mut visitor = InlineSingleUseReferencesVisitor::default();
        visitor.visit_root_schema(&mut actual_schema);

        let expected_schema = as_schema(expected);
        assert_schemas_eq(expected_schema, actual_schema);
    }

    /// Runs the visitor over `input` and asserts that the schema comes out unchanged.
    fn assert_untouched(input: Value) {
        let mut actual_schema = as_schema(input);
        let expected_schema = actual_schema.clone();

        let mut visitor = InlineSingleUseReferencesVisitor::default();
        visitor.visit_root_schema(&mut actual_schema);

        assert_schemas_eq(expected_schema, actual_schema);
    }

    #[test]
    fn no_refs() {
        assert_untouched(json!({
            "type": "object",
            "properties": {
                "a": { "type": "string" }
            }
        }));
    }

    #[test]
    fn single_ref_single_usage() {
        let input = json!({
            "$ref": "#/definitions/simple",
            "definitions": {
                "simple": {
                    "type": "object",
                    "properties": {
                        "a": { "type": "string" }
                    }
                }
            }
        });

        // The lone definition is folded into the root schema and removed.
        let expected = json!({
            "type": "object",
            "properties": {
                "a": { "type": "string" }
            },
        });

        assert_inlined(input, expected);
    }

    #[test]
    fn single_ref_multiple_usages() {
        assert_untouched(json!({
            "definitions": {
                "simple": {
                    "type": "object",
                    "properties": {
                        "a": { "type": "string" }
                    }
                }
            },
            "type": "object",
            "properties": {
                "a": { "$ref": "#/definitions/simple" },
                "b": { "$ref": "#/definitions/simple" }
            }
        }));
    }

    #[test]
    fn multiple_refs_mixed_usages() {
        let input = json!({
            "definitions": {
                "simple": {
                    "type": "object",
                    "properties": {
                        "a": { "type": "string" }
                    }
                },
                "advanced": {
                    "type": "object",
                    "properties": {
                        "b": { "type": "string" }
                    }
                }
            },
            "type": "object",
            "properties": {
                "a": { "$ref": "#/definitions/simple" },
                "b": { "$ref": "#/definitions/simple" },
                "c": { "$ref": "#/definitions/advanced" },
            }
        });

        // Only `advanced` is single-use, so only it is inlined; `simple` stays a definition.
        let expected = json!({
            "definitions": {
                "simple": {
                    "type": "object",
                    "properties": {
                        "a": { "type": "string" }
                    }
                }
            },
            "type": "object",
            "properties": {
                "a": { "$ref": "#/definitions/simple" },
                "b": { "$ref": "#/definitions/simple" },
                "c": {
                    "type": "object",
                    "properties": {
                        "b": { "type": "string" }
                    }
                }
            }
        });

        assert_inlined(input, expected);
    }

    #[test]
    fn reference_in_multiple_arrays() {
        assert_untouched(json!({
            "definitions": {
                "item": {
                    "type": "object",
                    "properties": {
                        "x": { "type": "string" }
                    }
                }
            },
            "type": "object",
            "properties": {
                "arr1": { "type": "array", "items": { "$ref": "#/definitions/item" } },
                "arr2": { "type": "array", "items": { "$ref": "#/definitions/item" } }
            }
        }));
    }

    #[test]
    fn reference_in_oneof_anyof_allof() {
        assert_untouched(json!({
            "definitions": {
                "shared": {
                    "type": "object",
                    "properties": {
                        "y": { "type": "string" }
                    }
                }
            },
            "type": "object",
            "properties": {
                "choice": {
                    "oneOf": [
                        { "$ref": "#/definitions/shared" },
                        { "$ref": "#/definitions/shared" }
                    ],
                    "anyOf": [
                        { "$ref": "#/definitions/shared" },
                        { "type": "null" }
                    ],
                    "allOf": [
                        { "$ref": "#/definitions/shared" },
                        { "type": "object" }
                    ]
                }
            }
        }));
    }

    #[test]
    fn reference_in_additional_properties() {
        assert_untouched(json!({
            "definitions": {
                "val": {
                    "type": "object",
                    "properties": {
                        "z": { "type": "string" }
                    }
                }
            },
            "type": "object",
            "properties": {
                "obj1": {
                    "type": "object",
                    "additionalProperties": { "$ref": "#/definitions/val" }
                },
                "obj2": {
                    "type": "object",
                    "additionalProperties": { "$ref": "#/definitions/val" }
                }
            }
        }));
    }

    #[test]
    fn reference_in_pattern_properties() {
        assert_untouched(json!({
            "definitions": {
                "pat": {
                    "type": "object",
                    "properties": {
                        "w": { "type": "string" }
                    }
                }
            },
            "type": "object",
            "properties": {
                "obj": {
                    "type": "object",
                    "patternProperties": {
                        "^foo$": { "$ref": "#/definitions/pat" },
                        "^bar$": { "$ref": "#/definitions/pat" }
                    }
                }
            }
        }));
    }
}