vector_config/schema/visitors/
inline_single.rs

1use std::collections::{HashMap, HashSet};
2
3use serde_json::Value;
4use tracing::debug;
5use vector_config_common::schema::{visit::Visitor, *};
6
7use super::scoped_visit::{
8    SchemaReference, SchemaScopeStack, ScopedVisitor, visit_schema_object_scoped,
9};
10use crate::schema::visitors::merge::Mergeable;
11
/// Schema visitor that folds single-use definitions into the one place that references them.
///
/// Schema generation frequently emits definitions (for example when generics or flattening are
/// involved) that end up being referenced by exactly one other schema. Keeping those as separate
/// definitions forces consumers to chase a `$ref` for no benefit, since nothing else shares the
/// definition.
///
/// When run against a root schema, this visitor counts how often each definition is referenced,
/// replaces every reference that occurs exactly once with the contents of the referenced schema,
/// and finally deletes the now-unused definition from the root definitions.
#[derive(Default, Debug)]
pub struct InlineSingleUseReferencesVisitor {
    /// Names of the definitions that were selected for inlining during the current visit.
    eligible_to_inline: HashSet<String>,
}
28
29impl InlineSingleUseReferencesVisitor {
30    pub fn from_settings(_: &SchemaSettings) -> Self {
31        Self {
32            eligible_to_inline: HashSet::new(),
33        }
34    }
35}
36
37impl Visitor for InlineSingleUseReferencesVisitor {
38    fn visit_root_schema(&mut self, root: &mut RootSchema) {
39        // Build a map of schema references and the number of times they're referenced through the
40        // entire schema, by visiting the root schema in a recursive fashion, using a helper visitor.
41        let mut occurrence_visitor = OccurrenceVisitor::default();
42        occurrence_visitor.visit_root_schema(root);
43        let occurrence_map = occurrence_visitor.occurrence_map;
44
45        self.eligible_to_inline = occurrence_map
46            .into_iter()
47            // Filter out any schemas which have more than one occurrence, as naturally, we're
48            // trying to inline single-use schema references. :)
49            .filter_map(|(def_name, occurrences)| (occurrences == 1).then_some(def_name))
50            // However, we'll also filter out some specific schema definitions which are only
51            // referenced once, specifically: component base types and component types themselves.
52            //
53            // We do this as a lot of the tooling that parses the schema to generate documentation,
54            // and the like, depends on these schemas existing in the top-level definitions for easy
55            // lookup.
56            .filter(|def_name| {
57                let schema = root
58                    .definitions
59                    .get(def_name.as_ref())
60                    .and_then(Schema::as_object)
61                    .expect("schema definition must exist");
62
63                is_inlineable_schema(def_name.as_ref(), schema)
64            })
65            .map(|s| s.as_ref().to_string())
66            .collect::<HashSet<_>>();
67
68        // Now run our own visitor logic, which will use the inline eligibility to determine if a
69        // schema reference in a being-visited schema should be replaced inline with the original
70        // referenced schema, in turn removing the schema definition.
71        visit::visit_root_schema(self, root);
72
73        // Now remove all of the definitions for schemas that were eligible for inlining.
74        for schema_def_name in self.eligible_to_inline.drain() {
75            debug!(
76                referent = schema_def_name,
77                "Removing schema definition from root schema."
78            );
79
80            root.definitions
81                .remove(&schema_def_name)
82                .expect("referenced schema must exist in definitions");
83        }
84    }
85
86    fn visit_schema_object(
87        &mut self,
88        definitions: &mut Map<String, Schema>,
89        schema: &mut SchemaObject,
90    ) {
91        // Recursively visit this schema first.
92        visit::visit_schema_object(self, definitions, schema);
93
94        // If this schema has a schema reference, see if it's in our inline eligibility map. If so,
95        // we remove the referenced schema from the definitions, and then merge it into the current
96        // schema, after removing the `$ref` field.
97        if let Some(schema_ref) = schema.reference.as_ref().cloned() {
98            let schema_ref = get_cleaned_schema_reference(&schema_ref);
99            if self.eligible_to_inline.contains(schema_ref) {
100                let referenced_schema = definitions
101                    .get(schema_ref)
102                    .expect("referenced schema must exist in definitions");
103
104                if let Schema::Object(referenced_schema) = referenced_schema {
105                    debug!(
106                        referent = schema_ref,
107                        "Inlining eligible schema reference into current schema."
108                    );
109
110                    schema.reference = None;
111                    schema.merge(referenced_schema);
112                }
113            }
114        }
115    }
116}
117
118fn is_inlineable_schema(definition_name: &str, schema: &SchemaObject) -> bool {
119    static DISALLOWED_SCHEMAS: &[&str] = &[
120        "vector::sources::Sources",
121        "vector::transforms::Transforms",
122        "vector::sinks::Sinks",
123    ];
124
125    // We want to avoid inlining all of the relevant top-level types used for defining components:
126    // the "outer" types (i.e. `SinkOuter<T>`), the enum/collection types (i.e. the big `Sources`
127    // enum), and the component configuration types themselves (i.e. `AmqpSinkConfig`).
128    //
129    // There's nothing _technically_ wrong with doing so, but it would break downstream consumers of
130    // the schema that parse it in order to extract the individual components and other
131    // component-specific metadata.
132    let is_component_base = get_schema_metadata_attr(schema, "docs::component_base_type").is_some();
133    let is_component = get_schema_metadata_attr(schema, "docs::component_type").is_some();
134
135    let is_allowed_schema = !DISALLOWED_SCHEMAS.contains(&definition_name);
136
137    !is_component_base && !is_component && is_allowed_schema
138}
139
140#[derive(Debug, Default)]
141struct OccurrenceVisitor {
142    scope_stack: SchemaScopeStack,
143    occurrence_map: HashMap<SchemaReference, usize>,
144}
145
146impl Visitor for OccurrenceVisitor {
147    fn visit_schema_object(
148        &mut self,
149        definitions: &mut Map<String, Schema>,
150        schema: &mut SchemaObject,
151    ) {
152        visit_schema_object_scoped(self, definitions, schema);
153
154        if let Some(current_schema_ref) = schema.reference.as_ref() {
155            let current_schema_ref = get_cleaned_schema_reference(current_schema_ref);
156            *self
157                .occurrence_map
158                .entry(current_schema_ref.into())
159                .or_default() += 1;
160        }
161    }
162}
163
164impl ScopedVisitor for OccurrenceVisitor {
165    fn push_schema_scope<S: Into<SchemaReference>>(&mut self, scope: S) {
166        self.scope_stack.push(scope.into());
167    }
168
169    fn pop_schema_scope(&mut self) {
170        self.scope_stack.pop().expect("stack was empty during pop");
171    }
172
173    fn get_current_schema_scope(&self) -> &SchemaReference {
174        self.scope_stack.current().unwrap_or(&SchemaReference::Root)
175    }
176}
177
178fn get_schema_metadata_attr<'a>(schema: &'a SchemaObject, key: &str) -> Option<&'a Value> {
179    schema
180        .extensions
181        .get("_metadata")
182        .and_then(|metadata| metadata.get(key))
183}
184
#[cfg(test)]
mod tests {
    use serde_json::json;
    use vector_config_common::schema::visit::Visitor;

    use super::InlineSingleUseReferencesVisitor;
    use crate::schema::visitors::test::{as_schema, assert_schemas_eq};

    /// Runs the visitor over `document` and asserts that the schema comes out unchanged.
    fn assert_left_untouched(document: serde_json::Value) {
        let mut visited = as_schema(document);
        let untouched = visited.clone();

        InlineSingleUseReferencesVisitor::default().visit_root_schema(&mut visited);

        assert_schemas_eq(untouched, visited);
    }

    /// Runs the visitor over `document` and asserts that the result matches `expected`.
    fn assert_rewritten_to(document: serde_json::Value, expected: serde_json::Value) {
        let mut visited = as_schema(document);

        InlineSingleUseReferencesVisitor::default().visit_root_schema(&mut visited);

        assert_schemas_eq(as_schema(expected), visited);
    }

    // A schema without any references has nothing to inline.
    #[test]
    fn no_refs() {
        assert_left_untouched(json!({
            "type": "object",
            "properties": {
                "a": { "type": "string" }
            }
        }));
    }

    // A definition referenced exactly once is inlined and its definition removed.
    #[test]
    fn single_ref_single_usage() {
        assert_rewritten_to(
            json!({
                "$ref": "#/definitions/simple",
                "definitions": {
                    "simple": {
                        "type": "object",
                        "properties": {
                            "a": { "type": "string" }
                        }
                    }
                }
            }),
            json!({
                "type": "object",
                "properties": {
                    "a": { "type": "string" }
                },
            }),
        );
    }

    // A definition referenced twice must be left alone.
    #[test]
    fn single_ref_multiple_usages() {
        assert_left_untouched(json!({
            "definitions": {
                "simple": {
                    "type": "object",
                    "properties": {
                        "a": { "type": "string" }
                    }
                }
            },
            "type": "object",
            "properties": {
                "a": { "$ref": "#/definitions/simple" },
                "b": { "$ref": "#/definitions/simple" }
            }
        }));
    }

    // Only the single-use definition is inlined; the shared one stays a reference.
    #[test]
    fn multiple_refs_mixed_usages() {
        assert_rewritten_to(
            json!({
                "definitions": {
                    "simple": {
                        "type": "object",
                        "properties": {
                            "a": { "type": "string" }
                        }
                    },
                    "advanced": {
                        "type": "object",
                        "properties": {
                            "b": { "type": "string" }
                        }
                    }
                },
                "type": "object",
                "properties": {
                    "a": { "$ref": "#/definitions/simple" },
                    "b": { "$ref": "#/definitions/simple" },
                    "c": { "$ref": "#/definitions/advanced" },
                }
            }),
            json!({
                "definitions": {
                    "simple": {
                        "type": "object",
                        "properties": {
                            "a": { "type": "string" }
                        }
                    }
                },
                "type": "object",
                "properties": {
                    "a": { "$ref": "#/definitions/simple" },
                    "b": { "$ref": "#/definitions/simple" },
                    "c": {
                        "type": "object",
                        "properties": {
                            "b": { "type": "string" }
                        }
                    }
                }
            }),
        );
    }

    // References from two different array `items` count as two usages.
    #[test]
    fn reference_in_multiple_arrays() {
        assert_left_untouched(json!({
            "definitions": {
                "item": {
                    "type": "object",
                    "properties": {
                        "x": { "type": "string" }
                    }
                }
            },
            "type": "object",
            "properties": {
                "arr1": { "type": "array", "items": { "$ref": "#/definitions/item" } },
                "arr2": { "type": "array", "items": { "$ref": "#/definitions/item" } }
            }
        }));
    }

    // References spread over `oneOf`/`anyOf`/`allOf` all count towards the usage total.
    #[test]
    fn reference_in_oneof_anyof_allof() {
        assert_left_untouched(json!({
            "definitions": {
                "shared": {
                    "type": "object",
                    "properties": {
                        "y": { "type": "string" }
                    }
                }
            },
            "type": "object",
            "properties": {
                "choice": {
                    "oneOf": [
                        { "$ref": "#/definitions/shared" },
                        { "$ref": "#/definitions/shared" }
                    ],
                    "anyOf": [
                        { "$ref": "#/definitions/shared" },
                        { "type": "null" }
                    ],
                    "allOf": [
                        { "$ref": "#/definitions/shared" },
                        { "type": "object" }
                    ]
                }
            }
        }));
    }

    // References from two `additionalProperties` schemas count as two usages.
    #[test]
    fn reference_in_additional_properties() {
        assert_left_untouched(json!({
            "definitions": {
                "val": {
                    "type": "object",
                    "properties": {
                        "z": { "type": "string" }
                    }
                }
            },
            "type": "object",
            "properties": {
                "obj1": {
                    "type": "object",
                    "additionalProperties": { "$ref": "#/definitions/val" }
                },
                "obj2": {
                    "type": "object",
                    "additionalProperties": { "$ref": "#/definitions/val" }
                }
            }
        }));
    }

    // References from two `patternProperties` entries count as two usages.
    #[test]
    fn reference_in_pattern_properties() {
        assert_left_untouched(json!({
            "definitions": {
                "pat": {
                    "type": "object",
                    "properties": {
                        "w": { "type": "string" }
                    }
                }
            },
            "type": "object",
            "properties": {
                "obj": {
                    "type": "object",
                    "patternProperties": {
                        "^foo$": { "$ref": "#/definitions/pat" },
                        "^bar$": { "$ref": "#/definitions/pat" }
                    }
                }
            }
        }));
    }
}