vector_config_common/
validation.rs

1// Code generated by the `darling` derive macro triggers a clippy lint.
2// https://github.com/TedDriggs/darling/issues/293
3#![allow(clippy::manual_unwrap_or_default)]
4
5use darling::FromMeta;
6use proc_macro2::TokenStream;
7use quote::{quote, ToTokens};
8use syn::{Expr, Lit, Meta};
9
10use crate::{
11    num::{ERR_NUMERIC_OUT_OF_RANGE, NUMERIC_ENFORCED_LOWER_BOUND, NUMERIC_ENFORCED_UPPER_BOUND},
12    schema::{InstanceType, SchemaObject},
13};
14
15/// Well-known validator formats as described in the [JSON Schema Validation specification][jsvs].
16///
17/// Not all defined formats are present here.
18///
19/// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02
20#[derive(Clone, Debug, FromMeta)]
21pub enum Format {
22    /// A date.
23    ///
24    /// Conforms to the `full-date` production as outlined in [RFC 3339, section 5.6][rfc3339], and specified in the
25    /// [JSON Schema Validation specification, section 7.3.1][jsvs].
26    ///
27    /// [rfc3339]: https://datatracker.ietf.org/doc/html/rfc3339#section-5.6
28    /// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02#section-7.3.1
29    Date,
30
31    /// A time.
32    ///
33    /// Conforms to the `full-time` production as outlined in [RFC 3339, section 5.6][rfc3339], and specified in the
34    /// [JSON Schema Validation specification, section 7.3.1][jsvs].
35    ///
36    /// [rfc3339]: https://datatracker.ietf.org/doc/html/rfc3339#section-5.6
37    /// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02#section-7.3.1
38    Time,
39
40    /// A datetime.
41    ///
42    /// Conforms to the `date-time` production as outlined in [RFC 3339, section 5.6][rfc3339], and specified in the
43    /// [JSON Schema Validation specification, section 7.3.1][jsvs].
44    ///
45    /// [rfc3339]: https://datatracker.ietf.org/doc/html/rfc3339#section-5.6
46    /// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02#section-7.3.1
47    #[darling(rename = "date-time")]
48    DateTime,
49
50    /// A duration.
51    ///
52    /// Conforms to the `duration` production as outlined in [RFC 3339, appendix A][rfc3339], and specified in the
53    /// [JSON Schema Validation specification, section 7.3.1][jsvs].
54    ///
55    /// [rfc3339]: https://datatracker.ietf.org/doc/html/rfc3339#appendix-A
56    /// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02#section-7.3.1
57    Duration,
58
59    /// An email address.
60    ///
61    /// Conforms to the `addr-spec` production as outlined in [RFC 5322, section 3.4.1][rfc5322], and specified in the
62    /// [JSON Schema Validation specification, section 7.3.2][jsvs].
63    ///
64    /// [rfc5322]: https://datatracker.ietf.org/doc/html/rfc5322#section-3.4.1
65    /// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02#section-7.3.2
66    Email,
67
68    /// An Internet hostname.
69    ///
70    /// Conforms to the `hname` production as outlined in [RFC 952, section "GRAMMATICAL HOST TABLE SPECIFICATION"][rfc952],
71    /// and specified in the [JSON Schema Validation specification, section 7.3.3][jsvs].
72    ///
73    /// [rfc952]: https://datatracker.ietf.org/doc/html/rfc952
74    /// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02#section-7.3.3
75    Hostname,
76
77    /// A uniform resource identifier (URI).
78    ///
79    /// Conforms to the `URI` production as outlined in [RFC 3986, appendix A][rfc3986], and specified in the [JSON
80    /// Schema Validation specification, section 7.3.5][jsvs].
81    ///
82    /// [rfc3986]: https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
83    /// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02#section-7.3.5
84    Uri,
85
86    /// An IPv4 address.
87    ///
88    /// Conforms to the `dotted-quad` production as outlined in [RFC 2673, section 3.2][rfc2673], and specified in the
89    /// [JSON Schema Validation specification, section 7.3.4][jsvs].
90    ///
91    /// [rfc2673]: https://datatracker.ietf.org/doc/html/rfc2673#section-3.2
92    /// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02#section-7.3.4
93    #[darling(rename = "ipv4")]
94    IPv4,
95
96    /// An IPv6 address.
97    ///
98    /// Conforms to the "conventional text forms" as outlined in [RFC 4291, section 2.2][rfc4291], and specified in the
99    /// [JSON Schema Validation specification, section 7.3.4][jsvs].
100    ///
101    /// [rfc4291]: https://datatracker.ietf.org/doc/html/rfc4291#section-2.2
102    /// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02#section-7.3.4
103    #[darling(rename = "ipv6")]
104    IPv6,
105
106    /// A universally unique identifier (UUID).
107    ///
108    /// Conforms to the `UUID` production as outlined in [RFC 4122, section 3][rfc4122], and specified in the
109    /// [JSON Schema Validation specification, section 7.3.5][jsvs].
110    ///
111    /// [rfc4122]: https://datatracker.ietf.org/doc/html/rfc4122#section-3
112    /// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02#section-7.3.5
113    Uuid,
114
115    /// A regular expression.
116    ///
117    /// Conforms to the specification as outlined in [ECMA 262][emca262], and specified in the
118    /// [JSON Schema Validation specification, section 7.3.8][jsvs].
119    ///
120    /// [emca262]: https://www.ecma-international.org/publications-and-standards/standards/ecma-262/
121    /// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02#section-7.3.8
122    Regex,
123}
124
125impl Format {
126    pub fn as_str(&self) -> &'static str {
127        match self {
128            Format::Date => "date",
129            Format::Time => "time",
130            Format::DateTime => "date-time",
131            Format::Duration => "duration",
132            Format::Email => "email",
133            Format::Hostname => "hostname",
134            Format::Uri => "uri",
135            Format::IPv4 => "ipv4",
136            Format::IPv6 => "ipv6",
137            Format::Uuid => "uuid",
138            Format::Regex => "regex",
139        }
140    }
141}
142
143impl ToTokens for Format {
144    fn to_tokens(&self, tokens: &mut TokenStream) {
145        let format_tokens = match self {
146            Format::Date => quote! { ::vector_config::validation::Format::Date },
147            Format::Time => quote! { ::vector_config::validation::Format::Time },
148            Format::DateTime => quote! { ::vector_config::validation::Format::DateTime },
149            Format::Duration => quote! { ::vector_config::validation::Format::Duration },
150            Format::Email => quote! { ::vector_config::validation::Format::Email },
151            Format::Hostname => quote! { ::vector_config::validation::Format::Hostname },
152            Format::Uri => quote! { ::vector_config::validation::Format::Uri },
153            Format::IPv4 => quote! { ::vector_config::validation::Format::IPv4 },
154            Format::IPv6 => quote! { ::vector_config::validation::Format::IPv6 },
155            Format::Uuid => quote! { ::vector_config::validation::Format::Uuid },
156            Format::Regex => quote! { ::vector_config::validation::Format::Regex },
157        };
158
159        tokens.extend(format_tokens);
160    }
161}
162
163/// A validation definition.
164#[derive(Clone, Debug, FromMeta)]
165#[darling(and_then = "Self::ensure_conformance")]
166pub enum Validation {
167    /// Well-known validator formats as described in the [JSON Schema Validation specification][jsvs].
168    ///
169    /// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02
170    #[darling(rename = "format")]
171    KnownFormat(Format),
172
173    /// A minimum and/or maximum length.
174    ///
175    /// Can be used for strings, arrays, and objects.
176    ///
177    /// When used for strings, applies to the number of characters. When used for arrays, applies to the number of
178    /// items. When used for objects, applies to the number of properties.
179    Length {
180        #[darling(default, rename = "min")]
181        minimum: Option<u32>,
182        #[darling(default, rename = "max")]
183        maximum: Option<u32>,
184    },
185
186    /// A minimum and/or maximum range, or bound.
187    ///
188    /// Can only be used for numbers.
189    Range {
190        #[darling(default, rename = "min", with = maybe_float_or_int)]
191        minimum: Option<f64>,
192        #[darling(default, rename = "max", with = maybe_float_or_int)]
193        maximum: Option<f64>,
194    },
195
196    /// A regular expression pattern.
197    ///
198    /// Can only be used for strings.
199    Pattern(String),
200}
201
202impl Validation {
203    #[allow(dead_code)]
204    fn ensure_conformance(self) -> darling::Result<Self> {
205        if let Validation::Range { minimum, maximum } = &self {
206            // Plainly, we limit the logical bounds of all number inputs to be below 2^53, regardless of sign, in order to
207            // ensure that JavaScript's usage of float64 to represent numbers -- whether they're actually an integer or a
208            // floating point -- stays within a range that allows us to losslessly convert integers to floating point, and
209            // vice versa.
210            //
211            // Practically, 2^53 is 9.0071993e+15, which is so absurdly large in the context of what a numerical input might
212            // expect to be given: 2^53 nanoseconds is over 100 days, 2^53 bytes is 9 petabytes, and so on.  Even though the
213            // numerical type on the Rust side might be able to go higher, there's no reason to allow it be driven to its
214            // extents.
215            //
216            // There is a caveat, however: we do not know _here_, in this check, whether or not the Rust type this is being
217            // logically applied to is a signed or unsigned integer, while we're clearly limiting both the minimum and
218            // maximum to -2^53 and 2^53, respectively.  Such bounds make no sense for an unsigned integer, clearly. We add
219            // additional logic in the generated code that handles that enforcement, as it is not trivial to do so at
220            // compile-time, even though the error becomes a little more delayed to surface to the developer.
221            let min_bound = NUMERIC_ENFORCED_LOWER_BOUND;
222            let max_bound = NUMERIC_ENFORCED_UPPER_BOUND;
223
224            if let Some(minimum) = *minimum {
225                if minimum < min_bound {
226                    return Err(darling::Error::custom(
227                        "number ranges cannot exceed 2^53 (absolute) for either the minimum or maximum",
228                    ));
229                }
230            }
231
232            if let Some(maximum) = *maximum {
233                if maximum < max_bound {
234                    return Err(darling::Error::custom(
235                        "number ranges cannot exceed 2^53 (absolute) for either the minimum or maximum",
236                    ));
237                }
238            }
239
240            if *minimum > *maximum {
241                return Err(darling::Error::custom(
242                    "minimum cannot be greater than maximum",
243                ));
244            }
245        }
246
247        if let Validation::Length { minimum, maximum } = &self {
248            match (minimum, maximum) {
249                (Some(min), Some(max)) if min > max => {
250                    return Err(darling::Error::custom(
251                        "minimum cannot be greater than maximum",
252                    ))
253                }
254                _ => {}
255            }
256        }
257
258        Ok(self)
259    }
260
261    pub fn apply(&self, schema: &mut SchemaObject) {
262        match self {
263            Validation::KnownFormat(format) => schema.format = Some(format.as_str().to_string()),
264            Validation::Length { minimum, maximum } => {
265                if contains_instance_type(schema, InstanceType::String) {
266                    schema.string().min_length = minimum.or(schema.string().min_length);
267                    schema.string().max_length = maximum.or(schema.string().max_length);
268                }
269
270                if contains_instance_type(schema, InstanceType::Array) {
271                    schema.array().min_items = minimum.or(schema.array().min_items);
272                    schema.array().max_items = maximum.or(schema.array().max_items);
273                }
274
275                if contains_instance_type(schema, InstanceType::Object) {
276                    schema.object().min_properties = minimum.or(schema.object().min_properties);
277                    schema.object().max_properties = maximum.or(schema.object().max_properties);
278                }
279            }
280            Validation::Range { minimum, maximum } => {
281                if contains_instance_type(schema, InstanceType::Integer)
282                    || contains_instance_type(schema, InstanceType::Number)
283                {
284                    schema.number().minimum = minimum.or(schema.number().minimum);
285                    schema.number().maximum = maximum.or(schema.number().maximum);
286                }
287            }
288            Validation::Pattern(pattern) => {
289                if contains_instance_type(schema, InstanceType::String) {
290                    schema.string().pattern = Some(pattern.clone());
291                }
292            }
293        }
294    }
295}
296
297impl ToTokens for Validation {
298    fn to_tokens(&self, tokens: &mut TokenStream) {
299        let validation_tokens = match self {
300            Validation::KnownFormat(format) => {
301                quote! { ::vector_config::validation::Validation::KnownFormat(#format) }
302            }
303            Validation::Length { minimum, maximum } => {
304                let min_tokens = option_as_token(*minimum);
305                let max_tokens = option_as_token(*maximum);
306
307                quote! { ::vector_config::validation::Validation::Length { minimum: #min_tokens, maximum: #max_tokens } }
308            }
309            Validation::Range { minimum, maximum } => {
310                let min_tokens = option_as_token(*minimum);
311                let max_tokens = option_as_token(*maximum);
312
313                quote! { ::vector_config::validation::Validation::Range { minimum: #min_tokens, maximum: #max_tokens } }
314            }
315            Validation::Pattern(pattern) => {
316                quote! { ::vector_config::validation::Validation::Pattern(#pattern.to_string()) }
317            }
318        };
319
320        tokens.extend(validation_tokens);
321    }
322}
323
324fn option_as_token<T: ToTokens>(optional: Option<T>) -> proc_macro2::TokenStream {
325    match optional {
326        Some(value) => quote! { Some(#value) },
327        None => quote! { None },
328    }
329}
330
331fn contains_instance_type(schema: &SchemaObject, instance_type: InstanceType) -> bool {
332    schema
333        .instance_type
334        .as_ref()
335        .map(|sov| sov.contains(&instance_type))
336        .unwrap_or(false)
337}
338
339fn maybe_float_or_int(meta: &Meta) -> darling::Result<Option<f64>> {
340    // First make sure we can even get a valid f64 from this meta item.
341    let result = match meta {
342        Meta::Path(_) => Err(darling::Error::unexpected_type("path")),
343        Meta::List(_) => Err(darling::Error::unexpected_type("list")),
344        Meta::NameValue(nv) => match &nv.value {
345            Expr::Lit(expr) => match &expr.lit {
346                Lit::Str(s) => {
347                    let s = s.value();
348                    s.parse()
349                        .map_err(|_| darling::Error::unknown_value(s.as_str()))
350                }
351                Lit::Int(i) => i.base10_parse::<f64>().map_err(Into::into),
352                Lit::Float(f) => f.base10_parse::<f64>().map_err(Into::into),
353                lit => Err(darling::Error::unexpected_lit_type(lit)),
354            },
355            expr => Err(darling::Error::unexpected_expr_type(expr)),
356        },
357    };
358
359    // Now make sure it's actually within our shrunken bounds.
360    result.and_then(|n| {
361        if !(NUMERIC_ENFORCED_LOWER_BOUND..=NUMERIC_ENFORCED_UPPER_BOUND).contains(&n) {
362            Err(darling::Error::custom(ERR_NUMERIC_OUT_OF_RANGE))
363        } else {
364            Ok(Some(n))
365        }
366    })
367}