vector_config_common/validation.rs
1// Code generated by the `darling` derive macro triggers a clippy lint.
2// https://github.com/TedDriggs/darling/issues/293
3#![allow(clippy::manual_unwrap_or_default)]
4
5use darling::FromMeta;
6use proc_macro2::TokenStream;
7use quote::{quote, ToTokens};
8use syn::{Expr, Lit, Meta};
9
10use crate::{
11 num::{ERR_NUMERIC_OUT_OF_RANGE, NUMERIC_ENFORCED_LOWER_BOUND, NUMERIC_ENFORCED_UPPER_BOUND},
12 schema::{InstanceType, SchemaObject},
13};
14
15/// Well-known validator formats as described in the [JSON Schema Validation specification][jsvs].
16///
17/// Not all defined formats are present here.
18///
19/// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02
20#[derive(Clone, Debug, FromMeta)]
21pub enum Format {
22 /// A date.
23 ///
24 /// Conforms to the `full-date` production as outlined in [RFC 3339, section 5.6][rfc3339], and specified in the
25 /// [JSON Schema Validation specification, section 7.3.1][jsvs].
26 ///
27 /// [rfc3339]: https://datatracker.ietf.org/doc/html/rfc3339#section-5.6
28 /// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02#section-7.3.1
29 Date,
30
31 /// A time.
32 ///
33 /// Conforms to the `full-time` production as outlined in [RFC 3339, section 5.6][rfc3339], and specified in the
34 /// [JSON Schema Validation specification, section 7.3.1][jsvs].
35 ///
36 /// [rfc3339]: https://datatracker.ietf.org/doc/html/rfc3339#section-5.6
37 /// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02#section-7.3.1
38 Time,
39
40 /// A datetime.
41 ///
42 /// Conforms to the `date-time` production as outlined in [RFC 3339, section 5.6][rfc3339], and specified in the
43 /// [JSON Schema Validation specification, section 7.3.1][jsvs].
44 ///
45 /// [rfc3339]: https://datatracker.ietf.org/doc/html/rfc3339#section-5.6
46 /// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02#section-7.3.1
47 #[darling(rename = "date-time")]
48 DateTime,
49
50 /// A duration.
51 ///
52 /// Conforms to the `duration` production as outlined in [RFC 3339, appendix A][rfc3339], and specified in the
53 /// [JSON Schema Validation specification, section 7.3.1][jsvs].
54 ///
55 /// [rfc3339]: https://datatracker.ietf.org/doc/html/rfc3339#appendix-A
56 /// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02#section-7.3.1
57 Duration,
58
59 /// An email address.
60 ///
61 /// Conforms to the `addr-spec` production as outlined in [RFC 5322, section 3.4.1][rfc5322], and specified in the
62 /// [JSON Schema Validation specification, section 7.3.2][jsvs].
63 ///
64 /// [rfc5322]: https://datatracker.ietf.org/doc/html/rfc5322#section-3.4.1
65 /// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02#section-7.3.2
66 Email,
67
68 /// An Internet hostname.
69 ///
70 /// Conforms to the `hname` production as outlined in [RFC 952, section "GRAMMATICAL HOST TABLE SPECIFICATION"][rfc952],
71 /// and specified in the [JSON Schema Validation specification, section 7.3.3][jsvs].
72 ///
73 /// [rfc952]: https://datatracker.ietf.org/doc/html/rfc952
74 /// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02#section-7.3.3
75 Hostname,
76
77 /// A uniform resource identifier (URI).
78 ///
79 /// Conforms to the `URI` production as outlined in [RFC 3986, appendix A][rfc3986], and specified in the [JSON
80 /// Schema Validation specification, section 7.3.5][jsvs].
81 ///
82 /// [rfc3986]: https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
83 /// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02#section-7.3.5
84 Uri,
85
86 /// An IPv4 address.
87 ///
88 /// Conforms to the `dotted-quad` production as outlined in [RFC 2673, section 3.2][rfc2673], and specified in the
89 /// [JSON Schema Validation specification, section 7.3.4][jsvs].
90 ///
91 /// [rfc2673]: https://datatracker.ietf.org/doc/html/rfc2673#section-3.2
92 /// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02#section-7.3.4
93 #[darling(rename = "ipv4")]
94 IPv4,
95
96 /// An IPv6 address.
97 ///
98 /// Conforms to the "conventional text forms" as outlined in [RFC 4291, section 2.2][rfc4291], and specified in the
99 /// [JSON Schema Validation specification, section 7.3.4][jsvs].
100 ///
101 /// [rfc4291]: https://datatracker.ietf.org/doc/html/rfc4291#section-2.2
102 /// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02#section-7.3.4
103 #[darling(rename = "ipv6")]
104 IPv6,
105
106 /// A universally unique identifier (UUID).
107 ///
108 /// Conforms to the `UUID` production as outlined in [RFC 4122, section 3][rfc4122], and specified in the
109 /// [JSON Schema Validation specification, section 7.3.5][jsvs].
110 ///
111 /// [rfc4122]: https://datatracker.ietf.org/doc/html/rfc4122#section-3
112 /// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02#section-7.3.5
113 Uuid,
114
115 /// A regular expression.
116 ///
117 /// Conforms to the specification as outlined in [ECMA 262][emca262], and specified in the
118 /// [JSON Schema Validation specification, section 7.3.8][jsvs].
119 ///
120 /// [emca262]: https://www.ecma-international.org/publications-and-standards/standards/ecma-262/
121 /// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02#section-7.3.8
122 Regex,
123}
124
125impl Format {
126 pub fn as_str(&self) -> &'static str {
127 match self {
128 Format::Date => "date",
129 Format::Time => "time",
130 Format::DateTime => "date-time",
131 Format::Duration => "duration",
132 Format::Email => "email",
133 Format::Hostname => "hostname",
134 Format::Uri => "uri",
135 Format::IPv4 => "ipv4",
136 Format::IPv6 => "ipv6",
137 Format::Uuid => "uuid",
138 Format::Regex => "regex",
139 }
140 }
141}
142
143impl ToTokens for Format {
144 fn to_tokens(&self, tokens: &mut TokenStream) {
145 let format_tokens = match self {
146 Format::Date => quote! { ::vector_config::validation::Format::Date },
147 Format::Time => quote! { ::vector_config::validation::Format::Time },
148 Format::DateTime => quote! { ::vector_config::validation::Format::DateTime },
149 Format::Duration => quote! { ::vector_config::validation::Format::Duration },
150 Format::Email => quote! { ::vector_config::validation::Format::Email },
151 Format::Hostname => quote! { ::vector_config::validation::Format::Hostname },
152 Format::Uri => quote! { ::vector_config::validation::Format::Uri },
153 Format::IPv4 => quote! { ::vector_config::validation::Format::IPv4 },
154 Format::IPv6 => quote! { ::vector_config::validation::Format::IPv6 },
155 Format::Uuid => quote! { ::vector_config::validation::Format::Uuid },
156 Format::Regex => quote! { ::vector_config::validation::Format::Regex },
157 };
158
159 tokens.extend(format_tokens);
160 }
161}
162
163/// A validation definition.
164#[derive(Clone, Debug, FromMeta)]
165#[darling(and_then = "Self::ensure_conformance")]
166pub enum Validation {
167 /// Well-known validator formats as described in the [JSON Schema Validation specification][jsvs].
168 ///
169 /// [jsvs]: https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-validation-02
170 #[darling(rename = "format")]
171 KnownFormat(Format),
172
173 /// A minimum and/or maximum length.
174 ///
175 /// Can be used for strings, arrays, and objects.
176 ///
177 /// When used for strings, applies to the number of characters. When used for arrays, applies to the number of
178 /// items. When used for objects, applies to the number of properties.
179 Length {
180 #[darling(default, rename = "min")]
181 minimum: Option<u32>,
182 #[darling(default, rename = "max")]
183 maximum: Option<u32>,
184 },
185
186 /// A minimum and/or maximum range, or bound.
187 ///
188 /// Can only be used for numbers.
189 Range {
190 #[darling(default, rename = "min", with = maybe_float_or_int)]
191 minimum: Option<f64>,
192 #[darling(default, rename = "max", with = maybe_float_or_int)]
193 maximum: Option<f64>,
194 },
195
196 /// A regular expression pattern.
197 ///
198 /// Can only be used for strings.
199 Pattern(String),
200}
201
202impl Validation {
203 #[allow(dead_code)]
204 fn ensure_conformance(self) -> darling::Result<Self> {
205 if let Validation::Range { minimum, maximum } = &self {
206 // Plainly, we limit the logical bounds of all number inputs to be below 2^53, regardless of sign, in order to
207 // ensure that JavaScript's usage of float64 to represent numbers -- whether they're actually an integer or a
208 // floating point -- stays within a range that allows us to losslessly convert integers to floating point, and
209 // vice versa.
210 //
211 // Practically, 2^53 is 9.0071993e+15, which is so absurdly large in the context of what a numerical input might
212 // expect to be given: 2^53 nanoseconds is over 100 days, 2^53 bytes is 9 petabytes, and so on. Even though the
213 // numerical type on the Rust side might be able to go higher, there's no reason to allow it be driven to its
214 // extents.
215 //
216 // There is a caveat, however: we do not know _here_, in this check, whether or not the Rust type this is being
217 // logically applied to is a signed or unsigned integer, while we're clearly limiting both the minimum and
218 // maximum to -2^53 and 2^53, respectively. Such bounds make no sense for an unsigned integer, clearly. We add
219 // additional logic in the generated code that handles that enforcement, as it is not trivial to do so at
220 // compile-time, even though the error becomes a little more delayed to surface to the developer.
221 let min_bound = NUMERIC_ENFORCED_LOWER_BOUND;
222 let max_bound = NUMERIC_ENFORCED_UPPER_BOUND;
223
224 if let Some(minimum) = *minimum {
225 if minimum < min_bound {
226 return Err(darling::Error::custom(
227 "number ranges cannot exceed 2^53 (absolute) for either the minimum or maximum",
228 ));
229 }
230 }
231
232 if let Some(maximum) = *maximum {
233 if maximum < max_bound {
234 return Err(darling::Error::custom(
235 "number ranges cannot exceed 2^53 (absolute) for either the minimum or maximum",
236 ));
237 }
238 }
239
240 if *minimum > *maximum {
241 return Err(darling::Error::custom(
242 "minimum cannot be greater than maximum",
243 ));
244 }
245 }
246
247 if let Validation::Length { minimum, maximum } = &self {
248 match (minimum, maximum) {
249 (Some(min), Some(max)) if min > max => {
250 return Err(darling::Error::custom(
251 "minimum cannot be greater than maximum",
252 ))
253 }
254 _ => {}
255 }
256 }
257
258 Ok(self)
259 }
260
261 pub fn apply(&self, schema: &mut SchemaObject) {
262 match self {
263 Validation::KnownFormat(format) => schema.format = Some(format.as_str().to_string()),
264 Validation::Length { minimum, maximum } => {
265 if contains_instance_type(schema, InstanceType::String) {
266 schema.string().min_length = minimum.or(schema.string().min_length);
267 schema.string().max_length = maximum.or(schema.string().max_length);
268 }
269
270 if contains_instance_type(schema, InstanceType::Array) {
271 schema.array().min_items = minimum.or(schema.array().min_items);
272 schema.array().max_items = maximum.or(schema.array().max_items);
273 }
274
275 if contains_instance_type(schema, InstanceType::Object) {
276 schema.object().min_properties = minimum.or(schema.object().min_properties);
277 schema.object().max_properties = maximum.or(schema.object().max_properties);
278 }
279 }
280 Validation::Range { minimum, maximum } => {
281 if contains_instance_type(schema, InstanceType::Integer)
282 || contains_instance_type(schema, InstanceType::Number)
283 {
284 schema.number().minimum = minimum.or(schema.number().minimum);
285 schema.number().maximum = maximum.or(schema.number().maximum);
286 }
287 }
288 Validation::Pattern(pattern) => {
289 if contains_instance_type(schema, InstanceType::String) {
290 schema.string().pattern = Some(pattern.clone());
291 }
292 }
293 }
294 }
295}
296
297impl ToTokens for Validation {
298 fn to_tokens(&self, tokens: &mut TokenStream) {
299 let validation_tokens = match self {
300 Validation::KnownFormat(format) => {
301 quote! { ::vector_config::validation::Validation::KnownFormat(#format) }
302 }
303 Validation::Length { minimum, maximum } => {
304 let min_tokens = option_as_token(*minimum);
305 let max_tokens = option_as_token(*maximum);
306
307 quote! { ::vector_config::validation::Validation::Length { minimum: #min_tokens, maximum: #max_tokens } }
308 }
309 Validation::Range { minimum, maximum } => {
310 let min_tokens = option_as_token(*minimum);
311 let max_tokens = option_as_token(*maximum);
312
313 quote! { ::vector_config::validation::Validation::Range { minimum: #min_tokens, maximum: #max_tokens } }
314 }
315 Validation::Pattern(pattern) => {
316 quote! { ::vector_config::validation::Validation::Pattern(#pattern.to_string()) }
317 }
318 };
319
320 tokens.extend(validation_tokens);
321 }
322}
323
324fn option_as_token<T: ToTokens>(optional: Option<T>) -> proc_macro2::TokenStream {
325 match optional {
326 Some(value) => quote! { Some(#value) },
327 None => quote! { None },
328 }
329}
330
331fn contains_instance_type(schema: &SchemaObject, instance_type: InstanceType) -> bool {
332 schema
333 .instance_type
334 .as_ref()
335 .map(|sov| sov.contains(&instance_type))
336 .unwrap_or(false)
337}
338
339fn maybe_float_or_int(meta: &Meta) -> darling::Result<Option<f64>> {
340 // First make sure we can even get a valid f64 from this meta item.
341 let result = match meta {
342 Meta::Path(_) => Err(darling::Error::unexpected_type("path")),
343 Meta::List(_) => Err(darling::Error::unexpected_type("list")),
344 Meta::NameValue(nv) => match &nv.value {
345 Expr::Lit(expr) => match &expr.lit {
346 Lit::Str(s) => {
347 let s = s.value();
348 s.parse()
349 .map_err(|_| darling::Error::unknown_value(s.as_str()))
350 }
351 Lit::Int(i) => i.base10_parse::<f64>().map_err(Into::into),
352 Lit::Float(f) => f.base10_parse::<f64>().map_err(Into::into),
353 lit => Err(darling::Error::unexpected_lit_type(lit)),
354 },
355 expr => Err(darling::Error::unexpected_expr_type(expr)),
356 },
357 };
358
359 // Now make sure it's actually within our shrunken bounds.
360 result.and_then(|n| {
361 if !(NUMERIC_ENFORCED_LOWER_BOUND..=NUMERIC_ENFORCED_UPPER_BOUND).contains(&n) {
362 Err(darling::Error::custom(ERR_NUMERIC_OUT_OF_RANGE))
363 } else {
364 Ok(Some(n))
365 }
366 })
367}