1use super::util::example_path_or_basename;
2use crate::compiler::prelude::*;
3#[cfg(not(target_arch = "wasm32"))]
4use std::path::PathBuf;
5use std::sync::LazyLock;
6
7static EXAMPLE_JSON_SCHEMA_VALID_EMAIL: LazyLock<&str> = LazyLock::new(|| {
10 let path =
11 example_path_or_basename("jsonschema/validate_json_schema/schema_with_email_format.json");
12
13 Box::leak(
14 format!(
15 r#"validate_json_schema!(s'{{ "productUser": "valid@email.com" }}', "{path}", false)"#
16 )
17 .into_boxed_str(),
18 )
19});
20
21static EXAMPLE_JSON_SCHEMA_INVALID_EMAIL: LazyLock<&str> = LazyLock::new(|| {
22 let path =
23 example_path_or_basename("jsonschema/validate_json_schema/schema_with_email_format.json");
24
25 Box::leak(
26 format!(
27 r#"validate_json_schema!(s'{{ "productUser": "invalidEmail" }}', "{path}", false)"#
28 )
29 .into_boxed_str(),
30 )
31});
32
33static EXAMPLE_JSON_SCHEMA_CUSTOM_FORMAT_FALSE: LazyLock<&str> = LazyLock::new(|| {
34 let path =
35 example_path_or_basename("jsonschema/validate_json_schema/schema_with_custom_format.json");
36
37 Box::leak(
38 format!(r#"validate_json_schema!(s'{{ "productUser": "a-custom-formatted-string" }}', "{path}", false)"#)
39 .into_boxed_str(),
40 )
41});
42
43static EXAMPLE_JSON_SCHEMA_CUSTOM_FORMAT_TRUE: LazyLock<&str> = LazyLock::new(|| {
44 let path =
45 example_path_or_basename("jsonschema/validate_json_schema/schema_with_custom_format.json");
46
47 Box::leak(
48 format!(r#"validate_json_schema!(s'{{ "productUser": "a-custom-formatted-string" }}', "{path}", true)"#)
49 .into_boxed_str(),
50 )
51});
52
53static EXAMPLES: LazyLock<Vec<Example>> = LazyLock::new(|| {
54 vec![
55 example! {
56 title: "Payload contains a valid email",
57 source: &EXAMPLE_JSON_SCHEMA_VALID_EMAIL,
58 result: Ok("true"),
59 },
60 example! {
61 title: "Payload contains an invalid email",
62 source: &EXAMPLE_JSON_SCHEMA_INVALID_EMAIL,
63 result: Err(Box::leak(
64 format!(
65 r#"function call error for "validate_json_schema" at (0:{}): JSON schema validation failed: "invalidEmail" is not a "email" at /productUser"#,
66 EXAMPLE_JSON_SCHEMA_INVALID_EMAIL.len()
67 )
68 .into_boxed_str(),
69 )),
70 },
71 example! {
72 title: "Payload contains a custom format declaration",
73 source: &EXAMPLE_JSON_SCHEMA_CUSTOM_FORMAT_FALSE,
74 result: Err(Box::leak(
75 format!(
76 r#"function call error for "validate_json_schema" at (0:{}): Failed to compile schema: Unknown format: 'my-custom-format'. Adjust configuration to ignore unrecognized formats"#,
77 EXAMPLE_JSON_SCHEMA_CUSTOM_FORMAT_FALSE.len()
78 )
79 .into_boxed_str(),
80 )),
81 },
82 example! {
83 title: "Payload contains a custom format declaration, with ignore_unknown_formats set to true",
84 source: &EXAMPLE_JSON_SCHEMA_CUSTOM_FORMAT_TRUE,
85 result: Ok("true"),
86 },
87 ]
88});
89
90#[cfg(not(target_arch = "wasm32"))]
91use non_wasm::ValidateJsonSchemaFn;
/// Zero-sized marker type for the `validate_json_schema` function; all behavior lives in
/// its `Function` implementation.
#[derive(Clone, Copy, Debug)]
pub struct ValidateJsonSchema;
94
95impl Function for ValidateJsonSchema {
96 fn identifier(&self) -> &'static str {
97 "validate_json_schema"
98 }
99
100 fn usage(&self) -> &'static str {
101 "Check if `value` conforms to a JSON Schema definition. This function validates a JSON payload against a JSON Schema definition. It can be used to ensure that the data structure and types in `value` match the expectations defined in `schema_definition`."
102 }
103
104 fn category(&self) -> &'static str {
105 Category::Type.as_ref()
106 }
107
108 fn internal_failure_reasons(&self) -> &'static [&'static str] {
109 &[
110 "`value` is not a valid JSON Schema payload.",
111 "`value` contains custom format declarations and `ignore_unknown_formats` has not been set to `true`.",
112 "`schema_definition` is not a valid JSON Schema definition.",
113 "`schema_definition` file does not exist.",
114 ]
115 }
116
117 fn return_kind(&self) -> u16 {
118 kind::BOOLEAN
119 }
120
121 fn return_rules(&self) -> &'static [&'static str] {
122 &[
123 "Returns `true` if `value` conforms to the JSON Schema definition.",
124 "Returns `false` if `value` does not conform to the JSON Schema definition.",
125 ]
126 }
127
128 fn notices(&self) -> &'static [&'static str] {
129 &[indoc! {"
130 This function uses a compiled schema cache. The first time it is called with a specific
131 `schema_definition`, it will compile the schema and cache it for subsequent calls. This
132 improves performance when validating multiple values against the same schema. The cache
133 implementation is fairly naive and does not support refreshing the schema if it changes.
134 If you update the schema definition file, you must restart Vector to clear the cache.
135 "}]
136 }
137
138 fn examples(&self) -> &'static [Example] {
139 EXAMPLES.as_slice()
140 }
141
142 fn parameters(&self) -> &'static [Parameter] {
143 const PARAMETERS: &[Parameter] = &[
144 Parameter::required(
145 "value",
146 kind::BYTES,
147 "The value to check if it conforms to the JSON schema definition.",
148 ),
149 Parameter::required(
150 "schema_definition",
151 kind::BYTES,
152 "The location (path) of the JSON Schema definition.",
153 ),
154 Parameter::optional(
155 "ignore_unknown_formats",
156 kind::BOOLEAN,
157 "Unknown formats can be silently ignored by setting this to `true` and validation continues without failing due to those fields.",
158 ),
159 ];
160 PARAMETERS
161 }
162
163 #[cfg(not(target_arch = "wasm32"))]
164 fn compile(
165 &self,
166 state: &state::TypeState,
167 _ctx: &mut FunctionCompileContext,
168 arguments: ArgumentList,
169 ) -> Compiled {
170 let value = arguments.required("value");
171 let schema_definition = arguments.required_literal("schema_definition", state)?;
172 let ignore_unknown_formats = arguments
173 .optional("ignore_unknown_formats")
174 .unwrap_or(expr!(false));
175
176 let schema_file_str = schema_definition
177 .try_bytes_utf8_lossy()
178 .expect("schema definition file must be a string");
179
180 let schema_file_path = std::path::Path::new(schema_file_str.as_ref());
181
182 Ok(ValidateJsonSchemaFn {
183 value,
184 schema_path: PathBuf::from(schema_file_path),
185 ignore_unknown_formats,
186 }
187 .as_expr())
188 }
189
190 #[cfg(target_arch = "wasm32")]
191 fn compile(
192 &self,
193 _state: &state::TypeState,
194 ctx: &mut FunctionCompileContext,
195 _arguments: ArgumentList,
196 ) -> Compiled {
197 Ok(super::WasmUnsupportedFunction::new(ctx.span(), TypeDef::bytes().fallible()).as_expr())
198 }
199}
200
201#[cfg(not(target_arch = "wasm32"))]
202mod non_wasm {
203 use super::{
204 Context, Expression, FunctionExpression, Resolved, TypeDef, VrlValueConvert, state,
205 };
206 use crate::prelude::ExpressionError;
207 use crate::stdlib::json_utils::bom::StripBomFromUTF8;
208 use crate::value;
209 use jsonschema;
210 use std::collections::HashMap;
211 use std::path::{Path, PathBuf};
212 use std::sync::{Arc, LazyLock, RwLock};
213
214 static SCHEMA_CACHE: LazyLock<RwLock<HashMap<PathBuf, Arc<jsonschema::Validator>>>> =
217 LazyLock::new(|| RwLock::new(HashMap::new()));
218
219 #[derive(Debug, Clone)]
220 pub(super) struct ValidateJsonSchemaFn {
221 pub(super) value: Box<dyn Expression>,
222 pub(super) schema_path: PathBuf, pub(super) ignore_unknown_formats: Box<dyn Expression>,
224 }
225
226 impl FunctionExpression for ValidateJsonSchemaFn {
227 fn resolve(&self, ctx: &mut Context) -> Resolved {
228 let value = self.value.resolve(ctx)?;
229 let ignore_unknown_formats = self.ignore_unknown_formats.resolve(ctx)?.try_boolean()?;
230
231 let bytes = value.try_bytes()?;
233 let stripped_bytes = bytes.strip_bom();
234
235 if bytes.is_empty() {
237 return Err(ExpressionError::from("Empty JSON value")); }
239
240 let json_value = if stripped_bytes.is_empty() {
242 serde_json::Value::Null
243 } else {
244 serde_json::from_slice(stripped_bytes).map_err(|e| format!("Invalid JSON: {e}"))?
245 };
246
247 let schema_validator =
248 get_or_compile_schema(&self.schema_path, ignore_unknown_formats)?;
249
250 let validation_errors = schema_validator
251 .iter_errors(&json_value)
252 .map(|e| {
253 format!(
254 "{} at {}",
255 e,
256 if e.instance_path().as_str().is_empty() {
257 "/"
258 } else {
259 e.instance_path().as_str()
260 }
261 )
262 })
263 .collect::<Vec<String>>()
264 .join(", ");
265
266 if validation_errors.is_empty() {
267 Ok(value!(true))
268 } else {
269 Err(ExpressionError::from(format!(
270 "JSON schema validation failed: {validation_errors}"
271 )))
272 }
273 }
274
275 fn type_def(&self, _: &state::TypeState) -> TypeDef {
276 TypeDef::boolean().fallible()
277 }
278 }
279
280 pub(super) fn get_json_schema_definition(path: &Path) -> Result<serde_json::Value, String> {
286 let b = std::fs::read(path).map_err(|e| {
287 format!(
288 "Failed to open schema definition file '{}': {e}",
289 path.display()
290 )
291 })?;
292 let schema: serde_json::Value = serde_json::from_slice(&b).map_err(|e| {
293 format!(
294 "Failed to parse schema definition file '{}': {e}",
295 path.display()
296 )
297 })?;
298 Ok(schema)
299 }
300
301 pub(super) fn get_or_compile_schema(
302 schema_path: &Path,
303 ignore_unknown_formats: bool,
304 ) -> Result<Arc<jsonschema::Validator>, String> {
305 {
307 let cache = SCHEMA_CACHE.read().unwrap();
308 if let Some(schema) = cache.get(schema_path) {
309 return Ok(schema.clone());
310 }
311 }
312
313 let mut cache = SCHEMA_CACHE.write().unwrap();
315
316 if let Some(schema) = cache.get(schema_path) {
318 return Ok(schema.clone());
319 }
320
321 let schema_definition = get_json_schema_definition(schema_path)
322 .map_err(|e| format!("JSON schema not found: {e}"))?;
323
324 let compiled_schema = jsonschema::options()
326 .should_validate_formats(true)
327 .should_ignore_unknown_formats(ignore_unknown_formats)
328 .build(&schema_definition)
329 .map_err(|e| format!("Failed to compile schema: {e}"))?;
330
331 let compiled_schema = Arc::new(compiled_schema);
332 cache.insert(schema_path.to_path_buf(), compiled_schema.clone());
333 Ok(compiled_schema)
334 }
335}
336
#[cfg(test)]
mod tests {
    use super::*;
    use crate::value;
    use std::env;

    /// Directory holding the JSON Schema fixtures, resolved from `CARGO_MANIFEST_DIR`.
    fn test_data_dir() -> PathBuf {
        let manifest_dir = env::var_os("CARGO_MANIFEST_DIR").unwrap();
        PathBuf::from(manifest_dir).join("tests/data/jsonschema/")
    }

    /// Path of a fixture, given relative to the fixture directory, as an owned string.
    fn schema_file(relative: &str) -> String {
        test_data_dir()
            .join(relative)
            .to_str()
            .unwrap()
            .to_owned()
    }

    test_function![
        validate_json_schema => ValidateJsonSchema;

        valid_with_email_format_json {
            args: func_args![
                value: value!("{\"productUser\":\"email@domain.com\"}"),
                schema_definition: schema_file("validate_json_schema/schema_with_email_format.json"),
                ignore_unknown_formats: false],
            want: Ok(value!(true)),
            tdef: TypeDef::boolean().fallible(),
        }

        valid_with_array_of_things_json {
            args: func_args![
                value: value!("{\"fruits\":[\"apple\",\"orange\",\"pear\"],\"vegetables\":[{\"veggieName\":\"potato\",\"veggieLike\":true},{\"veggieName\":\"broccoli\",\"veggieLike\":false}]}"),
                schema_definition: schema_file("validate_json_schema/schema_arrays_of_things.json"),
                ignore_unknown_formats: false],
            want: Ok(value!(true)),
            tdef: TypeDef::boolean().fallible(),
        }

        invalid_email_json {
            args: func_args![
                value: value!("{\"productUser\":\"invalid-email\"}"),
                schema_definition: schema_file("validate_json_schema/schema_with_email_format.json"),
                ignore_unknown_formats: false],
            want: Err("JSON schema validation failed: \"invalid-email\" is not a \"email\" at /productUser"),
            tdef: TypeDef::boolean().fallible(),
        }

        custom_format_ignored_json {
            args: func_args![
                value: value!("{\"productUser\":\"just-a-string\"}"),
                schema_definition: schema_file("validate_json_schema/schema_with_custom_format.json"),
                ignore_unknown_formats: true],
            want: Ok(value!(true)),
            tdef: TypeDef::boolean().fallible(),
        }

        invalid_empty_json {
            args: func_args![
                value: value!(""),
                schema_definition: schema_file("validate_json_schema/schema_with_email_format.json"),
                ignore_unknown_formats: false],
            want: Err("Empty JSON value"),
            tdef: TypeDef::boolean().fallible(),
        }

    ];
}