1use std::collections::HashMap;
2
3use serde_json::{
4 Error, Map,
5 value::{RawValue, Value as JsonValue},
6};
7
8use crate::compiler::prelude::*;
9use crate::stdlib::json_utils::bom::StripBomFromUTF8;
10use crate::stdlib::json_utils::json_type_def::json_type_def;
11use std::sync::LazyLock;
12
13static DEFAULT_LOSSY: LazyLock<Value> = LazyLock::new(|| Value::Boolean(true));
14
15static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
16 vec![
17 Parameter::required(
18 "value",
19 kind::BYTES,
20 "The string representation of the JSON to parse.",
21 ),
22 Parameter::optional(
23 "max_depth",
24 kind::INTEGER,
25 "Number of layers to parse for nested JSON-formatted documents.
26The value must be in the range of 1 to 128.",
27 ),
28 Parameter::optional(
29 "lossy",
30 kind::BOOLEAN,
31 "Whether to parse the JSON in a lossy manner. Replaces invalid UTF-8 characters
32with the Unicode character `�` (U+FFFD) if set to true, otherwise returns an error
33if there are any invalid UTF-8 characters present.",
34 )
35 .default(&DEFAULT_LOSSY),
36 ]
37});
38
39fn parse_json(value: Value, lossy: Value) -> Resolved {
40 let lossy = lossy.try_boolean()?;
41 Ok(if lossy {
42 serde_json::from_str(value.try_bytes_utf8_lossy()?.strip_bom())
43 } else {
44 serde_json::from_slice(value.try_bytes()?.strip_bom())
45 }
46 .map_err(|e| format!("unable to parse json: {e}"))?)
47}
48
49fn parse_json_with_depth(value: Value, max_depth: Value, lossy: Value) -> Resolved {
52 let parsed_depth = validate_depth(max_depth)?;
53 let lossy = lossy.try_boolean()?;
54 let bytes = if lossy {
55 value.try_bytes_utf8_lossy()?.into_owned().into()
56 } else {
57 value.try_bytes()?
58 };
59
60 let raw_value = serde_json::from_slice::<'_, &RawValue>(&bytes)
61 .map_err(|e| format!("unable to read json: {e}"))?;
62
63 let res = parse_layer(raw_value, parsed_depth)
64 .map_err(|e| format!("unable to parse json with max depth: {e}"))?;
65
66 Ok(Value::from(res))
67}
68
69fn parse_layer(value: &RawValue, remaining_depth: u8) -> std::result::Result<JsonValue, Error> {
70 let raw_value = value.get();
71
72 if raw_value.starts_with('{') {
74 if remaining_depth == 0 {
75 serde_json::value::to_value(raw_value)
78 } else {
79 let map: HashMap<String, &RawValue> = serde_json::from_str(raw_value)?;
81
82 let mut res_map: Map<String, JsonValue> = Map::with_capacity(map.len());
83 for (k, v) in map {
84 res_map.insert(k, parse_layer(v, remaining_depth - 1)?);
85 }
86 Ok(serde_json::Value::from(res_map))
87 }
88 } else if raw_value.starts_with('[') {
90 if remaining_depth == 0 {
91 serde_json::value::to_value(raw_value)
94 } else {
95 let arr: Vec<&RawValue> = serde_json::from_str(raw_value)?;
97
98 let mut res_arr: Vec<JsonValue> = Vec::with_capacity(arr.len());
99 for v in arr {
100 res_arr.push(parse_layer(v, remaining_depth - 1)?);
101 }
102 Ok(serde_json::Value::from(res_arr))
103 }
104 } else {
107 serde_json::from_str(raw_value)
108 }
109}
110
111fn validate_depth(value: Value) -> ExpressionResult<u8> {
112 let res = value.try_integer()?;
113 let res = u8::try_from(res).map_err(|e| e.to_string())?;
114
115 if (1..=128).contains(&res) {
121 Ok(res)
122 } else {
123 Err(ExpressionError::from(format!(
124 "max_depth value should be greater than 0 and less than 128, got {res}"
125 )))
126 }
127}
128
129#[derive(Clone, Copy, Debug)]
130pub struct ParseJson;
131
132impl Function for ParseJson {
133 fn identifier(&self) -> &'static str {
134 "parse_json"
135 }
136
137 fn summary(&self) -> &'static str {
138 "parse a string to a JSON type"
139 }
140
141 fn usage(&self) -> &'static str {
142 indoc! {"
143 Parses the provided `value` as JSON.
144
145 Only JSON types are returned. If you need to convert a `string` into a `timestamp`,
146 consider the `parse_timestamp` function.
147 "}
148 }
149
150 fn category(&self) -> &'static str {
151 Category::Parse.as_ref()
152 }
153
154 fn internal_failure_reasons(&self) -> &'static [&'static str] {
155 &["`value` is not a valid JSON-formatted payload."]
156 }
157
158 fn return_kind(&self) -> u16 {
159 kind::BOOLEAN
160 | kind::INTEGER
161 | kind::FLOAT
162 | kind::BYTES
163 | kind::OBJECT
164 | kind::ARRAY
165 | kind::NULL
166 }
167
168 fn notices(&self) -> &'static [&'static str] {
169 &[indoc! {"
170 Only JSON types are returned. If you need to convert a `string` into a `timestamp`,
171 consider the [`parse_timestamp`](#parse_timestamp) function.
172 "}]
173 }
174
175 fn parameters(&self) -> &'static [Parameter] {
176 PARAMETERS.as_slice()
177 }
178
179 fn examples(&self) -> &'static [Example] {
180 &[
181 example! {
182 title: "Parse JSON",
183 source: r#"parse_json!(s'{"key": "val"}')"#,
184 result: Ok(r#"{ "key": "val" }"#),
185 },
186 example! {
187 title: "Parse JSON array",
188 source: r#"parse_json!("[true, 0]")"#,
189 result: Ok("[true, 0]"),
190 },
191 example! {
192 title: "Parse JSON string",
193 source: r#"parse_json!(s'"hello"')"#,
194 result: Ok("hello"),
195 },
196 example! {
197 title: "Parse JSON integer",
198 source: r#"parse_json!("42")"#,
199 result: Ok("42"),
200 },
201 example! {
202 title: "Parse JSON float",
203 source: r#"parse_json!("42.13")"#,
204 result: Ok("42.13"),
205 },
206 example! {
207 title: "Parse JSON boolean",
208 source: r#"parse_json!("false")"#,
209 result: Ok("false"),
210 },
211 example! {
212 title: "Invalid JSON value",
213 source: r#"parse_json!("{ INVALID }")"#,
214 result: Err(
215 r#"function call error for "parse_json" at (0:26): unable to parse json: key must be a string at line 1 column 3"#,
216 ),
217 },
218 example! {
219 title: "Parse JSON with max_depth",
220 source: r#"parse_json!(s'{"first_level":{"second_level":"finish"}}', max_depth: 1)"#,
221 result: Ok(r#"{"first_level":"{\"second_level\":\"finish\"}"}"#),
222 },
223 ]
224 }
225
226 fn compile(
227 &self,
228 _state: &state::TypeState,
229 _ctx: &mut FunctionCompileContext,
230 arguments: ArgumentList,
231 ) -> Compiled {
232 let value = arguments.required("value");
233 let max_depth = arguments.optional("max_depth");
234 let lossy = arguments.optional("lossy");
235
236 match max_depth {
237 Some(max_depth) => Ok(ParseJsonMaxDepthFn {
238 value,
239 max_depth,
240 lossy,
241 }
242 .as_expr()),
243 None => Ok(ParseJsonFn { value, lossy }.as_expr()),
244 }
245 }
246}
247
248#[derive(Debug, Clone)]
249struct ParseJsonFn {
250 value: Box<dyn Expression>,
251 lossy: Option<Box<dyn Expression>>,
252}
253
254impl FunctionExpression for ParseJsonFn {
255 fn resolve(&self, ctx: &mut Context) -> Resolved {
256 let value = self.value.resolve(ctx)?;
257 let lossy = self
258 .lossy
259 .map_resolve_with_default(ctx, || DEFAULT_LOSSY.clone())?;
260 parse_json(value, lossy)
261 }
262
263 fn type_def(&self, _: &state::TypeState) -> TypeDef {
264 json_type_def()
265 }
266}
267
268#[derive(Debug, Clone)]
269struct ParseJsonMaxDepthFn {
270 value: Box<dyn Expression>,
271 max_depth: Box<dyn Expression>,
272 lossy: Option<Box<dyn Expression>>,
273}
274
275impl FunctionExpression for ParseJsonMaxDepthFn {
276 fn resolve(&self, ctx: &mut Context) -> Resolved {
277 let value = self.value.resolve(ctx)?;
278 let max_depth = self.max_depth.resolve(ctx)?;
279 let lossy = self
280 .lossy
281 .map_resolve_with_default(ctx, || DEFAULT_LOSSY.clone())?;
282 parse_json_with_depth(value, max_depth, lossy)
283 }
284
285 fn type_def(&self, _: &state::TypeState) -> TypeDef {
286 json_type_def()
287 }
288}
289
// Table-driven tests for `parse_json`. Each case lists the call arguments
// (`args`), the expected result (`want`), and the expected type definition (`tdef`).
#[cfg(test)]
mod tests {
    use super::*;
    use crate::value;

    test_function![
        parse_json => ParseJson;

        parses {
            args: func_args![ value: r#"{"field": "value"}"# ],
            want: Ok(value!({ field: "value" })),
            tdef: json_type_def(),
        }

        complex_json {
            args: func_args![ value: r#"{"object": {"string":"value","number":42,"array":["hello","world"],"boolean":false}}"# ],
            want: Ok(value!({ object: {string: "value", number: 42, array: ["hello", "world"], boolean: false} })),
            tdef: json_type_def(),
        }

        // The input is truncated inside a string literal.
        invalid_json_errors {
            args: func_args![ value: r#"{"field": "value"# ],
            want: Err("unable to parse json: EOF while parsing a string at line 1 column 16"),
            tdef: json_type_def(),
        }

        // With a depth of 1 the nested object is returned as its raw JSON text.
        max_depth {
            args: func_args![ value: r#"{"top_layer": {"layer_one": "finish", "layer_two": 2}}"#, max_depth: 1],
            want: Ok(value!({ top_layer: r#"{"layer_one": "finish", "layer_two": 2}"# })),
            tdef: json_type_def(),
        }

        // The outer array and the object inside it each consume one layer.
        max_depth_array {
            args: func_args![ value: r#"[{"top_layer": {"next_layer": ["finish"]}}]"#, max_depth: 2],
            want: Ok(value!([{ top_layer: r#"{"next_layer": ["finish"]}"# }])),
            tdef: json_type_def(),
        }

        // A depth larger than the document's nesting parses everything.
        max_depth_exceeds_layers {
            args: func_args![ value: r#"{"top_layer": {"layer_one": "finish", "layer_two": 2}}"#, max_depth: 10],
            want: Ok(value!({ top_layer: {layer_one: "finish", layer_two: 2} })),
            tdef: json_type_def(),
        }

        // The depth-limited path reports malformed input with its own "unable to read json" prefix.
        invalid_json_with_max_depth {
            args: func_args![ value: r#"{"field": "value"#, max_depth: 3 ],
            want: Err("unable to read json: EOF while parsing a string at line 1 column 16"),
            tdef: json_type_def(),
        }

        // 129 is just above the accepted upper bound of 128.
        invalid_input_max_depth {
            args: func_args![ value: r#"{"top_layer": "finish"}"#, max_depth: 129],
            want: Err("max_depth value should be greater than 0 and less than 128, got 129"),
            tdef: json_type_def(),
        }

        // A value that still fits in an i64 is expected to stay an integer.
        max_int {
            args: func_args![ value: format!("{{\"num\": {}}}", i64::MAX - 1)],
            want: Ok(value!({"num": 9_223_372_036_854_775_806_i64})),
            tdef: json_type_def(),
        }

        // 9223372036854775808 is i64::MAX + 1, so the expected result is a float.
        lossy_float_conversion {
            args: func_args![ value: r#"{"num": 9223372036854775808}"#],
            want: Ok(value!({"num": 9.223_372_036_854_776e18})),
            tdef: json_type_def(),
        }

        // 0x22 is `"`; 0xf5 is a byte that never occurs in valid UTF-8.
        // `lossy` is left at its default here.
        parse_invalid_utf8_default_lossy_arg {
            args: func_args![ value: Bytes::from_static(&[0x22,0xf5,0x22])],
            want: Ok(value!(std::char::REPLACEMENT_CHARACTER.to_string())),
            tdef: json_type_def(),
        }

        // Same input with `lossy` set explicitly to true.
        parse_invalid_utf8_lossy_arg_true {
            args: func_args![ value: Bytes::from_static(&[0x22,0xf5,0x22]), lossy: true],
            want: Ok(value!(std::char::REPLACEMENT_CHARACTER.to_string())),
            tdef: json_type_def(),
        }

        // With `lossy` disabled the invalid byte is reported as an error.
        invalid_utf8_json_lossy_arg_false {
            args: func_args![ value: Bytes::from_static(&[0x22,0xf5,0x22]), lossy: false],
            want: Err("unable to parse json: invalid unicode code point at line 1 column 3"),
            tdef: json_type_def(),
        }

        // 0xef 0xbb 0xbf is the UTF-8 byte order mark; 0x7b 0x7d is `{}`.
        json_bom {
            args: func_args![ value: Bytes::from_static(&[0xef,0xbb,0xbf,0x7b,0x7d]), lossy: false],
            want: Ok(value!({})),
            tdef: json_type_def(),
        }

        // Same BOM-prefixed input through the lossy path.
        json_bom_lossy {
            args: func_args![ value: Bytes::from_static(&[0xef,0xbb,0xbf,0x7b,0x7d]), lossy: true],
            want: Ok(value!({})),
            tdef: json_type_def(),
        }
    ];

    // The expected float differs in its last digits depending on the
    // `float_roundtrip` feature.
    // NOTE(review): presumably this mirrors serde_json's feature of the same name — confirm.
    #[cfg(not(feature = "float_roundtrip"))]
    test_function![
        parse_json => ParseJson;

        no_roundtrip_float_conversion {
            args: func_args![ value: r#"{"num": 1626175065.5934923}"#],
            want: Ok(value!({"num": 1_626_175_065.593_492_5})),
            tdef: json_type_def(),
        }
    ];

    #[cfg(feature = "float_roundtrip")]
    test_function![
        parse_json => ParseJson;

        roundtrip_float_conversion {
            args: func_args![ value: r#"{"num": 1626175065.5934923}"#],
            want: Ok(value!({"num": 1_626_175_065.593_492_3})),
            tdef: json_type_def(),
        }
    ];
}