1use crate::compiler::prelude::*;
2use crate::parsing::xml::{
3 DEFAULT_ALWAYS_USE_TEXT_KEY, DEFAULT_ATTR_PREFIX, DEFAULT_INCLUDE_ATTR, DEFAULT_PARSE_BOOL,
4 DEFAULT_PARSE_NULL, DEFAULT_PARSE_NUMBER, DEFAULT_TEXT_KEY, DEFAULT_TRIM, ParseOptions,
5 parse_xml,
6};
7use std::sync::LazyLock;
8
9static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
10 vec![
11 Parameter::required(
12 "value",
13 kind::BYTES,
14 "The string representation of the XML document to parse.",
15 ),
16 Parameter::optional(
17 "trim",
18 kind::BOOLEAN,
19 "Remove excess whitespace between XML elements.",
20 )
21 .default(&DEFAULT_TRIM),
22 Parameter::optional(
23 "include_attr",
24 kind::BOOLEAN,
25 "Include XML tag attributes in the returned object.",
26 )
27 .default(&DEFAULT_INCLUDE_ATTR),
28 Parameter::optional(
29 "attr_prefix",
30 kind::BYTES,
31 "String prefix to use for XML tag attribute keys.",
32 )
33 .default(&DEFAULT_ATTR_PREFIX),
34 Parameter::optional(
35 "text_key",
36 kind::BYTES,
37 "Key name to use for expanded text nodes.",
38 )
39 .default(&DEFAULT_TEXT_KEY),
40 Parameter::optional(
41 "always_use_text_key",
42 kind::BOOLEAN,
43 "Always return text nodes as `{\"<text_key>\": \"value\"}.`",
44 )
45 .default(&DEFAULT_ALWAYS_USE_TEXT_KEY),
46 Parameter::optional(
47 "parse_bool",
48 kind::BOOLEAN,
49 "Parse \"true\" and \"false\" as boolean.",
50 )
51 .default(&DEFAULT_PARSE_BOOL),
52 Parameter::optional("parse_null", kind::BOOLEAN, "Parse \"null\" as null.")
53 .default(&DEFAULT_PARSE_NULL),
54 Parameter::optional(
55 "parse_number",
56 kind::BOOLEAN,
57 "Parse numbers as integers/floats.",
58 )
59 .default(&DEFAULT_PARSE_NUMBER),
60 ]
61});
62
/// Marker type that registers the `parse_xml` function; all behaviour lives in
/// its `Function` implementation.
#[derive(Debug, Clone, Copy)]
pub struct ParseXml;
65
66impl Function for ParseXml {
67 fn identifier(&self) -> &'static str {
68 "parse_xml"
69 }
70
71 fn usage(&self) -> &'static str {
72 "Parses the `value` as XML."
73 }
74
75 fn category(&self) -> &'static str {
76 Category::Parse.as_ref()
77 }
78
79 fn internal_failure_reasons(&self) -> &'static [&'static str] {
80 &["`value` is not a valid XML document."]
81 }
82
83 fn return_kind(&self) -> u16 {
84 kind::OBJECT
85 }
86
87 fn notices(&self) -> &'static [&'static str] {
88 &["Valid XML must contain exactly one root node. Always returns an object."]
89 }
90
91 fn examples(&self) -> &'static [Example] {
92 &[example! {
93 title: "Parse XML",
94 source: indoc! {r#"
95 value = s'<book category="CHILDREN"><title lang="en">Harry Potter</title><author>J K. Rowling</author><year>2005</year></book>';
96
97 parse_xml!(value, text_key: "value", parse_number: false)
98 "#},
99 result: Ok(
100 r#"{ "book": { "@category": "CHILDREN", "author": "J K. Rowling", "title": { "@lang": "en", "value": "Harry Potter" }, "year": "2005" } }"#,
101 ),
102 }]
103 }
104
105 fn compile(
106 &self,
107 _state: &state::TypeState,
108 _ctx: &mut FunctionCompileContext,
109 arguments: ArgumentList,
110 ) -> Compiled {
111 let value = arguments.required("value");
112
113 let trim = arguments.optional("trim");
114 let include_attr = arguments.optional("include_attr");
115 let attr_prefix = arguments.optional("attr_prefix");
116 let text_key = arguments.optional("text_key");
117 let always_use_text_key = arguments.optional("always_use_text_key");
118 let parse_bool = arguments.optional("parse_bool");
119 let parse_null = arguments.optional("parse_null");
120 let parse_number = arguments.optional("parse_number");
121
122 Ok(ParseXmlFn {
123 value,
124 trim,
125 include_attr,
126 attr_prefix,
127 text_key,
128 always_use_text_key,
129 parse_bool,
130 parse_null,
131 parse_number,
132 }
133 .as_expr())
134 }
135
136 fn parameters(&self) -> &'static [Parameter] {
137 PARAMETERS.as_slice()
138 }
139}
140
141#[derive(Debug, Clone)]
142struct ParseXmlFn {
143 value: Box<dyn Expression>,
144
145 trim: Option<Box<dyn Expression>>,
146 include_attr: Option<Box<dyn Expression>>,
147 attr_prefix: Option<Box<dyn Expression>>,
148 text_key: Option<Box<dyn Expression>>,
149 always_use_text_key: Option<Box<dyn Expression>>,
150 parse_bool: Option<Box<dyn Expression>>,
151 parse_null: Option<Box<dyn Expression>>,
152 parse_number: Option<Box<dyn Expression>>,
153}
154
155impl FunctionExpression for ParseXmlFn {
156 fn resolve(&self, ctx: &mut Context) -> Resolved {
157 let value = self.value.resolve(ctx)?;
158
159 let options = ParseOptions {
160 trim: self
161 .trim
162 .as_ref()
163 .map(|expr| expr.resolve(ctx))
164 .transpose()?,
165
166 include_attr: self
167 .include_attr
168 .as_ref()
169 .map(|expr| expr.resolve(ctx))
170 .transpose()?,
171
172 attr_prefix: self
173 .attr_prefix
174 .as_ref()
175 .map(|expr| expr.resolve(ctx))
176 .transpose()?,
177
178 text_key: self
179 .text_key
180 .as_ref()
181 .map(|expr| expr.resolve(ctx))
182 .transpose()?,
183
184 always_use_text_key: self
185 .always_use_text_key
186 .as_ref()
187 .map(|expr| expr.resolve(ctx))
188 .transpose()?,
189
190 parse_bool: self
191 .parse_bool
192 .as_ref()
193 .map(|expr| expr.resolve(ctx))
194 .transpose()?,
195
196 parse_null: self
197 .parse_null
198 .as_ref()
199 .map(|expr| expr.resolve(ctx))
200 .transpose()?,
201
202 parse_number: self
203 .parse_number
204 .as_ref()
205 .map(|expr| expr.resolve(ctx))
206 .transpose()?,
207 };
208
209 parse_xml(value, options)
210 }
211
212 fn type_def(&self, _: &state::TypeState) -> TypeDef {
213 type_def()
214 }
215}
216
217fn type_def() -> TypeDef {
218 TypeDef::bytes()
219 .or_object(Collection::from_unknown(inner_kind()))
220 .fallible()
221}
222
223fn inner_kind() -> Kind {
224 Kind::object(Collection::any())
225}
226
#[cfg(test)]
mod tests {
    use super::*;
    use crate::value;

    // Table of `parse_xml` cases. Each entry supplies the call arguments (`args`),
    // the expected result (`want`) and the expected type definition (`tdef`).
    test_function![
        parse_xml => ParseXml;

        // An element holding only text maps to a plain string.
        simple_text {
            args: func_args![ value: "<a>test</a>" ],
            want: Ok(value!({ "a": "test" })),
            tdef: type_def(),
        }

        // With default options, attributes appear under `@`-prefixed keys and the
        // text moves under the `text` key.
        include_attr {
            args: func_args![ value: r#"<a href="https://vector.dev">test</a>"# ],
            want: Ok(value!({ "a": { "@href": "https://vector.dev", "text": "test" } })),
            tdef: type_def(),
        }

        // `include_attr: false` drops attributes, so the element is a plain string again.
        exclude_attr {
            args: func_args![ value: r#"<a href="https://vector.dev">test</a>"#, include_attr: false ],
            want: Ok(value!({ "a": "test" })),
            tdef: type_def(),
        }

        // `always_use_text_key` wraps text even without attributes, using the custom key.
        custom_text_key {
            args: func_args![ value: "<b>test</b>", text_key: "node", always_use_text_key: true ],
            want: Ok(value!({ "b": { "node": "test" } })),
            tdef: type_def(),
        }

        // Attributes are kept on a node that is the only child of its parent.
        include_attributes_if_single_node {
            args: func_args![ value: r#"<root><node attr="value"><message>foo</message></node></root>"# ],
            want: Ok(value!({ "root": { "node": { "@attr": "value", "message": "foo" } } })),
            tdef: type_def(),
        }

        // Repeated sibling elements become an array; each entry keeps its attributes.
        include_attributes_multiple_children {
            args: func_args![ value: r#"<root><node attr="value"><message>bar</message></node><node attr="value"><message>baz</message></node></root>"#],
            want: Ok(value!({"root":{ "node":[ { "@attr": "value", "message": "bar" }, { "@attr": "value", "message": "baz" } ] } })),
            tdef: type_def(),
        }

        // Attributes and distinct child elements share one object.
        nested_object {
            args: func_args![ value: r#"<a attr="value"><b>one</b><c>two</c></a>"# ],
            want: Ok(value!({ "a": { "@attr": "value", "b": "one", "c": "two" } })),
            tdef: type_def(),
        }

        // Repeated child elements with text collapse into an array of strings.
        nested_object_array {
            args: func_args![ value: "<a><b>one</b><b>two</b></a>" ],
            want: Ok(value!({ "a": { "b": ["one", "two"] } })),
            tdef: type_def(),
        }

        // The XML declaration and comments (before, inside and after the root) do not
        // show up in the result.
        header_and_comments {
            args: func_args![ value: indoc!{r#"
                <?xml version="1.0" encoding="ISO-8859-1"?>
                <!-- Example found somewhere in the deep depths of the web -->
                <note>
                <to>Tove</to>
                <!-- Randomly inserted inner comment -->
                <from>Jani</from>
                <heading>Reminder</heading>
                <body>Don't forget me this weekend!</body>
                </note>

                <!-- Could literally be placed anywhere -->
            "#}],
            want: Ok(value!(
                {
                    "note": {
                        "to": "Tove",
                        "from": "Jani",
                        "heading": "Reminder",
                        "body": "Don't forget me this weekend!"
                    }
                }
            )),
            tdef: type_def(),
        }

        // A processing instruction inside an element is skipped; note that the text is
        // then reported under the `text` key rather than as a plain string.
        header_inside_element {
            args: func_args![ value: "<p><?xml?>text123</p>" ],
            want: Ok(value!(
                {
                    "p": {
                        "text": "text123"
                    }
                }
            )),
            tdef: type_def(),
        }

        // Default options convert booleans, `null` and numbers; an empty element
        // becomes an empty object.
        mixed_types {
            args: func_args![ value: indoc!{r#"
                <?xml version="1.0" encoding="ISO-8859-1"?>
                <!-- Mixed types -->
                <data>
                <!-- Booleans -->
                <item>true</item>
                <item>false</item>
                <!-- String -->
                <item>string!</item>
                <!-- Empty object -->
                <item />
                <!-- Literal value "null" -->
                <item>null</item>
                <!-- Integer -->
                <item>1</item>
                <!-- Float -->
                <item>1.0</item>
                </data>
            "#}],
            want: Ok(value!(
                {
                    "data": {
                        "item": [
                            true,
                            false,
                            "string!",
                            {},
                            null,
                            1,
                            1.0
                        ]
                    }
                }
            )),
            tdef: type_def(),
        }

        // With all scalar conversions disabled, every text value stays a string; the
        // empty element is still an empty object.
        just_strings {
            args: func_args![ value: indoc!{r#"
                <?xml version="1.0" encoding="ISO-8859-1"?>
                <!-- All scalar types are just strings -->
                <data>
                <item>true</item>
                <item>false</item>
                <item>string!</item>
                <!-- Still an empty object -->
                <item />
                <item>null</item>
                <item>1</item>
                <item>1.0</item>
                </data>
            "#}, parse_null: false, parse_bool: false, parse_number: false],
            want: Ok(value!(
                {
                    "data": {
                        "item": [
                            "true",
                            "false",
                            "string!",
                            {},
                            "null",
                            "1",
                            "1.0"
                        ]
                    }
                }
            )),
            tdef: type_def(),
        }

        // `trim: false` keeps whitespace between elements as text entries.
        untrimmed {
            args: func_args![ value: "<root>  <a>test</a>  </root>", trim: false ],
            want: Ok(value!(
                {
                    "root": {
                        "a": "test",
                        "text": ["  ", "  "],
                    }
                }
            )),
            tdef: type_def(),
        }

        // Input that is not XML yields an error carrying the parser's position.
        invalid_token {
            args: func_args![ value: "true" ],
            want: Err("unable to parse xml: unknown token at 1:1"),
            tdef: type_def(),
        }

        // Singly occurring nested elements stay objects (no arrays introduced).
        flat_parent_property {
            args: func_args![ value: indoc!{r#"
                <?xml version="1.0" encoding="UTF-8"?>
                <MY_XML>
                <property1>
                <property1_a>a</property1_a>
                <property1_b>b</property1_b>
                <property1_c>c</property1_c>
                </property1>
                <property2>
                <property2_object>
                <property2a_a>a</property2a_a>
                <property2a_b>b</property2a_b>
                <property2a_c>c</property2a_c>
                </property2_object>
                </property2>
                </MY_XML>
            "#}],
            want: Ok(value!(
                {
                    "MY_XML": {
                        "property1": {
                            "property1_a": "a",
                            "property1_b": "b",
                            "property1_c": "c"
                        },
                        "property2": {
                            "property2_object": {
                                "property2a_a": "a",
                                "property2a_b": "b",
                                "property2a_c": "c"
                            }
                        }
                    }
                }
            )),
            tdef: type_def(),
        }

        // Same document, but `property2_object` occurs twice and therefore becomes an
        // array of objects.
        nested_parent_property {
            args: func_args![ value: indoc!{r#"
                <?xml version="1.0" encoding="UTF-8"?>
                <MY_XML>
                <property1>
                <property1_a>a</property1_a>
                <property1_b>b</property1_b>
                <property1_c>c</property1_c>
                </property1>
                <property2>
                <property2_object>
                <property2a_a>a</property2a_a>
                <property2a_b>b</property2a_b>
                <property2a_c>c</property2a_c>
                </property2_object>
                <property2_object>
                <property2a_a>a</property2a_a>
                <property2a_b>b</property2a_b>
                <property2a_c>c</property2a_c>
                </property2_object>
                </property2>
                </MY_XML>
            "#}],
            want: Ok(value!(
                {
                    "MY_XML": {
                        "property1": {
                            "property1_a": "a",
                            "property1_b": "b",
                            "property1_c": "c"
                        },
                        "property2": {
                            "property2_object": [
                                {
                                    "property2a_a": "a",
                                    "property2a_b": "b",
                                    "property2a_c": "c"
                                },
                                {
                                    "property2a_a": "a",
                                    "property2a_b": "b",
                                    "property2a_c": "c"
                                }
                            ]
                        }
                    }
                }
            )),
            tdef: type_def(),
        }

        // A single child without siblings is kept under its own key.
        if_no_sibling {
            args: func_args![ value: "<root><a>test</a></root>"],
            want: Ok(value!({ "root": { "a": "test" } })),
            tdef: type_def(),
        }

        // A nested single child next to a text sibling: both keep their own keys.
        if_no_sibling2 {
            args: func_args![ value: "<root><a><a1>test</a1></a><b>test2</b></root>"],
            want: Ok(value!({ "root": { "a": { "a1": "test" }, "b" : "test2" } })),
            tdef: type_def(),
        }
    ];

    // Checks the shape of the type definition reported by `ParseXmlFn::type_def`.
    #[test]
    fn test_kind() {
        let state = state::TypeState::default();

        // The argument expressions are irrelevant here: `type_def` ignores them.
        let func = ParseXmlFn {
            value: value!(true).into_expression(),
            trim: None,
            include_attr: None,
            attr_prefix: None,
            text_key: None,
            always_use_text_key: None,
            parse_bool: None,
            parse_null: None,
            parse_number: None,
        };

        let type_def = func.type_def(&state);

        // Fallible, and a union of bytes and object rather than one exact kind.
        assert!(type_def.is_fallible());
        assert!(!type_def.is_exact());
        assert!(type_def.contains_bytes());
        assert!(type_def.contains_object());

        let object1 = type_def.as_object().unwrap();

        // Outer object: no known fields; unknown fields are objects.
        assert!(object1.known().is_empty());
        assert!(object1.unknown_kind().contains_object());

        let object2 = object1.unknown_kind().as_object().cloned().unwrap();

        // Inner object: no known fields; unknown fields may be anything.
        assert!(object2.known().is_empty());
        assert!(object2.unknown_kind().is_any());
    }
}