vrl/core/
encode_key_value.rs

1use std::{
2    collections::BTreeMap,
3    fmt::{self, Write},
4};
5
6use serde::ser::{
7    Error, Serialize, SerializeMap, SerializeSeq, SerializeStruct, SerializeStructVariant,
8    SerializeTuple, SerializeTupleStruct, SerializeTupleVariant, Serializer,
9};
10
11use crate::value::KeyString;
12
13#[derive(Debug, snafu::Snafu)]
14pub enum EncodingError {
15    #[snafu(display("Key is not String."))]
16    KeyNotString,
17    #[snafu(display("Encoding error: {}.", msg))]
18    Other { msg: String },
19}
20
21impl Error for EncodingError {
22    fn custom<T>(msg: T) -> Self
23    where
24        T: fmt::Display,
25    {
26        Self::Other {
27            msg: msg.to_string(),
28        }
29    }
30}
31
32/// Encodes input to key value format with specified
33/// delimiters in field order where unspecified fields
34/// will follow after them. `Flattens_boolean` values
35/// to only a key if true.
36///
37/// # Errors
38///
39/// Returns an `EncodingError` if the input contains non-`String` map keys.
40pub fn to_string<V: Serialize>(
41    input: &BTreeMap<KeyString, V>,
42    fields_order: &[KeyString],
43    key_value_delimiter: &str,
44    field_delimiter: &str,
45    flatten_boolean: bool,
46) -> Result<String, EncodingError> {
47    let mut output = String::new();
48
49    let mut input = flatten(input, '.')?;
50
51    for field in fields_order {
52        match (input.remove(field), flatten_boolean) {
53            (Some(Data::Boolean(false)), true) | (None, _) => (),
54            (Some(Data::Boolean(true)), true) => {
55                encode_string(&mut output, field);
56                output.push_str(field_delimiter);
57            }
58            (Some(value), _) => {
59                encode_field(&mut output, field, &value.to_string(), key_value_delimiter);
60                output.push_str(field_delimiter);
61            }
62        }
63    }
64
65    for (key, value) in &input {
66        match (value, flatten_boolean) {
67            (Data::Boolean(false), true) => (),
68            (Data::Boolean(true), true) => {
69                encode_string(&mut output, key);
70                output.push_str(field_delimiter);
71            }
72            (_, _) => {
73                encode_field(&mut output, key, &value.to_string(), key_value_delimiter);
74                output.push_str(field_delimiter);
75            }
76        }
77    }
78
79    if output.ends_with(field_delimiter) {
80        output.truncate(output.len() - field_delimiter.len());
81    }
82
83    Ok(output)
84}
85
86fn flatten<'a>(
87    input: impl IntoIterator<Item = (&'a KeyString, impl Serialize)> + 'a,
88    separator: char,
89) -> Result<BTreeMap<KeyString, Data>, EncodingError> {
90    let mut map = BTreeMap::new();
91    for (key, value) in input {
92        value.serialize(KeyValueSerializer::new(key.clone(), separator, &mut map))?;
93    }
94    Ok(map)
95}
96
97fn encode_field(output: &mut String, key: &str, value: &str, key_value_delimiter: &str) {
98    encode_string(output, key);
99    output.push_str(key_value_delimiter);
100    encode_string(output, value);
101}
102
/// Appends `str` to `output`, quoting and escaping it as required.
///
/// The text is wrapped in double quotes when it contains whitespace, `"` or
/// `=`. Independently of quoting, `\` is written as `\\`, `"` as `\"`, and a
/// newline as the three characters `\\n`.
fn encode_string(output: &mut String, str: &str) {
    // Decide once whether a surrounding quote is needed; an empty "quote"
    // makes the opening and closing writes no-ops.
    let quote = if str
        .chars()
        .any(|c| matches!(c, '"' | '=') || c.is_whitespace())
    {
        "\""
    } else {
        ""
    };

    output
        .write_str(quote)
        .expect("writing to a `String` never fails");

    for c in str.chars() {
        let escaped = match c {
            '\\' => r"\\",
            '"' => r#"\""#,
            '\n' => r"\\n",
            _ => {
                output.push(c);
                continue;
            }
        };
        output.push_str(escaped);
    }

    output.push_str(quote);
}
125
/// A scalar leaf value collected while flattening the input.
enum Data {
    None,
    Boolean(bool),
    I64(i64),
    U64(u64),
    F32(f32),
    F64(f64),
    I128(i128),
    U128(u128),
    Char(char),
    String(String),
}

impl fmt::Display for Data {
    /// Renders the value as it appears in the encoded output; `None` is
    /// written as `null`, everything else uses its own `Display` form.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // Textual variants are written straight to the formatter.
            Self::None => f.write_str("null"),
            Self::String(text) => f.write_str(text),
            Self::Char(ch) => f.write_char(*ch),
            // Everything else relies on the primitive's `Display` impl.
            Self::Boolean(flag) => write!(f, "{flag}"),
            Self::I64(number) => write!(f, "{number}"),
            Self::U64(number) => write!(f, "{number}"),
            Self::I128(number) => write!(f, "{number}"),
            Self::U128(number) => write!(f, "{number}"),
            Self::F32(number) => write!(f, "{number}"),
            Self::F64(number) => write!(f, "{number}"),
        }
    }
}
155
156struct KeyValueSerializer<'a> {
157    key: KeyString,
158    separator: char,
159    output: &'a mut BTreeMap<KeyString, Data>,
160}
161
162impl<'a> KeyValueSerializer<'a> {
163    fn new(key: KeyString, separator: char, output: &'a mut BTreeMap<KeyString, Data>) -> Self {
164        Self {
165            key,
166            separator,
167            output,
168        }
169    }
170
171    fn indexed(self) -> IndexedKeyValueSerializer<'a> {
172        IndexedKeyValueSerializer {
173            index: 0,
174            ser: self,
175        }
176    }
177
178    fn keyed(self) -> KeyedKeyValueSerializer<'a> {
179        KeyedKeyValueSerializer {
180            key: None,
181            ser: self,
182        }
183    }
184
185    fn descend(mut self, child: impl fmt::Display) -> Self {
186        self.key = format!("{}{}{child}", self.key, self.separator).into();
187        self
188    }
189
190    fn child(&mut self, child: impl fmt::Display) -> KeyValueSerializer<'_> {
191        KeyValueSerializer {
192            key: format!("{}{}{child}", self.key, self.separator).into(),
193            separator: self.separator,
194            output: self.output,
195        }
196    }
197
198    #[allow(clippy::unnecessary_wraps)]
199    fn process(self, data: Data) -> Result<(), EncodingError> {
200        self.output.insert(self.key, data);
201        Ok(())
202    }
203}
204
205impl<'a> Serializer for KeyValueSerializer<'a> {
206    type Ok = ();
207    type Error = EncodingError;
208
209    type SerializeSeq = IndexedKeyValueSerializer<'a>;
210    type SerializeTuple = IndexedKeyValueSerializer<'a>;
211    type SerializeTupleStruct = IndexedKeyValueSerializer<'a>;
212    type SerializeTupleVariant = IndexedKeyValueSerializer<'a>;
213    type SerializeMap = KeyedKeyValueSerializer<'a>;
214    type SerializeStruct = Self;
215    type SerializeStructVariant = Self;
216
217    fn serialize_bool(self, v: bool) -> Result<Self::Ok, Self::Error> {
218        self.process(Data::Boolean(v))
219    }
220
221    fn serialize_i8(self, v: i8) -> Result<Self::Ok, Self::Error> {
222        self.process(Data::I64(i64::from(v)))
223    }
224
225    fn serialize_i16(self, v: i16) -> Result<Self::Ok, Self::Error> {
226        self.process(Data::I64(i64::from(v)))
227    }
228
229    fn serialize_i32(self, v: i32) -> Result<Self::Ok, Self::Error> {
230        self.process(Data::I64(i64::from(v)))
231    }
232
233    fn serialize_i64(self, v: i64) -> Result<Self::Ok, Self::Error> {
234        self.process(Data::I64(v))
235    }
236
237    fn serialize_u8(self, v: u8) -> Result<Self::Ok, Self::Error> {
238        self.process(Data::U64(u64::from(v)))
239    }
240    fn serialize_u16(self, v: u16) -> Result<Self::Ok, Self::Error> {
241        self.process(Data::U64(u64::from(v)))
242    }
243
244    fn serialize_u32(self, v: u32) -> Result<Self::Ok, Self::Error> {
245        self.process(Data::U64(u64::from(v)))
246    }
247
248    fn serialize_u64(self, v: u64) -> Result<Self::Ok, Self::Error> {
249        self.process(Data::U64(v))
250    }
251
252    fn serialize_f32(self, v: f32) -> Result<Self::Ok, Self::Error> {
253        self.process(Data::F32(v))
254    }
255
256    fn serialize_f64(self, v: f64) -> Result<Self::Ok, Self::Error> {
257        self.process(Data::F64(v))
258    }
259
260    fn serialize_i128(self, v: i128) -> Result<Self::Ok, Self::Error> {
261        self.process(Data::I128(v))
262    }
263
264    fn serialize_u128(self, v: u128) -> Result<Self::Ok, Self::Error> {
265        self.process(Data::U128(v))
266    }
267
268    fn serialize_char(self, v: char) -> Result<Self::Ok, Self::Error> {
269        self.process(Data::Char(v))
270    }
271
272    fn serialize_str(self, v: &str) -> Result<Self::Ok, Self::Error> {
273        self.process(Data::String(v.to_owned()))
274    }
275
276    fn serialize_bytes(self, v: &[u8]) -> Result<Self::Ok, Self::Error> {
277        self.process(Data::String(String::from_utf8_lossy(v).into_owned()))
278    }
279
280    fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
281        self.process(Data::None)
282    }
283
284    fn serialize_some<T: Serialize + ?Sized>(self, value: &T) -> Result<Self::Ok, Self::Error> {
285        value.serialize(self)
286    }
287
288    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
289        self.process(Data::None)
290    }
291
292    fn serialize_unit_struct(self, name: &'static str) -> Result<Self::Ok, Self::Error> {
293        self.descend(name).process(Data::None)
294    }
295
296    fn serialize_unit_variant(
297        self,
298        name: &'static str,
299        _: u32,
300        variant: &'static str,
301    ) -> Result<Self::Ok, Self::Error> {
302        self.descend(name).descend(variant).process(Data::None)
303    }
304
305    fn serialize_newtype_struct<T: Serialize + ?Sized>(
306        self,
307        name: &'static str,
308        value: &T,
309    ) -> Result<Self::Ok, Self::Error> {
310        value.serialize(self.descend(name))
311    }
312
313    fn serialize_newtype_variant<T: Serialize + ?Sized>(
314        self,
315        name: &'static str,
316        _: u32,
317        variant: &'static str,
318        value: &T,
319    ) -> Result<Self::Ok, Self::Error> {
320        value.serialize(self.descend(name).descend(variant))
321    }
322
323    fn serialize_seq(self, _: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
324        Ok(self.indexed())
325    }
326
327    fn serialize_tuple(self, _: usize) -> Result<Self::SerializeTuple, Self::Error> {
328        Ok(self.indexed())
329    }
330
331    fn serialize_tuple_struct(
332        self,
333        name: &'static str,
334        _: usize,
335    ) -> Result<Self::SerializeTupleStruct, Self::Error> {
336        Ok(self.descend(name).indexed())
337    }
338
339    fn serialize_tuple_variant(
340        self,
341        name: &'static str,
342        _: u32,
343        variant: &'static str,
344        _: usize,
345    ) -> Result<Self::SerializeTupleVariant, Self::Error> {
346        Ok(self.descend(name).descend(variant).indexed())
347    }
348
349    fn serialize_map(self, _: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
350        Ok(self.keyed())
351    }
352
353    fn serialize_struct(
354        self,
355        name: &'static str,
356        _: usize,
357    ) -> Result<Self::SerializeStruct, Self::Error> {
358        Ok(self.descend(name))
359    }
360
361    fn serialize_struct_variant(
362        self,
363        name: &'static str,
364        _: u32,
365        variant: &'static str,
366        _: usize,
367    ) -> Result<Self::SerializeStructVariant, Self::Error> {
368        Ok(self.descend(name).descend(variant))
369    }
370}
371
372impl SerializeStruct for KeyValueSerializer<'_> {
373    type Ok = ();
374    type Error = EncodingError;
375    fn serialize_field<T: Serialize + ?Sized>(
376        &mut self,
377        key: &'static str,
378        value: &T,
379    ) -> Result<(), Self::Error> {
380        value.serialize(self.child(key))
381    }
382
383    fn end(self) -> Result<Self::Ok, Self::Error> {
384        Ok(())
385    }
386}
387
388impl SerializeStructVariant for KeyValueSerializer<'_> {
389    type Ok = ();
390    type Error = EncodingError;
391    fn serialize_field<T: Serialize + ?Sized>(
392        &mut self,
393        key: &'static str,
394        value: &T,
395    ) -> Result<(), Self::Error> {
396        value.serialize(self.child(key))
397    }
398
399    fn end(self) -> Result<Self::Ok, Self::Error> {
400        Ok(())
401    }
402}
403
404struct IndexedKeyValueSerializer<'a> {
405    index: usize,
406    ser: KeyValueSerializer<'a>,
407}
408
409impl IndexedKeyValueSerializer<'_> {
410    fn process<T: ?Sized + Serialize>(&mut self, data: &T) -> Result<(), EncodingError> {
411        let key = self.index;
412        self.index += 1;
413        data.serialize(self.ser.child(key))
414    }
415}
416
417impl SerializeTuple for IndexedKeyValueSerializer<'_> {
418    type Ok = ();
419    type Error = EncodingError;
420
421    fn serialize_element<T: Serialize + ?Sized>(&mut self, value: &T) -> Result<(), Self::Error> {
422        self.process(value)
423    }
424
425    fn end(self) -> Result<Self::Ok, Self::Error> {
426        Ok(())
427    }
428}
429
430impl SerializeSeq for IndexedKeyValueSerializer<'_> {
431    type Ok = ();
432    type Error = EncodingError;
433
434    fn serialize_element<T: Serialize + ?Sized>(&mut self, value: &T) -> Result<(), Self::Error> {
435        self.process(value)
436    }
437
438    fn end(self) -> Result<Self::Ok, Self::Error> {
439        Ok(())
440    }
441}
442
443impl SerializeTupleStruct for IndexedKeyValueSerializer<'_> {
444    type Ok = ();
445    type Error = EncodingError;
446
447    fn serialize_field<T: Serialize + ?Sized>(&mut self, value: &T) -> Result<(), Self::Error> {
448        self.process(value)
449    }
450
451    fn end(self) -> Result<Self::Ok, Self::Error> {
452        Ok(())
453    }
454}
455
456impl SerializeTupleVariant for IndexedKeyValueSerializer<'_> {
457    type Ok = ();
458    type Error = EncodingError;
459
460    fn serialize_field<T: Serialize + ?Sized>(&mut self, value: &T) -> Result<(), Self::Error> {
461        self.process(value)
462    }
463
464    fn end(self) -> Result<Self::Ok, Self::Error> {
465        Ok(())
466    }
467}
468
469struct KeyedKeyValueSerializer<'a> {
470    key: Option<String>,
471    ser: KeyValueSerializer<'a>,
472}
473
474impl SerializeMap for KeyedKeyValueSerializer<'_> {
475    type Ok = ();
476    type Error = EncodingError;
477    fn serialize_key<T: Serialize + ?Sized>(&mut self, key: &T) -> Result<(), Self::Error> {
478        use serde_json::{Value, to_value};
479        match to_value(key) {
480            Ok(Value::String(key)) => {
481                self.key = Some(key);
482                Ok(())
483            }
484            _ => Err(EncodingError::KeyNotString),
485        }
486    }
487
488    fn serialize_value<T: Serialize + ?Sized>(&mut self, value: &T) -> Result<(), Self::Error> {
489        let key = self.key.take().expect("Key must be present.");
490        value.serialize(self.ser.child(key))
491    }
492
493    fn end(self) -> Result<Self::Ok, Self::Error> {
494        Ok(())
495    }
496}
497
#[cfg(test)]
mod tests {
    use serde::Serialize;
    use serde_json::{Value, json};

    use super::*;
    use crate::btreemap;

    /// Encodes `input` with the `=` / single-space delimiters used by most
    /// of the tests and unwraps the result.
    fn encode(
        input: &BTreeMap<KeyString, Value>,
        fields_order: &[KeyString],
        flatten_boolean: bool,
    ) -> String {
        to_string(input, fields_order, "=", " ", flatten_boolean).unwrap()
    }

    #[test]
    fn single_element() {
        let encoded = encode(
            &btreemap! {
                "lvl" => "info"
            },
            &[],
            true,
        );
        assert_eq!(encoded, "lvl=info");
    }

    #[test]
    fn multiple_elements() {
        let encoded = encode(
            &btreemap! {
                "lvl" => "info",
                "log_id" => 12345
            },
            &[],
            true,
        );
        assert_eq!(encoded, "log_id=12345 lvl=info");
    }

    #[test]
    fn string_with_spaces() {
        let encoded = encode(
            &btreemap! {
                "lvl" => "info",
                "msg" => "This is a log message"
            },
            &[],
            true,
        );
        assert_eq!(encoded, r#"lvl=info msg="This is a log message""#);
    }

    #[test]
    fn string_with_quotes() {
        let encoded = encode(
            &btreemap! {
                "lvl" => "info",
                "msg" => "{\"key\":\"value\"}"
            },
            &[],
            true,
        );
        assert_eq!(encoded, r#"lvl=info msg="{\"key\":\"value\"}""#);
    }

    #[test]
    fn string_with_equal_sign() {
        let encoded = encode(
            &btreemap! {
                "lvl" => "info",
                "msg" => "="
            },
            &[],
            true,
        );
        assert_eq!(encoded, r#"lvl=info msg="=""#);
    }

    #[test]
    fn flatten_boolean() {
        let encoded = encode(
            &btreemap! {
                "beta" => true,
                "prod" => false,
                "lvl" => "info",
                "msg" => "This is a log message",
            },
            &[],
            true,
        );
        assert_eq!(encoded, r#"beta lvl=info msg="This is a log message""#);
    }

    #[test]
    fn dont_flatten_boolean() {
        let encoded = encode(
            &btreemap! {
                "beta" => true,
                "prod" => false,
                "lvl" => "info",
                "msg" => "This is a log message",
            },
            &[],
            false,
        );
        assert_eq!(
            encoded,
            r#"beta=true lvl=info msg="This is a log message" prod=false"#
        );
    }

    #[test]
    fn other_delimiters() {
        let encoded = to_string::<Value>(
            &btreemap! {
                "tag_a" => "val_a",
                "tag_b" => "val_b",
                "tag_c" => true,
            },
            &[],
            ":",
            ",",
            true,
        )
        .unwrap();
        assert_eq!(encoded, "tag_a:val_a,tag_b:val_b,tag_c");
    }

    #[test]
    fn string_with_characters_to_escape() {
        let encoded = encode(
            &btreemap! {
                "lvl" => "info",
                "msg" => r#"payload: {"code": 200}\n"#,
                "another_field" => "some\nfield\\and things",
                "space key" => "foo"
            },
            &[],
            true,
        );
        assert_eq!(
            encoded,
            r#"another_field="some\\nfield\\and things" lvl=info msg="payload: {\"code\": 200}\\n" "space key"=foo"#
        );
    }

    #[test]
    fn nested_fields() {
        let encoded = encode(
            &btreemap! {
                "log" => json!({
                    "file": {
                        "path": "encode_key_value.rs"
                    },
                }),
                "agent" => json!({
                    "name": "vector",
                    "id": 1234
                }),
                "network" => json!({
                    "ip": [127, 0, 0, 1],
                    "proto": "tcp"
                }),
                "event" => "log"
            },
            &[],
            true,
        );
        assert_eq!(
            encoded,
            "agent.id=1234 agent.name=vector event=log log.file.path=encode_key_value.rs network.ip.0=127 network.ip.1=0 network.ip.2=0 network.ip.3=1 network.proto=tcp"
        );
    }

    #[test]
    fn fields_ordering() {
        let order = ["lvl", "msg"].map(KeyString::from);
        let encoded = encode(
            &btreemap! {
                "lvl" => "info",
                "msg" => "This is a log message",
                "log_id" => 12345,
            },
            &order,
            true,
        );
        assert_eq!(
            encoded,
            r#"lvl=info msg="This is a log message" log_id=12345"#
        );
    }

    #[test]
    fn nested_fields_ordering() {
        let order = ["event", "log.file.path", "agent.name"].map(KeyString::from);
        let encoded = encode(
            &btreemap! {
                "log" => json!({
                    "file": {
                        "path": "encode_key_value.rs"
                    },
                }),
                "agent" => json!({
                    "name": "vector",
                }),
                "event" => "log"
            },
            &order,
            true,
        );
        assert_eq!(
            encoded,
            "event=log log.file.path=encode_key_value.rs agent.name=vector"
        );
    }

    #[test]
    fn non_string_keys() {
        #[derive(Serialize)]
        struct IntegerMap(BTreeMap<i32, String>);

        // Integer keys in a nested map cannot be encoded and must be rejected.
        let result = to_string::<IntegerMap>(
            &btreemap! {
                "inner_map" => IntegerMap(btreemap!{
                    0 => "Hello",
                    1 => "World"
                })
            },
            &[],
            "=",
            " ",
            true,
        );
        assert!(result.is_err());
    }
}
773}