codecs/encoding/format/
syslog.rs

1use bytes::{BufMut, BytesMut};
2use chrono::{DateTime, SecondsFormat, SubsecRound, Utc};
3use lookup::lookup_v2::ConfigTargetPath;
4use serde_json;
5use std::borrow::Cow;
6use std::collections::BTreeMap;
7use std::fmt::Write;
8use std::str::FromStr;
9use strum::{EnumString, FromRepr, VariantNames};
10use tokio_util::codec::Encoder;
11use tracing::debug;
12use vector_config::configurable_component;
13use vector_core::{
14    config::DataType,
15    event::{Event, LogEvent, Value},
16    schema,
17};
18use vrl::value::ObjectMap;
19
/// Config used to build a `SyslogSerializer`.
#[configurable_component]
#[derive(Clone, Debug, Default)]
// `default` lets the `syslog` table be left out entirely; the options then take their
// `Default` values.
#[serde(default)]
pub struct SyslogSerializerConfig {
    /// Options for the Syslog serializer.
    pub syslog: SyslogSerializerOptions,
}

impl SyslogSerializerConfig {
    /// Build the `SyslogSerializer` from this configuration.
    ///
    /// The serializer stores its own clone of the configuration, so `self` stays usable.
    pub fn build(&self) -> SyslogSerializer {
        SyslogSerializer::new(self)
    }

    /// The data type of events that are accepted by `SyslogSerializer`.
    ///
    /// Only log events are declared; the encoder writes nothing for any other event kind.
    pub fn input_type(&self) -> DataType {
        DataType::Log
    }

    /// The schema required by the serializer.
    ///
    /// No requirement is imposed: every field the encoder reads has a fallback value.
    pub fn schema_requirement(&self) -> schema::Requirement {
        schema::Requirement::empty()
    }
}
45
/// Syslog serializer options.
#[configurable_component]
#[derive(Clone, Debug, Default)]
#[serde(default, deny_unknown_fields)]
pub struct SyslogSerializerOptions {
    /// RFC to use for formatting.
    // Defaults to RFC 5424, the `#[default]` variant of `SyslogRFC`.
    rfc: SyslogRFC,
    /// Path to a field in the event to use for the facility. Defaults to "user".
    // The field may hold a facility name (matched case-insensitively) or an integer code;
    // any other value falls back to the default.
    facility: Option<ConfigTargetPath>,
    /// Path to a field in the event to use for the severity. Defaults to "informational".
    // Resolved the same way as `facility`: by name first, then by integer code.
    severity: Option<ConfigTargetPath>,
    /// Path to a field in the event to use for the app name.
    ///
    /// If not provided, the encoder checks for a semantic "service" field.
    /// If that is also missing, it defaults to "vector".
    app_name: Option<ConfigTargetPath>,
    /// Path to a field in the event to use for the proc ID.
    proc_id: Option<ConfigTargetPath>,
    /// Path to a field in the event to use for the msg ID.
    // Only written for RFC 5424; the RFC 3164 tag is built from app name and proc ID alone.
    msg_id: Option<ConfigTargetPath>,
}
67
68/// Serializer that converts an `Event` to bytes using the Syslog format.
69#[derive(Debug, Clone)]
70pub struct SyslogSerializer {
71    config: SyslogSerializerConfig,
72}
73
74impl SyslogSerializer {
75    /// Creates a new `SyslogSerializer`.
76    pub fn new(conf: &SyslogSerializerConfig) -> Self {
77        Self {
78            config: conf.clone(),
79        }
80    }
81}
82
83impl Encoder<Event> for SyslogSerializer {
84    type Error = vector_common::Error;
85
86    fn encode(&mut self, event: Event, buffer: &mut BytesMut) -> Result<(), Self::Error> {
87        if let Event::Log(log_event) = event {
88            let syslog_message = ConfigDecanter::new(&log_event).decant_config(&self.config.syslog);
89            let encoded = syslog_message.encode(&self.config.syslog.rfc);
90            buffer.put_slice(encoded.as_bytes());
91        }
92
93        Ok(())
94    }
95}
96
97struct ConfigDecanter<'a> {
98    log: &'a LogEvent,
99}
100
101impl<'a> ConfigDecanter<'a> {
102    fn new(log: &'a LogEvent) -> Self {
103        Self { log }
104    }
105
106    fn decant_config(&self, config: &SyslogSerializerOptions) -> SyslogMessage {
107        let mut app_name = self
108            .get_value(&config.app_name) // P1: Configured path
109            .unwrap_or_else(|| {
110                // P2: Semantic Fallback: Check for the field designated as "service" in the schema
111                self.log
112                    .get_by_meaning("service")
113                    .map(|v| v.to_string_lossy().to_string())
114                    // P3: Hardcoded default
115                    .unwrap_or_else(|| "vector".to_owned())
116            });
117        let mut proc_id = self.get_value(&config.proc_id);
118        let mut msg_id = self.get_value(&config.msg_id);
119
120        match config.rfc {
121            SyslogRFC::Rfc3164 => {
122                // RFC 3164: TAG field (app_name and proc_id) must be ASCII printable
123                app_name = sanitize_to_ascii(&app_name).into_owned();
124                if let Some(pid) = &mut proc_id {
125                    *pid = sanitize_to_ascii(pid).into_owned();
126                }
127            }
128            SyslogRFC::Rfc5424 => {
129                // Truncate to character limits (not byte limits to avoid UTF-8 panics)
130                truncate_chars(&mut app_name, 48);
131                if let Some(pid) = &mut proc_id {
132                    truncate_chars(pid, 128);
133                }
134                if let Some(mid) = &mut msg_id {
135                    truncate_chars(mid, 32);
136                }
137            }
138        }
139
140        SyslogMessage {
141            pri: Pri {
142                facility: self.get_facility(config),
143                severity: self.get_severity(config),
144            },
145            timestamp: self.get_timestamp(),
146            hostname: self.log.get_host().map(|v| v.to_string_lossy().to_string()),
147            tag: Tag {
148                app_name,
149                proc_id,
150                msg_id,
151            },
152            structured_data: self.get_structured_data(),
153            message: self.get_payload(),
154        }
155    }
156
157    fn get_value(&self, path: &Option<ConfigTargetPath>) -> Option<String> {
158        path.as_ref()
159            .and_then(|p| self.log.get(p).cloned())
160            .map(|v| v.to_string_lossy().to_string())
161    }
162
163    fn get_structured_data(&self) -> Option<StructuredData> {
164        self.log
165            .get("structured_data")
166            .and_then(|v| v.clone().into_object())
167            .map(StructuredData::from)
168    }
169
170    fn get_timestamp(&self) -> DateTime<Utc> {
171        if let Some(Value::Timestamp(timestamp)) = self.log.get_timestamp() {
172            return *timestamp;
173        }
174        Utc::now()
175    }
176
177    fn get_payload(&self) -> String {
178        self.log
179            .get_message()
180            .map(|v| v.to_string_lossy().to_string())
181            .unwrap_or_default()
182    }
183
184    fn get_facility(&self, config: &SyslogSerializerOptions) -> Facility {
185        config.facility.as_ref().map_or(Facility::User, |path| {
186            self.get_syslog_code(path, Facility::from_repr, Facility::User)
187        })
188    }
189
190    fn get_severity(&self, config: &SyslogSerializerOptions) -> Severity {
191        config
192            .severity
193            .as_ref()
194            .map_or(Severity::Informational, |path| {
195                self.get_syslog_code(path, Severity::from_repr, Severity::Informational)
196            })
197    }
198
199    fn get_syslog_code<T>(
200        &self,
201        path: &ConfigTargetPath,
202        from_repr_fn: fn(usize) -> Option<T>,
203        default_value: T,
204    ) -> T
205    where
206        T: Copy + FromStr,
207    {
208        if let Some(value) = self.log.get(path).cloned() {
209            let s = value.to_string_lossy();
210            if let Ok(val_from_name) = s.to_ascii_lowercase().parse::<T>() {
211                return val_from_name;
212            }
213            if let Value::Integer(n) = value
214                && let Some(val_from_num) = from_repr_fn(n as usize)
215            {
216                return val_from_num;
217            }
218        }
219        default_value
220    }
221}
222
// Placeholder written in place of a header field that has no value.
const NIL_VALUE: &str = "-";
// VERSION token emitted right after the priority in RFC 5424 messages.
const SYSLOG_V1: &str = "1";
// Maximum number of characters kept in the RFC 3164 tag (app name, optional `[proc_id]`, `:`).
const RFC3164_TAG_MAX_LENGTH: usize = 32;
// Maximum number of characters kept in a structured-data SD-ID.
const SD_ID_MAX_LENGTH: usize = 32;
227
/// Returns `s` with every character rejected by `is_valid` replaced by `'_'`.
///
/// When all characters are valid the input is returned borrowed, without allocating.
#[inline]
fn sanitize_with<F>(s: &str, is_valid: F) -> Cow<'_, str>
where
    F: Fn(char) -> bool,
{
    // Byte offset of the first rejected character, if there is one.
    let Some(split_at) = s.find(|c: char| !is_valid(c)) else {
        return Cow::Borrowed(s);
    };

    // Everything before the offset is already valid and is copied verbatim.
    let (clean_prefix, remainder) = s.split_at(split_at);
    let mut sanitized = String::with_capacity(s.len());
    sanitized.push_str(clean_prefix);
    sanitized.extend(
        remainder
            .chars()
            .map(|c| if is_valid(c) { c } else { '_' }),
    );

    Cow::Owned(sanitized)
}
247
248/// Sanitize a string to ASCII printable characters (space to tilde, ASCII 32-126)
249/// Used for RFC 3164 TAG field (app_name and proc_id)
250/// Invalid characters are replaced with '_'
251#[inline]
252fn sanitize_to_ascii(s: &str) -> Cow<'_, str> {
253    sanitize_with(s, |c| (' '..='~').contains(&c))
254}
255
256/// Sanitize SD-ID or PARAM-NAME according to RFC 5424
257/// Per RFC 5424, these NAMES must only contain printable ASCII (33-126)
258/// excluding '=', ' ', ']', '"'
259/// Invalid characters are replaced with '_'
260#[inline]
261fn sanitize_name(name: &str) -> Cow<'_, str> {
262    sanitize_with(name, |c| {
263        c.is_ascii_graphic() && !matches!(c, '=' | ']' | '"')
264    })
265}
266
/// Escapes a PARAM-VALUE for RFC 5424 by prefixing `\`, `"` and `]` with a backslash.
///
/// Input without any of those characters is returned borrowed.
fn escape_sd_value(s: &str) -> Cow<'_, str> {
    const SPECIAL: [char; 3] = ['\\', '"', ']'];

    if !s.contains(SPECIAL) {
        return Cow::Borrowed(s);
    }

    let mut escaped = String::with_capacity(s.len() + 10);
    for ch in s.chars() {
        if SPECIAL.contains(&ch) {
            escaped.push('\\');
        }
        escaped.push(ch);
    }

    Cow::Owned(escaped)
}
287
/// Shortens `s` in place to at most `max_chars` characters (characters, not bytes).
/// The cut always lands on a character boundary, so multi-byte UTF-8 input cannot panic.
/// Scanning stops at the cut position; strings that are already short enough are untouched.
fn truncate_chars(s: &mut String, max_chars: usize) {
    // Byte offset at which the first character beyond the limit starts, if any.
    let cut = s.char_indices().map(|(offset, _)| offset).nth(max_chars);
    if let Some(offset) = cut {
        s.truncate(offset);
    }
}
296
/// The syslog RFC standard to use for formatting.
#[configurable_component]
#[derive(PartialEq, Clone, Debug, Default)]
// With `snake_case` renaming the variants are spelled `rfc3164` and `rfc5424` in configuration.
#[serde(rename_all = "snake_case")]
pub enum SyslogRFC {
    /// The legacy RFC3164 syslog format.
    // Structured data and the msg ID are not written in this format.
    Rfc3164,
    /// The modern RFC5424 syslog format.
    // Used when the `rfc` option is omitted.
    #[default]
    Rfc5424,
}
308
309#[derive(Default, Debug)]
310struct SyslogMessage {
311    pri: Pri,
312    timestamp: DateTime<Utc>,
313    hostname: Option<String>,
314    tag: Tag,
315    structured_data: Option<StructuredData>,
316    message: String,
317}
318
319impl SyslogMessage {
320    fn encode(&self, rfc: &SyslogRFC) -> String {
321        let mut result = String::with_capacity(256);
322
323        let _ = write!(result, "{}", self.pri.encode());
324
325        if *rfc == SyslogRFC::Rfc5424 {
326            result.push_str(SYSLOG_V1);
327            result.push(' ');
328        }
329
330        match rfc {
331            SyslogRFC::Rfc3164 => {
332                let _ = write!(result, "{} ", self.timestamp.format("%b %e %H:%M:%S"));
333            }
334            SyslogRFC::Rfc5424 => {
335                result.push_str(
336                    &self
337                        .timestamp
338                        .round_subsecs(6)
339                        .to_rfc3339_opts(SecondsFormat::Micros, true),
340                );
341                result.push(' ');
342            }
343        }
344
345        result.push_str(self.hostname.as_deref().unwrap_or(NIL_VALUE));
346        result.push(' ');
347
348        match rfc {
349            SyslogRFC::Rfc3164 => result.push_str(&self.tag.encode_rfc_3164()),
350            SyslogRFC::Rfc5424 => result.push_str(&self.tag.encode_rfc_5424()),
351        }
352        result.push(' ');
353
354        if *rfc == SyslogRFC::Rfc3164 {
355            // RFC 3164 does not support structured data
356            if let Some(sd) = &self.structured_data
357                && !sd.elements.is_empty()
358            {
359                debug!(
360                    "Structured data present but ignored - RFC 3164 does not support structured data. Consider using RFC 5424 instead."
361                );
362            }
363        } else {
364            if let Some(sd) = &self.structured_data {
365                result.push_str(&sd.encode());
366            } else {
367                result.push_str(NIL_VALUE);
368            }
369            if !self.message.is_empty() {
370                result.push(' ');
371            }
372        }
373
374        if !self.message.is_empty() {
375            if *rfc == SyslogRFC::Rfc3164 {
376                result.push_str(&Self::sanitize_rfc3164_message(&self.message));
377            } else {
378                result.push_str(&self.message);
379            }
380        }
381
382        result
383    }
384
385    fn sanitize_rfc3164_message(message: &str) -> String {
386        message
387            .chars()
388            .map(|ch| if (' '..='~').contains(&ch) { ch } else { ' ' })
389            .collect()
390    }
391}
392
393#[derive(Default, Debug)]
394struct Tag {
395    app_name: String,
396    proc_id: Option<String>,
397    msg_id: Option<String>,
398}
399
400impl Tag {
401    fn encode_rfc_3164(&self) -> String {
402        let mut tag = if let Some(proc_id) = self.proc_id.as_deref() {
403            format!("{}[{}]:", self.app_name, proc_id)
404        } else {
405            format!("{}:", self.app_name)
406        };
407        if tag.chars().count() > RFC3164_TAG_MAX_LENGTH {
408            truncate_chars(&mut tag, RFC3164_TAG_MAX_LENGTH);
409            if !tag.ends_with(':') {
410                tag.pop();
411                tag.push(':');
412            }
413        }
414        tag
415    }
416
417    fn encode_rfc_5424(&self) -> String {
418        let proc_id_str = self.proc_id.as_deref().unwrap_or(NIL_VALUE);
419        let msg_id_str = self.msg_id.as_deref().unwrap_or(NIL_VALUE);
420        format!("{} {} {}", self.app_name, proc_id_str, msg_id_str)
421    }
422}
423
424type StructuredDataMap = BTreeMap<String, BTreeMap<String, String>>;
425#[derive(Debug, Default)]
426struct StructuredData {
427    elements: StructuredDataMap,
428}
429
430impl StructuredData {
431    fn encode(&self) -> String {
432        if self.elements.is_empty() {
433            NIL_VALUE.to_string()
434        } else {
435            self.elements
436                .iter()
437                .fold(String::new(), |mut acc, (sd_id, sd_params)| {
438                    let _ = write!(acc, "[{sd_id}");
439                    for (key, value) in sd_params {
440                        let esc_val = escape_sd_value(value);
441                        let _ = write!(acc, " {key}=\"{esc_val}\"");
442                    }
443                    let _ = write!(acc, "]");
444                    acc
445                })
446        }
447    }
448}
449
450impl From<ObjectMap> for StructuredData {
451    fn from(fields: ObjectMap) -> Self {
452        let elements = fields
453            .into_iter()
454            .map(|(sd_id, value)| {
455                let sd_id_str: String = sd_id.into();
456                let sanitized_id = sanitize_name(&sd_id_str);
457
458                let final_id = if sanitized_id.chars().count() > SD_ID_MAX_LENGTH {
459                    sanitized_id.chars().take(SD_ID_MAX_LENGTH).collect()
460                } else {
461                    sanitized_id.into_owned()
462                };
463
464                let sd_params = match value {
465                    Value::Object(obj) => {
466                        let mut map = BTreeMap::new();
467                        flatten_object(obj, String::new(), &mut map);
468                        map
469                    }
470                    scalar => {
471                        let mut map = BTreeMap::new();
472                        map.insert("value".to_string(), scalar.to_string_lossy().to_string());
473                        map
474                    }
475                };
476                (final_id, sd_params)
477            })
478            .collect();
479        Self { elements }
480    }
481}
482
483/// Helper function to flatten nested objects with dot notation
484fn flatten_object(obj: ObjectMap, prefix: String, result: &mut BTreeMap<String, String>) {
485    for (key, value) in obj {
486        let key_str: String = key.into();
487
488        let sanitized_key = sanitize_name(&key_str);
489
490        let mut full_key = prefix.clone();
491        if !full_key.is_empty() {
492            full_key.push('.');
493        }
494        full_key.push_str(&sanitized_key);
495
496        match value {
497            Value::Object(nested) => {
498                flatten_object(nested, full_key, result);
499            }
500            Value::Array(arr) => {
501                if let Ok(json) = serde_json::to_string(&arr) {
502                    result.insert(full_key, json);
503                } else {
504                    result.insert(full_key, format!("{:?}", arr));
505                }
506            }
507            scalar => {
508                result.insert(full_key, scalar.to_string_lossy().to_string());
509            }
510        }
511    }
512}
513
514#[derive(Default, Debug)]
515struct Pri {
516    facility: Facility,
517    severity: Severity,
518}
519
520impl Pri {
521    // The last paragraph describes how to compose the enums into `PRIVAL`:
522    // https://datatracker.ietf.org/doc/html/rfc5424#section-6.2.1
523    fn encode(&self) -> String {
524        let pri_val = (self.facility as u8 * 8) + self.severity as u8;
525        format!("<{pri_val}>")
526    }
527}
528
/// Syslog facility
// Each discriminant is the facility's numeric code: `Pri::encode` multiplies it by 8 to build
// the priority value, and the derived `from_repr` maps integer event values back to a variant.
// Names are parsed in kebab-case (for example `SolarisCron` is spelled "solaris-cron").
#[derive(Default, Debug, EnumString, FromRepr, VariantNames, Copy, Clone, PartialEq, Eq)]
#[strum(serialize_all = "kebab-case")]
#[configurable_component]
pub enum Facility {
    /// Kern
    Kern = 0,
    /// User
    // Used when no facility is configured or the event value cannot be resolved.
    #[default]
    User = 1,
    /// Mail
    Mail = 2,
    /// Daemon
    Daemon = 3,
    /// Auth
    Auth = 4,
    /// Syslog
    Syslog = 5,
    /// Lpr
    Lpr = 6,
    /// News
    News = 7,
    /// Uucp
    Uucp = 8,
    /// Cron
    Cron = 9,
    /// Authpriv
    Authpriv = 10,
    /// Ftp
    Ftp = 11,
    /// Ntp
    Ntp = 12,
    /// Security
    Security = 13,
    /// Console
    Console = 14,
    /// SolarisCron
    SolarisCron = 15,
    /// Local0
    Local0 = 16,
    /// Local1
    Local1 = 17,
    /// Local2
    Local2 = 18,
    /// Local3
    Local3 = 19,
    /// Local4
    Local4 = 20,
    /// Local5
    Local5 = 21,
    /// Local6
    Local6 = 22,
    /// Local7
    Local7 = 23,
}
584
/// Syslog severity
// Each discriminant is the severity's numeric code: `Pri::encode` adds it to `facility * 8`,
// and the derived `from_repr` maps integer event values back to a variant.
#[derive(Default, Debug, EnumString, FromRepr, VariantNames, Copy, Clone, PartialEq, Eq)]
#[strum(serialize_all = "kebab-case")]
#[configurable_component]
pub enum Severity {
    /// Emergency
    Emergency = 0,
    /// Alert
    Alert = 1,
    /// Critical
    Critical = 2,
    /// Error
    Error = 3,
    /// Warning
    Warning = 4,
    /// Notice
    Notice = 5,
    /// Informational
    // Used when no severity is configured or the event value cannot be resolved.
    #[default]
    Informational = 6,
    /// Debug
    Debug = 7,
}
608
609#[cfg(test)]
610mod tests {
611    use super::*;
612    use bytes::BytesMut;
613    use chrono::NaiveDate;
614    use std::sync::Arc;
615    use vector_core::config::LogNamespace;
616    use vector_core::event::Event::Metric;
617    use vector_core::event::{Event, MetricKind, MetricValue, StatisticKind};
618    use vrl::path::parse_target_path;
619    use vrl::prelude::Kind;
620    use vrl::{btreemap, event_path, value};
621
622    fn run_encode(config: SyslogSerializerConfig, event: Event) -> String {
623        let mut serializer = SyslogSerializer::new(&config);
624        let mut buffer = BytesMut::new();
625        serializer.encode(event, &mut buffer).unwrap();
626        String::from_utf8(buffer.to_vec()).unwrap()
627    }
628
629    fn create_simple_log() -> LogEvent {
630        let mut log = LogEvent::from("original message");
631        log.insert(
632            event_path!("timestamp"),
633            NaiveDate::from_ymd_opt(2025, 8, 28)
634                .unwrap()
635                .and_hms_micro_opt(18, 30, 00, 123456)
636                .unwrap()
637                .and_local_timezone(Utc)
638                .unwrap(),
639        );
640        log.insert(event_path!("host"), "test-host.com");
641        log
642    }
643
644    fn create_test_log() -> LogEvent {
645        let mut log = create_simple_log();
646        log.insert(event_path!("app"), "my-app");
647        log.insert(event_path!("pid"), "12345");
648        log.insert(event_path!("mid"), "req-abc-789");
649        log.insert(event_path!("fac"), "daemon"); //3
650        log.insert(event_path!("sev"), Value::from(2u8)); // Critical
651        log.insert(
652            event_path!("structured_data"),
653            value!({"metrics": {"retries": 3}}),
654        );
655        log
656    }
657
658    #[test]
659    fn test_rfc5424_defaults() {
660        let config = toml::from_str::<SyslogSerializerConfig>(
661            r#"
662            [syslog]
663            rfc = "rfc5424"
664        "#,
665        )
666        .unwrap();
667        let log = create_simple_log();
668        let output = run_encode(config, Event::Log(log));
669        let expected =
670            "<14>1 2025-08-28T18:30:00.123456Z test-host.com vector - - - original message";
671        assert_eq!(output, expected);
672    }
673
674    #[test]
675    fn test_rfc5424_all_fields() {
676        let config = toml::from_str::<SyslogSerializerConfig>(
677            r#"
678            [syslog]
679            app_name = ".app"
680            proc_id = ".pid"
681            msg_id = ".mid"
682            facility = ".fac"
683            severity = ".sev"
684        "#,
685        )
686        .unwrap();
687        let log = create_test_log();
688        let output = run_encode(config, Event::Log(log));
689        let expected = "<26>1 2025-08-28T18:30:00.123456Z test-host.com my-app 12345 req-abc-789 [metrics retries=\"3\"] original message";
690        assert_eq!(output, expected);
691    }
692
693    #[test]
694    fn test_rfc3164_all_fields() {
695        let config = toml::from_str::<SyslogSerializerConfig>(
696            r#"
697            [syslog]
698            rfc = "rfc3164"
699            facility = ".fac"
700            severity = ".sev"
701            app_name = ".app"
702            proc_id = ".pid"
703        "#,
704        )
705        .unwrap();
706        let log = create_test_log();
707        let output = run_encode(config, Event::Log(log));
708        // RFC 3164 does not support structured data, so it's ignored
709        let expected = "<26>Aug 28 18:30:00 test-host.com my-app[12345]: original message";
710        assert_eq!(output, expected);
711    }
712
713    #[test]
714    fn test_parsing_logic() {
715        let mut log = LogEvent::from("test message");
716        let config_fac =
717            toml::from_str::<SyslogSerializerOptions>(r#"facility = ".syslog_facility""#).unwrap();
718        let config_sev =
719            toml::from_str::<SyslogSerializerOptions>(r#"severity = ".syslog_severity""#).unwrap();
720        //check lowercase and digit
721        log.insert(event_path!("syslog_facility"), "daemon");
722        log.insert(event_path!("syslog_severity"), "critical");
723        let decanter = ConfigDecanter::new(&log);
724        let facility = decanter.get_facility(&config_fac);
725        let severity = decanter.get_severity(&config_sev);
726        assert_eq!(facility, Facility::Daemon);
727        assert_eq!(severity, Severity::Critical);
728
729        //check uppercase
730        log.insert(event_path!("syslog_facility"), "DAEMON");
731        log.insert(event_path!("syslog_severity"), "CRITICAL");
732        let decanter = ConfigDecanter::new(&log);
733        let facility = decanter.get_facility(&config_fac);
734        let severity = decanter.get_severity(&config_sev);
735        assert_eq!(facility, Facility::Daemon);
736        assert_eq!(severity, Severity::Critical);
737
738        //check digit
739        log.insert(event_path!("syslog_facility"), Value::from(3u8));
740        log.insert(event_path!("syslog_severity"), Value::from(2u8));
741        let decanter = ConfigDecanter::new(&log);
742        let facility = decanter.get_facility(&config_fac);
743        let severity = decanter.get_severity(&config_sev);
744        assert_eq!(facility, Facility::Daemon);
745        assert_eq!(severity, Severity::Critical);
746
747        //check defaults with empty config
748        let empty_config =
749            toml::from_str::<SyslogSerializerOptions>(r#"facility = ".missing_field""#).unwrap();
750        let default_facility = decanter.get_facility(&empty_config);
751        let default_severity = decanter.get_severity(&empty_config);
752        assert_eq!(default_facility, Facility::User);
753        assert_eq!(default_severity, Severity::Informational);
754    }
755
756    #[test]
757    fn test_rfc3164_sanitization() {
758        let config = toml::from_str::<SyslogSerializerConfig>(
759            r#"
760        [syslog]
761        rfc = "rfc3164"
762    "#,
763        )
764        .unwrap();
765
766        let mut log = create_simple_log();
767        log.insert(
768            event_path!("message"),
769            "A\nB\tC, Привіт D, E\u{0007}F", //newline, tab, unicode
770        );
771
772        let output = run_encode(config, Event::Log(log));
773        let expected_message = "A B C,        D, E F";
774        assert!(output.ends_with(expected_message));
775    }
776
777    #[test]
778    fn test_rfc5424_field_truncation() {
779        let long_string = "vector".repeat(50);
780
781        let mut log = create_simple_log();
782        log.insert(event_path!("long_app_name"), long_string.clone());
783        log.insert(event_path!("long_proc_id"), long_string.clone());
784        log.insert(event_path!("long_msg_id"), long_string.clone());
785
786        let config = toml::from_str::<SyslogSerializerConfig>(
787            r#"
788        [syslog]
789        rfc = "rfc5424"
790        app_name = ".long_app_name"
791        proc_id = ".long_proc_id"
792        msg_id = ".long_msg_id"
793    "#,
794        )
795        .unwrap();
796
797        let decanter = ConfigDecanter::new(&log);
798        let message = decanter.decant_config(&config.syslog);
799
800        assert_eq!(message.tag.app_name.len(), 48);
801        assert_eq!(message.tag.proc_id.unwrap().len(), 128);
802        assert_eq!(message.tag.msg_id.unwrap().len(), 32);
803    }
804
805    #[test]
806    fn test_rfc3164_tag_truncation() {
807        let config = toml::from_str::<SyslogSerializerConfig>(
808            r#"
809        [syslog]
810        rfc = "rfc3164"
811        facility = "user"
812        severity = "notice"
813        app_name = ".app_name"
814        proc_id = ".proc_id"
815    "#,
816        )
817        .unwrap();
818
819        let mut log = create_simple_log();
820        log.insert(
821            event_path!("app_name"),
822            "this-is-a-very-very-long-application-name",
823        );
824        log.insert(event_path!("proc_id"), "1234567890");
825
826        let output = run_encode(config, Event::Log(log));
827        let expected_tag = "this-is-a-very-very-long-applic:";
828        assert!(output.contains(expected_tag));
829    }
830
831    #[test]
832    fn test_rfc5424_missing_fields() {
833        let config = toml::from_str::<SyslogSerializerConfig>(
834            r#"
835        [syslog]
836        rfc = "rfc5424"
837        app_name = ".app"  # configured path, but not in log
838        proc_id = ".pid"   # configured path, but not in log
839        msg_id = ".mid"    # configured path, but not in log
840    "#,
841        )
842        .unwrap();
843
844        let log = create_simple_log();
845        let output = run_encode(config, Event::Log(log));
846
847        let expected =
848            "<14>1 2025-08-28T18:30:00.123456Z test-host.com vector - - - original message";
849        assert_eq!(output, expected);
850    }
851
852    #[test]
853    fn test_invalid_parsing_fallback() {
854        let config = toml::from_str::<SyslogSerializerConfig>(
855            r#"
856        [syslog]
857        rfc = "rfc5424"
858        facility = ".fac"
859        severity = ".sev"
860    "#,
861        )
862        .unwrap();
863
864        let mut log = create_simple_log();
865
866        log.insert(event_path!("fac"), "");
867        log.insert(event_path!("sev"), "invalid_severity_name");
868
869        let output = run_encode(config, Event::Log(log));
870
871        let expected_pri = "<14>";
872        assert!(output.starts_with(expected_pri));
873
874        let expected_suffix = "vector - - - original message";
875        assert!(output.ends_with(expected_suffix));
876    }
877
878    #[test]
879    fn test_rfc5424_empty_message_and_sd() {
880        let config = toml::from_str::<SyslogSerializerConfig>(
881            r#"
882        [syslog]
883        rfc = "rfc5424"
884        app_name = ".app"
885        proc_id = ".pid"
886        msg_id = ".mid"
887    "#,
888        )
889        .unwrap();
890
891        let mut log = create_simple_log();
892        log.insert(event_path!("message"), "");
893        log.insert(event_path!("structured_data"), value!({}));
894
895        let output = run_encode(config, Event::Log(log));
896        let expected = "<14>1 2025-08-28T18:30:00.123456Z test-host.com vector - - -";
897        assert_eq!(output, expected);
898    }
899
900    #[test]
901    fn test_non_log_event_filtering() {
902        let config = toml::from_str::<SyslogSerializerConfig>(
903            r#"
904        [syslog]
905        rfc = "rfc5424"
906    "#,
907        )
908        .unwrap();
909
910        let metric_event = Metric(vector_core::event::Metric::new(
911            "metric1",
912            MetricKind::Incremental,
913            MetricValue::Distribution {
914                samples: vector_core::samples![10.0 => 1],
915                statistic: StatisticKind::Histogram,
916            },
917        ));
918
919        let mut serializer = SyslogSerializer::new(&config);
920        let mut buffer = BytesMut::new();
921
922        let result = serializer.encode(metric_event, &mut buffer);
923
924        assert!(result.is_ok());
925        assert!(buffer.is_empty());
926    }
927
928    #[test]
929    fn test_minimal_event() {
930        let config = toml::from_str::<SyslogSerializerConfig>(
931            r#"
932        [syslog]
933    "#,
934        )
935        .unwrap();
936        let log = LogEvent::from("");
937
938        let output = run_encode(config, Event::Log(log));
939        let expected_suffix = "vector - - -";
940        assert!(output.starts_with("<14>1"));
941        assert!(output.ends_with(expected_suffix));
942    }
943
// The configured `app_name` path (`.nonexistent`) is absent from the event, so
// the value of the field tagged with the "service" meaning is expected to show
// up as the app name in the encoded output.
#[test]
fn test_app_name_meaning_fallback() {
    let config: SyslogSerializerConfig = toml::from_str(
        r#"
        [syslog]
        rfc = "rfc5424"
        severity = ".sev"
        app_name = ".nonexistent"
    "#,
    )
    .unwrap();

    // Schema: `syslog.service` is a bytes field registered under the "service" meaning.
    let syslog_kind = Kind::object(btreemap! {
        "service" => Kind::bytes(),
    });
    let definition = schema::Definition::new_with_default_metadata(
        Kind::object(btreemap! {
            "syslog" => syslog_kind,
        }),
        [LogNamespace::Vector],
    )
    .with_meaning(parse_target_path("syslog.service").unwrap(), "service");

    let mut source_log = LogEvent::default();
    source_log.insert("syslog.service", "meaning-app");

    let mut event = Event::from(source_log);
    let metadata = event.metadata_mut();
    metadata.set_schema_definition(&Arc::new(definition));

    let encoded = run_encode(config, event);
    assert!(encoded.contains("meaning-app - -"));
}
976
// Top-level scalar entries of `structured_data` are each expected to be rendered
// as their own SD element with a single `value` parameter.
#[test]
fn test_structured_data_with_scalars() {
    let config: SyslogSerializerConfig = toml::from_str(
        r#"
            [syslog]
            rfc = "rfc5424"
        "#,
    )
    .unwrap();

    let scalars = value!({"simple_string": "hello", "simple_number": 42});
    let mut event_log = create_simple_log();
    event_log.insert(event_path!("structured_data"), scalars);

    let encoded = run_encode(config, Event::Log(event_log));

    let number_element = r#"[simple_number value="42"]"#;
    let string_element = r#"[simple_string value="hello"]"#;
    assert!(encoded.contains(number_element));
    assert!(encoded.contains(string_element));
}
997
// Objects nested inside an SD element are expected to be flattened into
// dot-separated parameter names within that element.
#[test]
fn test_structured_data_with_nested_objects() {
    let config: SyslogSerializerConfig = toml::from_str(
        r#"
            [syslog]
            rfc = "rfc5424"
        "#,
    )
    .unwrap();

    let nested = value!({
        "meta": {
            "request": {
                "id": "abc-123",
                "method": "GET"
            },
            "user": "bob"
        }
    });
    let mut event_log = create_simple_log();
    event_log.insert(event_path!("structured_data"), nested);

    let encoded = run_encode(config, Event::Log(event_log));

    let expected_element = r#"[meta request.id="abc-123" request.method="GET" user="bob"]"#;
    assert!(encoded.contains(expected_element));
}
1025
// An array-valued parameter is expected to appear as its JSON text, with `"`
// and `]` backslash-escaped inside the parameter value.
#[test]
fn test_structured_data_with_arrays() {
    let config: SyslogSerializerConfig = toml::from_str(
        r#"
            [syslog]
            rfc = "rfc5424"
        "#,
    )
    .unwrap();

    let with_array = value!({
        "data": {
            "tags": ["tag1", "tag2", "tag3"]
        }
    });
    let mut event_log = create_simple_log();
    event_log.insert(event_path!("structured_data"), with_array);

    let encoded = run_encode(config, Event::Log(event_log));

    // JSON-encoded array, escaped for use as an SD parameter value.
    let expected_element = r#"[data tags="[\"tag1\",\"tag2\",\"tag3\"\]"]"#;
    assert!(encoded.contains(expected_element));
}
1050
// Multi-level nesting: every leaf is expected to be emitted under its full
// dot-joined path, with numeric leaves rendered as quoted text.
#[test]
fn test_structured_data_complex_nested() {
    let config: SyslogSerializerConfig = toml::from_str(
        r#"
            [syslog]
            rfc = "rfc5424"
        "#,
    )
    .unwrap();

    let deeply_nested = value!({
        "tracking": {
            "session": {
                "user": {
                    "id": "123",
                    "name": "alice"
                },
                "duration_ms": 5000
            }
        }
    });
    let mut event_log = create_simple_log();
    event_log.insert(event_path!("structured_data"), deeply_nested);

    let encoded = run_encode(config, Event::Log(event_log));

    let duration_param = r#"session.duration_ms="5000""#;
    let user_id_param = r#"session.user.id="123""#;
    let user_name_param = r#"session.user.name="alice""#;
    assert!(encoded.contains(duration_param));
    assert!(encoded.contains(user_id_param));
    assert!(encoded.contains(user_name_param));
}
1082
// Characters that are not allowed in an SD-ID or PARAM-NAME (space, `=`, `]`,
// `"`) are expected to be replaced with `_` in the encoded output.
#[test]
fn test_structured_data_sanitization() {
    let config: SyslogSerializerConfig = toml::from_str(
        r#"
            [syslog]
            rfc = "rfc5424"
        "#,
    )
    .unwrap();

    let unsafe_names = value!({
        // SD-ID containing a space -> expected `my_id`
        "my id": {
            // PARAM-NAME containing `=` -> expected `user_name`
            "user=name": "alice",
            // PARAM-NAME containing `]` -> expected `foo_bar`
            "foo]bar": "value1",
            // PARAM-NAME containing `"` -> expected `has_quote`
            "has\"quote": "value2"
        }
    });
    let mut event_log = create_simple_log();
    event_log.insert(event_path!("structured_data"), unsafe_names);

    let encoded = run_encode(config, Event::Log(event_log));

    // Each offending character must have become an underscore.
    let sanitized_id = r#"[my_id"#;
    assert!(encoded.contains(sanitized_id));
    assert!(encoded.contains(r#"foo_bar="value1""#));
    assert!(encoded.contains(r#"has_quote="value2""#));
    assert!(encoded.contains(r#"user_name="alice""#));
}
1112
// An SD-ID longer than 32 characters must be truncated to exactly 32.
//
// The 50-character ID below is expected to come out as 32 `a`s, followed by the
// element's single `key="value"` parameter.
#[test]
fn test_structured_data_sd_id_length_limit() {
    let config = toml::from_str::<SyslogSerializerConfig>(
        r#"
            [syslog]
            rfc = "rfc5424"
        "#,
    )
    .unwrap();

    let mut log = create_simple_log();
    log.insert(
        event_path!("structured_data"),
        value!({
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa": {
                "key": "value"
            }
        }),
    );

    let output = run_encode(config, Event::Log(log));
    let expected_id = "a".repeat(32);

    // Match the whole element rather than just the `[aaaa…` prefix: the bare prefix is
    // also present when the ID is cut at any length above 32, so it cannot pin the limit.
    assert!(output.contains(&format!(r#"[{} key="value"]"#, expected_id)));
    // The untruncated ID must not leak through.
    assert!(!output.contains(&format!("[{}", "a".repeat(50))));
}
1138
// Truncating header fields and SD names that contain multi-byte UTF-8 must not
// panic or break the output, and an over-long SD-ID must come out as exactly 32
// characters after sanitization.
#[test]
fn test_utf8_safe_truncation() {
    let config = toml::from_str::<SyslogSerializerConfig>(
        r#"
            [syslog]
            rfc = "rfc5424"
            app_name = ".app"
            proc_id = ".proc"
            msg_id = ".msg"
        "#,
    )
    .unwrap();

    let mut log = create_simple_log();
    // Multi-byte content: each emoji is 4 bytes and each Cyrillic letter is 2 bytes
    // in UTF-8, so limits applied to these fields can fall in the middle of a character.
    log.insert(
        event_path!("app"),
        "app_😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀",
    );
    log.insert(
        event_path!("proc"),
        "процес_😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀😀",
    );
    log.insert(event_path!("msg"), "довге_повідомлення ");

    // SD-ID made only of `_` and non-ASCII characters, longer than 32 characters.
    log.insert(
        event_path!("structured_data"),
        value!({
            "_😀_дуже_довге_значення_більше_тридцати_двух_символів": {
                "_😀_": "value"
            }
        }),
    );
    let output = run_encode(config, Event::Log(log));
    assert!(output.starts_with("<14>1"));
    assert!(output.contains("app_"));

    let expected_sd_id: String = "_".repeat(32);
    assert!(output.contains(&format!("[{}", expected_sd_id)));
    // Anchor on the space separating the SD-ID from its first parameter; the bare
    // 32-underscore prefix would also match a sanitized ID that was never truncated.
    assert!(output.contains(&format!("[{} ", expected_sd_id)));
}
1179
// In RFC 3164 mode, non-ASCII characters in the app name and proc id are
// expected to be replaced with `_`, leaving none of the original non-ASCII text.
#[test]
fn test_rfc3164_ascii_sanitization() {
    let config: SyslogSerializerConfig = toml::from_str(
        r#"
            [syslog]
            rfc = "rfc3164"
            app_name = ".app"
            proc_id = ".proc"
        "#,
    )
    .unwrap();

    // Both tag fields carry non-ASCII content (emoji and Cyrillic).
    let mut event_log = create_simple_log();
    event_log.insert(event_path!("app"), "my_app_😀_тест");
    event_log.insert(event_path!("proc"), "процес_123");

    let encoded = run_encode(config, Event::Log(event_log));

    // Sanitized forms are present...
    assert!(encoded.starts_with("<14>"));
    assert!(encoded.contains("my_app_____"));
    assert!(encoded.contains("[_______123]:"));

    // ...and the original non-ASCII fragments are gone.
    let emoji_present = encoded.contains("😀");
    let cyrillic_app_present = encoded.contains("тест");
    let cyrillic_proc_present = encoded.contains("процес");
    assert!(!emoji_present);
    assert!(!cyrillic_app_present);
    assert!(!cyrillic_proc_present);
}
1206    }
1207}