vrl/stdlib/
parse_syslog.rs

1use crate::compiler::prelude::*;
2use chrono::{DateTime, Datelike, Utc};
3use std::collections::BTreeMap;
4use syslog_loose::{IncompleteDate, Message, ProcId, Protocol, Variant};
5
6pub(crate) fn parse_syslog(value: &Value, ctx: &Context) -> Resolved {
7    let message = value.try_bytes_utf8_lossy()?;
8    let timezone = match ctx.timezone() {
9        TimeZone::Local => None,
10        TimeZone::Named(tz) => Some(*tz),
11    };
12    let parsed = syslog_loose::parse_message_with_year_exact_tz(
13        &message,
14        resolve_year,
15        timezone,
16        Variant::Either,
17    )?;
18    Ok(message_to_value(parsed))
19}
20
/// Marker type for the `parse_syslog` function; it carries no state and is
/// the type the function's metadata and compile step are implemented on.
#[derive(Debug, Clone, Copy)]
pub struct ParseSyslog;
23
24impl Function for ParseSyslog {
25    fn identifier(&self) -> &'static str {
26        "parse_syslog"
27    }
28
29    fn usage(&self) -> &'static str {
30        "Parses the `value` in [Syslog](https://en.wikipedia.org/wiki/Syslog) format."
31    }
32
33    fn category(&self) -> &'static str {
34        Category::Parse.as_ref()
35    }
36
37    fn internal_failure_reasons(&self) -> &'static [&'static str] {
38        &["`value` is not a properly formatted Syslog message."]
39    }
40
41    fn return_kind(&self) -> u16 {
42        kind::OBJECT
43    }
44
45    fn notices(&self) -> &'static [&'static str] {
46        &[
47            indoc! {"
48                The function makes a best effort to parse the various Syslog formats that exists out
49                in the wild. This includes [RFC 6587](https://tools.ietf.org/html/rfc6587),
50                [RFC 5424](https://tools.ietf.org/html/rfc5424),
51                [RFC 3164](https://tools.ietf.org/html/rfc3164), and other common variations (such
52                as the Nginx Syslog style).
53            "},
54            "All values are returned as strings. We recommend manually coercing values to desired types as you see fit.",
55        ]
56    }
57
58    fn parameters(&self) -> &'static [Parameter] {
59        const PARAMETERS: &[Parameter] = &[Parameter::required(
60            "value",
61            kind::BYTES,
62            "The text containing the Syslog message to parse.",
63        )];
64        PARAMETERS
65    }
66
67    fn examples(&self) -> &'static [Example] {
68        &[example! {
69            title: "Parse Syslog log (5424)",
70            source: r#"parse_syslog!(s'<13>1 2020-03-13T20:45:38.119Z dynamicwireless.name non 2426 ID931 [exampleSDID@32473 iut="3" eventSource= "Application" eventID="1011"] Try to override the THX port, maybe it will reboot the neural interface!')"#,
71            result: Ok(indoc! {r#"{
72                "appname": "non",
73                "exampleSDID@32473": {
74                    "eventID": "1011",
75                    "eventSource": "Application",
76                    "iut": "3"
77                },
78                "facility": "user",
79                "hostname": "dynamicwireless.name",
80                "message": "Try to override the THX port, maybe it will reboot the neural interface!",
81                "msgid": "ID931",
82                "procid": 2426,
83                "severity": "notice",
84                "timestamp": "2020-03-13T20:45:38.119Z",
85                "version": 1
86            }"#}),
87        }]
88    }
89
90    fn compile(
91        &self,
92        _state: &state::TypeState,
93        _ctx: &mut FunctionCompileContext,
94        arguments: ArgumentList,
95    ) -> Compiled {
96        let value = arguments.required("value");
97
98        Ok(ParseSyslogFn { value }.as_expr())
99    }
100}
101
102#[derive(Debug, Clone)]
103pub(crate) struct ParseSyslogFn {
104    pub(crate) value: Box<dyn Expression>,
105}
106
107impl FunctionExpression for ParseSyslogFn {
108    fn resolve(&self, ctx: &mut Context) -> Resolved {
109        let value = self.value.resolve(ctx)?;
110
111        parse_syslog(&value, ctx)
112    }
113
114    fn type_def(&self, _: &state::TypeState) -> TypeDef {
115        TypeDef::object(inner_kind()).fallible()
116    }
117}
118
119/// Function used to resolve the year for syslog messages that don't include the
120/// year. If the current month is January, and the syslog message is for
121/// December, it will take the previous year. Otherwise, take the current year.
122fn resolve_year((month, _date, _hour, _min, _sec): IncompleteDate) -> i32 {
123    let now = Utc::now();
124    if now.month() == 1 && month == 12 {
125        now.year() - 1
126    } else {
127        now.year()
128    }
129}
130
131/// Create a `Value::Map` from the fields of the given syslog message.
132fn message_to_value(message: Message<&str>) -> Value {
133    let mut result = BTreeMap::new();
134
135    result.insert("message".to_string().into(), message.msg.to_string().into());
136
137    if let Some(host) = message.hostname {
138        result.insert("hostname".to_string().into(), host.to_string().into());
139    }
140
141    if let Some(severity) = message.severity {
142        result.insert(
143            "severity".to_string().into(),
144            severity.as_str().to_owned().into(),
145        );
146    }
147
148    if let Some(facility) = message.facility {
149        result.insert(
150            "facility".to_string().into(),
151            facility.as_str().to_owned().into(),
152        );
153    }
154
155    if let Protocol::RFC5424(version) = message.protocol {
156        result.insert("version".to_string().into(), version.into());
157    }
158
159    if let Some(app_name) = message.appname {
160        result.insert("appname".to_string().into(), app_name.to_owned().into());
161    }
162
163    if let Some(msg_id) = message.msgid {
164        result.insert("msgid".to_string().into(), msg_id.to_owned().into());
165    }
166
167    if let Some(timestamp) = message.timestamp {
168        let timestamp: DateTime<Utc> = timestamp.into();
169        result.insert("timestamp".to_string().into(), timestamp.into());
170    }
171
172    if let Some(procid) = message.procid {
173        let value: Value = match procid {
174            ProcId::PID(pid) => pid.into(),
175            ProcId::Name(name) => name.to_string().into(),
176        };
177        result.insert("procid".to_string().into(), value);
178    }
179
180    for element in message.structured_data {
181        let mut sdata = BTreeMap::new();
182        for (name, value) in element.params() {
183            sdata.insert((*name).into(), value.into());
184        }
185        result.insert(element.id.to_string().into(), sdata.into());
186    }
187
188    result.into()
189}
190
191fn inner_kind() -> BTreeMap<Field, Kind> {
192    BTreeMap::from([
193        ("message".into(), Kind::bytes()),
194        ("hostname".into(), Kind::bytes().or_null()),
195        ("severity".into(), Kind::bytes().or_null()),
196        ("facility".into(), Kind::bytes().or_null()),
197        ("appname".into(), Kind::bytes().or_null()),
198        ("msgid".into(), Kind::bytes().or_null()),
199        ("timestamp".into(), Kind::timestamp().or_null()),
200        ("procid".into(), Kind::bytes().or_integer().or_null()),
201        ("version".into(), Kind::integer().or_null()),
202    ])
203}
204
/// Table-driven tests for `parse_syslog`: each case supplies the input
/// message, the expected result and the expected type definition.
#[cfg(test)]
mod tests {
    use crate::btreemap;
    use chrono::{TimeZone, Timelike};

    use super::*;

    test_function![
        parse_syslog => ParseSyslog;

        // Full RFC 5424 message with one structured-data element.
        valid {
            args: func_args![value: r#"<13>1 2020-03-13T20:45:38.119Z dynamicwireless.name non 2426 ID931 [exampleSDID@32473 iut="3" eventSource= "Application" eventID="1011"] Try to override the THX port, maybe it will reboot the neural interface!"#],
            want: Ok(btreemap! {
                "severity" => "notice",
                "facility" => "user",
                "timestamp" => Utc.with_ymd_and_hms(2020, 3, 13, 20, 45, 38).unwrap().with_nanosecond(119_000_000).unwrap(),
                "hostname" => "dynamicwireless.name",
                "appname" => "non",
                "procid" => 2426,
                "msgid" => "ID931",
                "exampleSDID@32473" => btreemap! {
                    "iut" => "3",
                    "eventSource" => "Application",
                    "eventID" => "1011",
                },
                "message" => "Try to override the THX port, maybe it will reboot the neural interface!",
                "version" => 1,
            }),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // Input that is not a syslog message is reported as an error.
        invalid {
            args: func_args![value: "not much of a syslog message"],
            want: Err("unable to parse input as valid syslog message".to_string()),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // Timestamp without a year: the expected year is the current one.
        haproxy {
            args: func_args![value: "<133>Jun 13 16:33:35 haproxy[73411]: Proxy sticky-servers started."],
            want: Ok(btreemap! {
                    "facility" => "local0",
                    "severity" => "notice",
                    "message" => "Proxy sticky-servers started.",
                    "timestamp" => Utc.with_ymd_and_hms(Utc::now().year(), 6, 13, 16, 33, 35).unwrap(),
                    "appname" => "haproxy",
                    "procid" => 73411,
            }),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // No priority prefix: facility and severity are absent from the result.
        missing_pri {
            args: func_args![value: "Jun 13 16:33:35 haproxy[73411]: I am missing a pri."],
            want: Ok(btreemap! {
                "message" => "I am missing a pri.",
                "timestamp" => Utc.with_ymd_and_hms(Utc::now().year(), 6, 13, 16, 33, 35).unwrap(),
                "appname" => "haproxy",
                "procid" => 73411,
            }),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // A structured-data element without params becomes an empty object.
        empty_sd_element {
            args: func_args![value: "<13>1 2019-02-13T19:48:34+00:00 74794bfb6795 root 8449 - [empty] qwerty"],
            want: Ok(btreemap!{
                "message" => "qwerty",
                "appname" => "root",
                "facility" => "user",
                "hostname" => "74794bfb6795",
                "procid" => 8449,
                "severity" => "notice",
                "timestamp" => Utc.with_ymd_and_hms(2019, 2, 13, 19, 48, 34).unwrap(),
                "version" => 1,
                "empty" => btreemap! {},
            }),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // Two structured-data elements, one with a param and one without.
        non_empty_sd_element {
            args: func_args![value: r#"<13>1 2019-02-13T19:48:34+00:00 74794bfb6795 root 8449 - [non_empty x="1"][empty] qwerty"#],
            want: Ok(btreemap!{
                "message" => "qwerty",
                "appname" => "root",
                "facility" => "user",
                "hostname" => "74794bfb6795",
                "procid" => 8449,
                "severity" => "notice",
                "timestamp" => Utc.with_ymd_and_hms(2019, 2, 13, 19, 48, 34).unwrap(),
                "version" => 1,
                "non_empty" => btreemap! {
                    "x" => "1",
                },
                "empty" => btreemap! {},
            }),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // A param with an empty value is kept as an empty string.
        empty_sd_value {
            args: func_args![value: r#"<13>1 2019-02-13T19:48:34+00:00 74794bfb6795 root 8449 - [non_empty x=""][empty] qwerty"#],
            want: Ok(btreemap!{
                "message" => "qwerty",
                "appname" => "root",
                "facility" => "user",
                "hostname" => "74794bfb6795",
                "procid" => 8449,
                "severity" => "notice",
                "timestamp" => Utc.with_ymd_and_hms(2019, 2, 13, 19, 48, 34).unwrap(),
                "version" => 1,
                "empty" => btreemap! {},
                "non_empty" => btreemap! {
                    "x" => "",
                },
            }),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // Bracketed text in the message body is not treated as structured data.
        non_structured_data_in_message {
            args: func_args![value: "<131>Jun 8 11:54:08 master apache_error [Tue Jun 08 11:54:08.929301 2021] [php7:emerg] [pid 1374899] [client 95.223.77.60:41888] rest of message"],
            want: Ok(btreemap!{
                "appname" => "apache_error",
                "facility" => "local0",
                "hostname" => "master",
                "severity" => "err",
                "timestamp" => Utc.with_ymd_and_hms(Utc::now().year(), 6, 8, 11, 54, 8).unwrap(),
                "message" => "[Tue Jun 08 11:54:08.929301 2021] [php7:emerg] [pid 1374899] [client 95.223.77.60:41888] rest of message",
            }),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // An escaped double quote inside a param value is unescaped.
        escapes_in_structured_data_quote {
            args: func_args![value: r#"<165>1 2003-10-11T22:14:15.003Z mymachine.example.com evntslog - ID47 [exampleSDID@32473 key="hello \"test\""] An application event log entry..."#],
            want: Ok(btreemap!{
                "appname" => "evntslog",
                "exampleSDID@32473" => btreemap! {
                    "key" => r#"hello "test""#,
                },
                "facility" => "local4",
                "hostname" => "mymachine.example.com",
                "message" => "An application event log entry...",
                "msgid" => "ID47",
                "severity" => "notice",
                "timestamp" => Utc.with_ymd_and_hms(2003, 10, 11, 22, 14, 15).unwrap().with_nanosecond(3_000_000).unwrap(),
                "version" => 1
            }),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // An escaped backslash inside a param value is unescaped.
        escapes_in_structured_data_slash {
            args: func_args![value: r#"<165>1 2003-10-11T22:14:15.003Z mymachine.example.com evntslog - ID47 [exampleSDID@32473 key="hello a\\b"] An application event log entry..."#],
            want: Ok(btreemap!{
                "appname" => "evntslog",
                "exampleSDID@32473" => btreemap! {
                    "key" => r"hello a\b",
                },
                "facility" => "local4",
                "hostname" => "mymachine.example.com",
                "message" => "An application event log entry...",
                "msgid" => "ID47",
                "severity" => "notice",
                "timestamp" => Utc.with_ymd_and_hms(2003, 10, 11, 22, 14, 15).unwrap().with_nanosecond(3_000_000).unwrap(),
                "version" => 1
            }),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // An escaped closing bracket inside a param value is unescaped.
        escapes_in_structured_data_bracket {
            args: func_args![value: r#"<165>1 2003-10-11T22:14:15.003Z mymachine.example.com evntslog - ID47 [exampleSDID@32473 key="hello [bye\]"] An application event log entry..."#],
            want: Ok(btreemap!{
                "appname" => "evntslog",
                "exampleSDID@32473" => btreemap! {
                    "key" => "hello [bye]",
                },
                "facility" => "local4",
                "hostname" => "mymachine.example.com",
                "message" => "An application event log entry...",
                "msgid" => "ID47",
                "severity" => "notice",
                "timestamp" => Utc.with_ymd_and_hms(2003, 10, 11, 22, 14, 15).unwrap().with_nanosecond(3_000_000).unwrap(),
                "version" => 1
            }),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }
    ];
}