vrl/stdlib/
parse_klog.rs

1use crate::compiler::prelude::*;
2use chrono::{Datelike, NaiveDateTime, Utc};
3use regex::Regex;
4use std::collections::BTreeMap;
5use std::sync::LazyLock;
6
7fn parse_klog(bytes: Value) -> Resolved {
8    let bytes = bytes.try_bytes()?;
9    let message = String::from_utf8_lossy(&bytes);
10    let mut log = ObjectMap::new();
11    let captures = REGEX_KLOG
12        .captures(&message)
13        .ok_or("failed parsing klog message")?;
14    if let Some(level) = captures.name("level").map(|capture| capture.as_str()) {
15        let level = match level {
16            "I" => Ok("info"),
17            "W" => Ok("warning"),
18            "E" => Ok("error"),
19            "F" => Ok("fatal"),
20            _ => Err(format!(r#"unrecognized log level "{level}""#)),
21        }?;
22
23        log.insert("level".into(), Value::Bytes(level.to_owned().into()));
24    }
25    if let Some(timestamp) = captures.name("timestamp").map(|capture| capture.as_str()) {
26        let month = captures.name("month").map(|capture| capture.as_str());
27        let year = resolve_year(month);
28
29        match NaiveDateTime::parse_from_str(&format!("{year}{timestamp}"), "%Y%m%d %H:%M:%S%.f") {
30            Ok(naive_dt) => {
31                let utc_dt = naive_dt.and_utc();
32                log.insert("timestamp".into(), Value::Timestamp(utc_dt));
33            }
34            Err(e) => return Err(format!("failed parsing timestamp {timestamp}: {e}").into()),
35        }
36    }
37    if let Some(id) = captures.name("id").map(|capture| capture.as_str()) {
38        log.insert(
39            "id".into(),
40            Value::Integer(id.parse().map_err(|_| "failed parsing id")?),
41        );
42    }
43    if let Some(file) = captures.name("file").map(|capture| capture.as_str()) {
44        log.insert("file".into(), Value::Bytes(file.to_owned().into()));
45    }
46    if let Some(line) = captures.name("line").map(|capture| capture.as_str()) {
47        log.insert(
48            "line".into(),
49            Value::Integer(line.parse().map_err(|_| "failed parsing line")?),
50        );
51    }
52    if let Some(message) = captures.name("message").map(|capture| capture.as_str()) {
53        log.insert("message".into(), Value::Bytes(message.to_owned().into()));
54    }
55    Ok(log.into())
56}
57
58static REGEX_KLOG: LazyLock<Regex> = LazyLock::new(|| {
59    Regex::new(
60        r"(?x)                                                        # Ignore whitespace and comments in the regex expression.
61        ^\s*                                                           # Start with any number of whitespaces.
62        (?P<level>\w)                                                  # Match one word character (expecting `I`,`W`,`E` or `F`).
63        (?P<timestamp>(?P<month>\d{2})\d{2}\s\d{2}:\d{2}:\d{2}\.\d{6}) # Match MMDD hh:mm:ss.ffffff.
64        \s+                                                            # Match one whitespace.
65        (?P<id>\d+)                                                    # Match at least one digit.
66        \s                                                             # Match one whitespace.
67        (?P<file>.+):(?P<line>\d+)                                     # Match any character (greedily), ended by `:` and at least one digit.
68        \]\s                                                           # Match `]` and one whitespace.
69        (?P<message>.*?)                                               # Match any characters (non-greedily).
70        \s*$                                                           # Match any number of whitespaces to be stripped from the end.
71    ").expect("failed compiling regex for klog")
72});
73
74static EXAMPLES: LazyLock<Vec<Example>> = LazyLock::new(|| {
75    let result = Box::leak(
76        format!(
77            indoc! { r#"{{
78                "file": "klog.go",
79                "id": 28133,
80                "level": "info",
81                "line": 70,
82                "message": "hello from klog",
83                "timestamp": "{year}-05-05T17:59:40.692994Z"
84            }}"#},
85            year = Utc::now().year()
86        )
87        .into_boxed_str(),
88    );
89    vec![example! {
90        title: "Parse using klog",
91        source: r#"parse_klog!("I0505 17:59:40.692994   28133 klog.go:70] hello from klog")"#,
92        result: Ok(result),
93    }]
94});
95
96#[derive(Clone, Copy, Debug)]
97pub struct ParseKlog;
98
99impl Function for ParseKlog {
100    fn identifier(&self) -> &'static str {
101        "parse_klog"
102    }
103
104    fn usage(&self) -> &'static str {
105        "Parses the `value` using the [klog](https://github.com/kubernetes/klog) format used by Kubernetes components."
106    }
107
108    fn category(&self) -> &'static str {
109        Category::Parse.as_ref()
110    }
111
112    fn internal_failure_reasons(&self) -> &'static [&'static str] {
113        &["`value` does not match the `klog` format."]
114    }
115
116    fn return_kind(&self) -> u16 {
117        kind::OBJECT
118    }
119
120    fn notices(&self) -> &'static [&'static str] {
121        &[indoc! {"
122            This function resolves the year for messages. If the current month is January and the
123            provided month is December, it sets the year to the previous year. Otherwise, it sets
124            the year to the current year.
125        "}]
126    }
127
128    fn examples(&self) -> &'static [Example] {
129        EXAMPLES.as_slice()
130    }
131
132    fn compile(
133        &self,
134        _state: &state::TypeState,
135        _ctx: &mut FunctionCompileContext,
136        arguments: ArgumentList,
137    ) -> Compiled {
138        let value = arguments.required("value");
139
140        Ok(ParseKlogFn { value }.as_expr())
141    }
142
143    fn parameters(&self) -> &'static [Parameter] {
144        const PARAMETERS: &[Parameter] = &[Parameter::required(
145            "value",
146            kind::BYTES,
147            "The string to parse.",
148        )];
149        PARAMETERS
150    }
151}
152
153#[derive(Debug, Clone)]
154struct ParseKlogFn {
155    value: Box<dyn Expression>,
156}
157
158impl FunctionExpression for ParseKlogFn {
159    fn resolve(&self, ctx: &mut Context) -> Resolved {
160        let bytes = self.value.resolve(ctx)?;
161        parse_klog(bytes)
162    }
163
164    fn type_def(&self, _: &state::TypeState) -> TypeDef {
165        TypeDef::object(inner_kind()).fallible()
166    }
167}
168
169// same logic as our handling of RFC3164 syslog messages: since we don't know the year, we look at
170// the month to guess the year based on the current month
171fn resolve_year(month: Option<&str>) -> i32 {
172    let now = Utc::now();
173    match (month, now.month()) {
174        (Some("12"), 1) => now.year() - 1,
175        (_, _) => now.year(),
176    }
177}
178
179fn inner_kind() -> BTreeMap<Field, Kind> {
180    BTreeMap::from([
181        ("level".into(), Kind::bytes()),
182        ("timestamp".into(), Kind::timestamp()),
183        ("id".into(), Kind::integer()),
184        ("file".into(), Kind::bytes()),
185        ("line".into(), Kind::integer()),
186        ("message".into(), Kind::bytes()),
187    ])
188}
189
#[cfg(test)]
mod tests {
    use super::*;

    use crate::btreemap;
    use chrono::DateTime;

    /// Expected timestamp for the sample lines: May 5th of the current year, in UTC.
    fn sample_timestamp() -> Value {
        let rfc3339 = format!("{}-05-05T17:59:40.692994Z", Utc::now().year());
        let parsed = DateTime::parse_from_rfc3339(&rfc3339).unwrap();
        Value::Timestamp(parsed.into())
    }

    test_function![
        parse_klog => ParseKlog;

        // A well-formed line yields every field.
        log_line_valid {
            args: func_args![value: "I0505 17:59:40.692994   28133 klog.go:70] hello from klog"],
            want: Ok(btreemap! {
                "level" => "info",
                "timestamp" => sample_timestamp(),
                "id" => 28133,
                "file" => "klog.go",
                "line" => 70,
                "message" => "hello from klog",
            }),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // Surrounding whitespace and newlines are not part of the result.
        log_line_valid_strip_whitespace {
            args: func_args![value: "\n     I0505 17:59:40.692994   28133 klog.go:70] hello from klog    \n"],
            want: Ok(btreemap! {
                "level" => "info",
                "timestamp" => sample_timestamp(),
                "id" => 28133,
                "file" => "klog.go",
                "line" => 70,
                "message" => "hello from klog",
            }),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // Text that does not match the pattern at all.
        log_line_invalid {
            args: func_args![value: "not a klog line"],
            want: Err("failed parsing klog message"),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // The pattern matches, but the level letter is unknown.
        log_line_invalid_log_level {
            args: func_args![value: "X0505 17:59:40.692994   28133 klog.go:70] hello from klog"],
            want: Err(r#"unrecognized log level "X""#),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // Month and day `0000` pass the pattern but are not a real date.
        log_line_invalid_timestamp {
            args: func_args![value: "I0000 17:59:40.692994   28133 klog.go:70] hello from klog"],
            want: Err("failed parsing timestamp 0000 17:59:40.692994: input is out of range"),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // The id has more digits than the integer type can hold.
        log_line_invalid_id {
            args: func_args![value: "I0505 17:59:40.692994   99999999999999999999999999999 klog.go:70] hello from klog"],
            want: Err("failed parsing id"),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }
    ];
}