vrl/stdlib/
parse_glog.rs

1use crate::compiler::prelude::*;
2use chrono::{NaiveDateTime, Utc, offset::TimeZone};
3use regex::Regex;
4use std::collections::BTreeMap;
5use std::sync::LazyLock;
6
7fn parse_glog(bytes: Value) -> Resolved {
8    let bytes = bytes.try_bytes()?;
9    let message = String::from_utf8_lossy(&bytes);
10    let mut log = ObjectMap::new();
11    let captures = REGEX_GLOG
12        .captures(&message)
13        .ok_or("failed parsing glog message")?;
14    if let Some(level) = captures.name("level").map(|capture| capture.as_str()) {
15        let level = match level {
16            "I" => Ok("info"),
17            "W" => Ok("warning"),
18            "E" => Ok("error"),
19            "F" => Ok("fatal"),
20            _ => Err(format!(r#"unrecognized log level "{level}""#)),
21        }?;
22
23        log.insert("level".into(), Value::Bytes(level.to_owned().into()));
24    }
25    if let Some(timestamp) = captures.name("timestamp").map(|capture| capture.as_str()) {
26        match NaiveDateTime::parse_from_str(timestamp, "%Y%m%d %H:%M:%S%.f") {
27            Ok(naive_dt) => {
28                let utc_dt = Utc.from_utc_datetime(&naive_dt);
29                log.insert("timestamp".into(), Value::Timestamp(utc_dt));
30            }
31            Err(e) => return Err(format!("failed parsing timestamp {timestamp}: {e}").into()),
32        }
33    }
34    if let Some(id) = captures.name("id").map(|capture| capture.as_str()) {
35        log.insert(
36            "id".into(),
37            Value::Integer(id.parse().map_err(|_| "failed parsing id")?),
38        );
39    }
40    if let Some(file) = captures.name("file").map(|capture| capture.as_str()) {
41        log.insert("file".into(), Value::Bytes(file.to_owned().into()));
42    }
43    if let Some(line) = captures.name("line").map(|capture| capture.as_str()) {
44        log.insert(
45            "line".into(),
46            Value::Integer(line.parse().map_err(|_| "failed parsing line")?),
47        );
48    }
49    if let Some(message) = captures.name("message").map(|capture| capture.as_str()) {
50        log.insert("message".into(), Value::Bytes(message.to_owned().into()));
51    }
52    Ok(log.into())
53}
54
55static REGEX_GLOG: LazyLock<Regex> = LazyLock::new(|| {
56    Regex::new(
57        r"(?x)                                                     # Ignore whitespace and comments in the regex expression.
58        ^\s*                                                        # Start with any number of whitespaces.
59        (?P<level>\w)                                               # Match one word character (expecting `I`,`W`,`E` or `F`).
60        (?P<timestamp>\d{4}\d{2}\d{2}\s\d{2}:\d{2}:\d{2}\.\d{6})    # Match YYYYMMDD hh:mm:ss.ffffff.
61        \s+                                                         # Match one or more whitespace.
62        (?P<id>\d+)                                                 # Match at least one digit.
63        \s                                                          # Match one whitespace.
64        (?P<file>.+):(?P<line>\d+)                                  # Match any character (greedily), ended by `:` and at least one digit.
65        \]\s                                                        # Match `]` and one whitespace.
66        (?P<message>.*?)                                            # Match any characters (non-greedily).
67        \s*$                                                        # Match any number of whitespaces to be stripped from the end.
68    ")
69                                                 .expect("failed compiling regex for glog")
70});
71
/// Marker type for the `parse_glog` function; it carries no state.
#[derive(Debug, Clone, Copy)]
pub struct ParseGlog;
74
75impl Function for ParseGlog {
76    fn identifier(&self) -> &'static str {
77        "parse_glog"
78    }
79
80    fn usage(&self) -> &'static str {
81        "Parses the `value` using the [glog (Google Logging Library)](https://github.com/google/glog) format."
82    }
83
84    fn category(&self) -> &'static str {
85        Category::Parse.as_ref()
86    }
87
88    fn internal_failure_reasons(&self) -> &'static [&'static str] {
89        &["`value` does not match the `glog` format."]
90    }
91
92    fn return_kind(&self) -> u16 {
93        kind::OBJECT
94    }
95
96    fn examples(&self) -> &'static [Example] {
97        &[example! {
98            title: "Parse using glog",
99            source: r#"parse_glog!("I20210131 14:48:54.411655 15520 main.c++:9] Hello world!")"#,
100            result: Ok(indoc! { r#"{
101                "file": "main.c++",
102                "id": 15520,
103                "level": "info",
104                "line": 9,
105                "message": "Hello world!",
106                "timestamp": "2021-01-31T14:48:54.411655Z"
107            }"#}),
108        }]
109    }
110
111    fn compile(
112        &self,
113        _state: &state::TypeState,
114        _ctx: &mut FunctionCompileContext,
115        arguments: ArgumentList,
116    ) -> Compiled {
117        let value = arguments.required("value");
118
119        Ok(ParseGlogFn { value }.as_expr())
120    }
121
122    fn parameters(&self) -> &'static [Parameter] {
123        const PARAMETERS: &[Parameter] = &[Parameter::required(
124            "value",
125            kind::BYTES,
126            "The string to parse.",
127        )];
128        PARAMETERS
129    }
130}
131
132#[derive(Debug, Clone)]
133struct ParseGlogFn {
134    value: Box<dyn Expression>,
135}
136
137impl FunctionExpression for ParseGlogFn {
138    fn resolve(&self, ctx: &mut Context) -> Resolved {
139        let bytes = self.value.resolve(ctx)?;
140        parse_glog(bytes)
141    }
142
143    fn type_def(&self, _: &state::TypeState) -> TypeDef {
144        TypeDef::object(inner_kind()).fallible()
145    }
146}
147
148fn inner_kind() -> BTreeMap<Field, Kind> {
149    BTreeMap::from([
150        ("level".into(), Kind::bytes()),
151        ("timestamp".into(), Kind::timestamp()),
152        ("id".into(), Kind::integer()),
153        ("file".into(), Kind::bytes()),
154        ("line".into(), Kind::integer()),
155        ("message".into(), Kind::bytes()),
156    ])
157}
158
#[cfg(test)]
mod tests {
    use super::*;

    use crate::btreemap;
    use chrono::DateTime;

    /// Well-formed line shared by the plain "valid" case.
    const HELLO_WORLD_LINE: &str = "I20210131 14:48:54.411655 15520 main.c++:9] Hello world!";

    /// Timestamp carried by every well-formed fixture line.
    fn fixture_timestamp() -> Value {
        let parsed = DateTime::parse_from_rfc3339("2021-01-31T14:48:54.411655Z").unwrap();
        Value::Timestamp(parsed.into())
    }

    /// Type definition every case expects, whether parsing succeeds or not.
    fn fallible_object() -> TypeDef {
        TypeDef::object(inner_kind()).fallible()
    }

    test_function![
        parse_glog => ParseGlog;

        // Successful parses.

        log_line_valid {
            args: func_args![value: HELLO_WORLD_LINE],
            want: Ok(btreemap! {
                "level" => "info",
                "timestamp" => fixture_timestamp(),
                "id" => 15520,
                "file" => "main.c++",
                "line" => 9,
                "message" => "Hello world!",
            }),
            tdef: fallible_object(),
        }

        log_line_valid_strip_whitespace {
            args: func_args![value: "\n  I20210131 14:48:54.411655 15520 main.c++:9] Hello world!  \n"],
            want: Ok(btreemap! {
                "level" => "info",
                "timestamp" => fixture_timestamp(),
                "id" => 15520,
                "file" => "main.c++",
                "line" => 9,
                "message" => "Hello world!",
            }),
            tdef: fallible_object(),
        }

        log_line_padded_threadid {
            args: func_args![value: "I20210131 14:48:54.411655    20 main.c++:9] Hello world!"],
            want: Ok(btreemap! {
                "level" => "info",
                "timestamp" => fixture_timestamp(),
                "id" => 20,
                "file" => "main.c++",
                "line" => 9,
                "message" => "Hello world!",
            }),
            tdef: fallible_object(),
        }

        // Failures, one per error path exercised here.

        log_line_invalid {
            args: func_args![value: "not a glog line"],
            want: Err("failed parsing glog message"),
            tdef: fallible_object(),
        }

        log_line_invalid_log_level {
            args: func_args![value: "X20210131 14:48:54.411655 15520 main.c++:9] Hello world!"],
            want: Err(r#"unrecognized log level "X""#),
            tdef: fallible_object(),
        }

        log_line_invalid_timestamp {
            args: func_args![value: "I20210000 14:48:54.411655 15520 main.c++:9] Hello world!"],
            want: Err("failed parsing timestamp 20210000 14:48:54.411655: input is out of range"),
            tdef: fallible_object(),
        }

        log_line_invalid_id {
            args: func_args![value: "I20210131 14:48:54.411655 99999999999999999999999999999 main.c++:9] Hello world!"],
            want: Err("failed parsing id"),
            tdef: fallible_object(),
        }
    ];
}