1use crate::compiler::prelude::*;
2use chrono::{NaiveDateTime, Utc, offset::TimeZone};
3use regex::Regex;
4use std::collections::BTreeMap;
5use std::sync::LazyLock;
6
7fn parse_glog(bytes: Value) -> Resolved {
8 let bytes = bytes.try_bytes()?;
9 let message = String::from_utf8_lossy(&bytes);
10 let mut log = ObjectMap::new();
11 let captures = REGEX_GLOG
12 .captures(&message)
13 .ok_or("failed parsing glog message")?;
14 if let Some(level) = captures.name("level").map(|capture| capture.as_str()) {
15 let level = match level {
16 "I" => Ok("info"),
17 "W" => Ok("warning"),
18 "E" => Ok("error"),
19 "F" => Ok("fatal"),
20 _ => Err(format!(r#"unrecognized log level "{level}""#)),
21 }?;
22
23 log.insert("level".into(), Value::Bytes(level.to_owned().into()));
24 }
25 if let Some(timestamp) = captures.name("timestamp").map(|capture| capture.as_str()) {
26 match NaiveDateTime::parse_from_str(timestamp, "%Y%m%d %H:%M:%S%.f") {
27 Ok(naive_dt) => {
28 let utc_dt = Utc.from_utc_datetime(&naive_dt);
29 log.insert("timestamp".into(), Value::Timestamp(utc_dt));
30 }
31 Err(e) => return Err(format!("failed parsing timestamp {timestamp}: {e}").into()),
32 }
33 }
34 if let Some(id) = captures.name("id").map(|capture| capture.as_str()) {
35 log.insert(
36 "id".into(),
37 Value::Integer(id.parse().map_err(|_| "failed parsing id")?),
38 );
39 }
40 if let Some(file) = captures.name("file").map(|capture| capture.as_str()) {
41 log.insert("file".into(), Value::Bytes(file.to_owned().into()));
42 }
43 if let Some(line) = captures.name("line").map(|capture| capture.as_str()) {
44 log.insert(
45 "line".into(),
46 Value::Integer(line.parse().map_err(|_| "failed parsing line")?),
47 );
48 }
49 if let Some(message) = captures.name("message").map(|capture| capture.as_str()) {
50 log.insert("message".into(), Value::Bytes(message.to_owned().into()));
51 }
52 Ok(log.into())
53}
54
55static REGEX_GLOG: LazyLock<Regex> = LazyLock::new(|| {
56 Regex::new(
57 r"(?x) # Ignore whitespace and comments in the regex expression.
58 ^\s* # Start with any number of whitespaces.
59 (?P<level>\w) # Match one word character (expecting `I`,`W`,`E` or `F`).
60 (?P<timestamp>\d{4}\d{2}\d{2}\s\d{2}:\d{2}:\d{2}\.\d{6}) # Match YYYYMMDD hh:mm:ss.ffffff.
61 \s+ # Match one or more whitespace.
62 (?P<id>\d+) # Match at least one digit.
63 \s # Match one whitespace.
64 (?P<file>.+):(?P<line>\d+) # Match any character (greedily), ended by `:` and at least one digit.
65 \]\s # Match `]` and one whitespace.
66 (?P<message>.*?) # Match any characters (non-greedily).
67 \s*$ # Match any number of whitespaces to be stripped from the end.
68 ")
69 .expect("failed compiling regex for glog")
70});
71
/// Function definition for `parse_glog`; carries no state of its own.
#[derive(Debug, Clone, Copy)]
pub struct ParseGlog;
74
75impl Function for ParseGlog {
76 fn identifier(&self) -> &'static str {
77 "parse_glog"
78 }
79
80 fn usage(&self) -> &'static str {
81 "Parses the `value` using the [glog (Google Logging Library)](https://github.com/google/glog) format."
82 }
83
84 fn category(&self) -> &'static str {
85 Category::Parse.as_ref()
86 }
87
88 fn internal_failure_reasons(&self) -> &'static [&'static str] {
89 &["`value` does not match the `glog` format."]
90 }
91
92 fn return_kind(&self) -> u16 {
93 kind::OBJECT
94 }
95
96 fn examples(&self) -> &'static [Example] {
97 &[example! {
98 title: "Parse using glog",
99 source: r#"parse_glog!("I20210131 14:48:54.411655 15520 main.c++:9] Hello world!")"#,
100 result: Ok(indoc! { r#"{
101 "file": "main.c++",
102 "id": 15520,
103 "level": "info",
104 "line": 9,
105 "message": "Hello world!",
106 "timestamp": "2021-01-31T14:48:54.411655Z"
107 }"#}),
108 }]
109 }
110
111 fn compile(
112 &self,
113 _state: &state::TypeState,
114 _ctx: &mut FunctionCompileContext,
115 arguments: ArgumentList,
116 ) -> Compiled {
117 let value = arguments.required("value");
118
119 Ok(ParseGlogFn { value }.as_expr())
120 }
121
122 fn parameters(&self) -> &'static [Parameter] {
123 const PARAMETERS: &[Parameter] = &[Parameter::required(
124 "value",
125 kind::BYTES,
126 "The string to parse.",
127 )];
128 PARAMETERS
129 }
130}
131
132#[derive(Debug, Clone)]
133struct ParseGlogFn {
134 value: Box<dyn Expression>,
135}
136
137impl FunctionExpression for ParseGlogFn {
138 fn resolve(&self, ctx: &mut Context) -> Resolved {
139 let bytes = self.value.resolve(ctx)?;
140 parse_glog(bytes)
141 }
142
143 fn type_def(&self, _: &state::TypeState) -> TypeDef {
144 TypeDef::object(inner_kind()).fallible()
145 }
146}
147
148fn inner_kind() -> BTreeMap<Field, Kind> {
149 BTreeMap::from([
150 ("level".into(), Kind::bytes()),
151 ("timestamp".into(), Kind::timestamp()),
152 ("id".into(), Kind::integer()),
153 ("file".into(), Kind::bytes()),
154 ("line".into(), Kind::integer()),
155 ("message".into(), Kind::bytes()),
156 ])
157}
158
#[cfg(test)]
mod tests {
    use super::*;
    use crate::btreemap;
    use chrono::DateTime;

    /// Timestamp value shared by every fixture line that parses successfully.
    fn expected_timestamp() -> Value {
        Value::Timestamp(
            DateTime::parse_from_rfc3339("2021-01-31T14:48:54.411655Z")
                .unwrap()
                .into(),
        )
    }

    test_function![
        parse_glog => ParseGlog;

        // Canonical line: every field is extracted and converted.
        log_line_valid {
            args: func_args![value: "I20210131 14:48:54.411655 15520 main.c++:9] Hello world!"],
            want: Ok(btreemap! {
                "level" => "info",
                "timestamp" => expected_timestamp(),
                "id" => 15520,
                "file" => "main.c++",
                "line" => 9,
                "message" => "Hello world!",
            }),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // Leading and trailing whitespace around the line is not part of any field.
        log_line_valid_strip_whitespace {
            args: func_args![value: "\n I20210131 14:48:54.411655 15520 main.c++:9] Hello world! \n"],
            want: Ok(btreemap! {
                "level" => "info",
                "timestamp" => expected_timestamp(),
                "id" => 15520,
                "file" => "main.c++",
                "line" => 9,
                "message" => "Hello world!",
            }),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // A short id is still extracted as a plain integer.
        log_line_padded_threadid {
            args: func_args![value: "I20210131 14:48:54.411655 20 main.c++:9] Hello world!"],
            want: Ok(btreemap! {
                "level" => "info",
                "timestamp" => expected_timestamp(),
                "id" => 20,
                "file" => "main.c++",
                "line" => 9,
                "message" => "Hello world!",
            }),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // Text that does not match the pattern at all.
        log_line_invalid {
            args: func_args![value: "not a glog line"],
            want: Err("failed parsing glog message"),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // Matches the pattern, but the level letter is not a known severity.
        log_line_invalid_log_level {
            args: func_args![value: "X20210131 14:48:54.411655 15520 main.c++:9] Hello world!"],
            want: Err(r#"unrecognized log level "X""#),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // Digits are well-formed, but month and day 00 are not a real date.
        log_line_invalid_timestamp {
            args: func_args![value: "I20210000 14:48:54.411655 15520 main.c++:9] Hello world!"],
            want: Err("failed parsing timestamp 20210000 14:48:54.411655: input is out of range"),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // The id is all digits but too large for the integer type.
        log_line_invalid_id {
            args: func_args![value: "I20210131 14:48:54.411655 99999999999999999999999999999 main.c++:9] Hello world!"],
            want: Err("failed parsing id"),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }
    ];
}