1use crate::compiler::prelude::*;
2use chrono::{Datelike, NaiveDateTime, Utc};
3use regex::Regex;
4use std::collections::BTreeMap;
5use std::sync::LazyLock;
6
7fn parse_klog(bytes: Value) -> Resolved {
8 let bytes = bytes.try_bytes()?;
9 let message = String::from_utf8_lossy(&bytes);
10 let mut log = ObjectMap::new();
11 let captures = REGEX_KLOG
12 .captures(&message)
13 .ok_or("failed parsing klog message")?;
14 if let Some(level) = captures.name("level").map(|capture| capture.as_str()) {
15 let level = match level {
16 "I" => Ok("info"),
17 "W" => Ok("warning"),
18 "E" => Ok("error"),
19 "F" => Ok("fatal"),
20 _ => Err(format!(r#"unrecognized log level "{level}""#)),
21 }?;
22
23 log.insert("level".into(), Value::Bytes(level.to_owned().into()));
24 }
25 if let Some(timestamp) = captures.name("timestamp").map(|capture| capture.as_str()) {
26 let month = captures.name("month").map(|capture| capture.as_str());
27 let year = resolve_year(month);
28
29 match NaiveDateTime::parse_from_str(&format!("{year}{timestamp}"), "%Y%m%d %H:%M:%S%.f") {
30 Ok(naive_dt) => {
31 let utc_dt = naive_dt.and_utc();
32 log.insert("timestamp".into(), Value::Timestamp(utc_dt));
33 }
34 Err(e) => return Err(format!("failed parsing timestamp {timestamp}: {e}").into()),
35 }
36 }
37 if let Some(id) = captures.name("id").map(|capture| capture.as_str()) {
38 log.insert(
39 "id".into(),
40 Value::Integer(id.parse().map_err(|_| "failed parsing id")?),
41 );
42 }
43 if let Some(file) = captures.name("file").map(|capture| capture.as_str()) {
44 log.insert("file".into(), Value::Bytes(file.to_owned().into()));
45 }
46 if let Some(line) = captures.name("line").map(|capture| capture.as_str()) {
47 log.insert(
48 "line".into(),
49 Value::Integer(line.parse().map_err(|_| "failed parsing line")?),
50 );
51 }
52 if let Some(message) = captures.name("message").map(|capture| capture.as_str()) {
53 log.insert("message".into(), Value::Bytes(message.to_owned().into()));
54 }
55 Ok(log.into())
56}
57
58static REGEX_KLOG: LazyLock<Regex> = LazyLock::new(|| {
59 Regex::new(
60 r"(?x) # Ignore whitespace and comments in the regex expression.
61 ^\s* # Start with any number of whitespaces.
62 (?P<level>\w) # Match one word character (expecting `I`,`W`,`E` or `F`).
63 (?P<timestamp>(?P<month>\d{2})\d{2}\s\d{2}:\d{2}:\d{2}\.\d{6}) # Match MMDD hh:mm:ss.ffffff.
64 \s+ # Match one whitespace.
65 (?P<id>\d+) # Match at least one digit.
66 \s # Match one whitespace.
67 (?P<file>.+):(?P<line>\d+) # Match any character (greedily), ended by `:` and at least one digit.
68 \]\s # Match `]` and one whitespace.
69 (?P<message>.*?) # Match any characters (non-greedily).
70 \s*$ # Match any number of whitespaces to be stripped from the end.
71 ").expect("failed compiling regex for klog")
72});
73
74static EXAMPLES: LazyLock<Vec<Example>> = LazyLock::new(|| {
75 let result = Box::leak(
76 format!(
77 indoc! { r#"{{
78 "file": "klog.go",
79 "id": 28133,
80 "level": "info",
81 "line": 70,
82 "message": "hello from klog",
83 "timestamp": "{year}-05-05T17:59:40.692994Z"
84 }}"#},
85 year = Utc::now().year()
86 )
87 .into_boxed_str(),
88 );
89 vec![example! {
90 title: "Parse using klog",
91 source: r#"parse_klog!("I0505 17:59:40.692994 28133 klog.go:70] hello from klog")"#,
92 result: Ok(result),
93 }]
94});
95
96#[derive(Clone, Copy, Debug)]
97pub struct ParseKlog;
98
99impl Function for ParseKlog {
100 fn identifier(&self) -> &'static str {
101 "parse_klog"
102 }
103
104 fn usage(&self) -> &'static str {
105 "Parses the `value` using the [klog](https://github.com/kubernetes/klog) format used by Kubernetes components."
106 }
107
108 fn category(&self) -> &'static str {
109 Category::Parse.as_ref()
110 }
111
112 fn internal_failure_reasons(&self) -> &'static [&'static str] {
113 &["`value` does not match the `klog` format."]
114 }
115
116 fn return_kind(&self) -> u16 {
117 kind::OBJECT
118 }
119
120 fn notices(&self) -> &'static [&'static str] {
121 &[indoc! {"
122 This function resolves the year for messages. If the current month is January and the
123 provided month is December, it sets the year to the previous year. Otherwise, it sets
124 the year to the current year.
125 "}]
126 }
127
128 fn examples(&self) -> &'static [Example] {
129 EXAMPLES.as_slice()
130 }
131
132 fn compile(
133 &self,
134 _state: &state::TypeState,
135 _ctx: &mut FunctionCompileContext,
136 arguments: ArgumentList,
137 ) -> Compiled {
138 let value = arguments.required("value");
139
140 Ok(ParseKlogFn { value }.as_expr())
141 }
142
143 fn parameters(&self) -> &'static [Parameter] {
144 const PARAMETERS: &[Parameter] = &[Parameter::required(
145 "value",
146 kind::BYTES,
147 "The string to parse.",
148 )];
149 PARAMETERS
150 }
151}
152
153#[derive(Debug, Clone)]
154struct ParseKlogFn {
155 value: Box<dyn Expression>,
156}
157
158impl FunctionExpression for ParseKlogFn {
159 fn resolve(&self, ctx: &mut Context) -> Resolved {
160 let bytes = self.value.resolve(ctx)?;
161 parse_klog(bytes)
162 }
163
164 fn type_def(&self, _: &state::TypeState) -> TypeDef {
165 TypeDef::object(inner_kind()).fallible()
166 }
167}
168
169fn resolve_year(month: Option<&str>) -> i32 {
172 let now = Utc::now();
173 match (month, now.month()) {
174 (Some("12"), 1) => now.year() - 1,
175 (_, _) => now.year(),
176 }
177}
178
179fn inner_kind() -> BTreeMap<Field, Kind> {
180 BTreeMap::from([
181 ("level".into(), Kind::bytes()),
182 ("timestamp".into(), Kind::timestamp()),
183 ("id".into(), Kind::integer()),
184 ("file".into(), Kind::bytes()),
185 ("line".into(), Kind::integer()),
186 ("message".into(), Kind::bytes()),
187 ])
188}
189
#[cfg(test)]
mod tests {
    use crate::btreemap;
    use chrono::DateTime;

    use super::*;

    /// Timestamp expected for the `0505 17:59:40.692994` fixtures: May is never
    /// subject to the December/January roll-back, so the current year applies.
    fn fixture_timestamp() -> Value {
        let rfc3339 = format!("{}-05-05T17:59:40.692994Z", Utc::now().year());
        Value::Timestamp(DateTime::parse_from_rfc3339(&rfc3339).unwrap().into())
    }

    test_function![
        parse_klog => ParseKlog;

        // Well-formed line: every field is extracted.
        log_line_valid {
            args: func_args![value: "I0505 17:59:40.692994 28133 klog.go:70] hello from klog"],
            want: Ok(btreemap! {
                "level" => "info",
                "timestamp" => fixture_timestamp(),
                "id" => 28133,
                "file" => "klog.go",
                "line" => 70,
                "message" => "hello from klog",
            }),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // Surrounding whitespace and newlines are not part of any field.
        log_line_valid_strip_whitespace {
            args: func_args![value: "\n     I0505 17:59:40.692994 28133 klog.go:70] hello from klog    \n"],
            want: Ok(btreemap! {
                "level" => "info",
                "timestamp" => fixture_timestamp(),
                "id" => 28133,
                "file" => "klog.go",
                "line" => 70,
                "message" => "hello from klog",
            }),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // Input that does not match the pattern at all.
        log_line_invalid {
            args: func_args![value: "not a klog line"],
            want: Err("failed parsing klog message"),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // Matches the pattern, but the level letter is unknown.
        log_line_invalid_log_level {
            args: func_args![value: "X0505 17:59:40.692994 28133 klog.go:70] hello from klog"],
            want: Err(r#"unrecognized log level "X""#),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // Month and day `00` are syntactically accepted but not a real date.
        log_line_invalid_timestamp {
            args: func_args![value: "I0000 17:59:40.692994 28133 klog.go:70] hello from klog"],
            want: Err("failed parsing timestamp 0000 17:59:40.692994: input is out of range"),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // The id is all digits but too large for an integer.
        log_line_invalid_id {
            args: func_args![value: "I0505 17:59:40.692994 99999999999999999999999999999 klog.go:70] hello from klog"],
            want: Err("failed parsing id"),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }
    ];
}