1use super::log_util;
2use crate::compiler::prelude::*;
3use std::collections::BTreeMap;
4use std::sync::LazyLock;
5
6static DEFAULT_TIMESTAMP_FORMAT: LazyLock<Value> =
7 LazyLock::new(|| Value::Bytes(Bytes::from("%d/%b/%Y:%T %z")));
8
9static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
10 vec to use for
13encoding the timestamp.")
14 .default(&DEFAULT_TIMESTAMP_FORMAT),
15 ]
16});
17
18fn parse_common_log(bytes: &Value, timestamp_format: &Value, ctx: &Context) -> Resolved {
19 let message = bytes.try_bytes_utf8_lossy()?;
20 let timestamp_format = timestamp_format.try_bytes_utf8_lossy()?.to_string();
21
22 log_util::parse_message(
23 &log_util::REGEX_APACHE_COMMON_LOG,
24 &message,
25 ×tamp_format,
26 *ctx.timezone(),
27 "common",
28 )
29 .map_err(Into::into)
30}
31
/// Marker type for the `parse_common_log` function, which parses Common Log Format (CLF)
/// lines.
#[derive(Debug, Clone, Copy)]
pub struct ParseCommonLog;
34
35impl Function for ParseCommonLog {
36 fn identifier(&self) -> &'static str {
37 "parse_common_log"
38 }
39
40 fn usage(&self) -> &'static str {
41 "Parses the `value` using the [Common Log Format](https://httpd.apache.org/docs/current/logs.html#common) (CLF)."
42 }
43
44 fn category(&self) -> &'static str {
45 Category::Parse.as_ref()
46 }
47
48 fn internal_failure_reasons(&self) -> &'static [&'static str] {
49 &[
50 "`value` does not match the Common Log Format.",
51 "`timestamp_format` is not a valid format string.",
52 "The timestamp in `value` fails to parse using the provided `timestamp_format`.",
53 ]
54 }
55
56 fn return_kind(&self) -> u16 {
57 kind::OBJECT
58 }
59
60 fn notices(&self) -> &'static [&'static str] {
61 &[
62 "Missing information in the log message may be indicated by `-`. These fields are omitted in the result.",
63 ]
64 }
65
66 fn parameters(&self) -> &'static [Parameter] {
67 PARAMETERS.as_slice()
68 }
69
70 fn compile(
71 &self,
72 _state: &state::TypeState,
73 _ctx: &mut FunctionCompileContext,
74 arguments: ArgumentList,
75 ) -> Compiled {
76 let value = arguments.required("value");
77 let timestamp_format = arguments.optional("timestamp_format");
78
79 Ok(ParseCommonLogFn {
80 value,
81 timestamp_format,
82 }
83 .as_expr())
84 }
85
86 fn examples(&self) -> &'static [Example] {
87 &[
88 example! {
89 title: "Parse using Common Log Format (with default timestamp format)",
90 source: r#"parse_common_log!(s'127.0.0.1 bob frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326')"#,
91 result: Ok(indoc! {
92 r#"{
93 "host":"127.0.0.1",
94 "identity":"bob",
95 "message":"GET /apache_pb.gif HTTP/1.0",
96 "method":"GET",
97 "path":"/apache_pb.gif",
98 "protocol":"HTTP/1.0",
99 "size":2326,
100 "status":200,
101 "timestamp":"2000-10-10T20:55:36Z",
102 "user":"frank"
103 }"#
104 }),
105 },
106 example! {
107 title: "Parse using Common Log Format (with custom timestamp format)",
108 source: indoc! {r#"
109 parse_common_log!(
110 s'127.0.0.1 bob frank [2000-10-10T20:55:36Z] "GET /apache_pb.gif HTTP/1.0" 200 2326',
111 "%+"
112 )
113 "#},
114 result: Ok(indoc! {
115 r#"{
116 "host":"127.0.0.1",
117 "identity":"bob",
118 "message":"GET /apache_pb.gif HTTP/1.0",
119 "method":"GET",
120 "path":"/apache_pb.gif",
121 "protocol":"HTTP/1.0",
122 "size":2326,
123 "status":200,
124 "timestamp":"2000-10-10T20:55:36Z",
125 "user":"frank"
126 }"#
127 }),
128 },
129 ]
130 }
131}
132
133#[derive(Debug, Clone)]
134struct ParseCommonLogFn {
135 value: Box<dyn Expression>,
136 timestamp_format: Option<Box<dyn Expression>>,
137}
138
139impl FunctionExpression for ParseCommonLogFn {
140 fn resolve(&self, ctx: &mut Context) -> Resolved {
141 let bytes = self.value.resolve(ctx)?;
142 let timestamp_format = self
143 .timestamp_format
144 .map_resolve_with_default(ctx, || DEFAULT_TIMESTAMP_FORMAT.clone())?;
145
146 parse_common_log(&bytes, ×tamp_format, ctx)
147 }
148
149 fn type_def(&self, _: &state::TypeState) -> TypeDef {
150 TypeDef::object(inner_kind()).fallible()
151 }
152}
153
154fn inner_kind() -> BTreeMap<Field, Kind> {
155 BTreeMap::from([
156 (Field::from("host"), Kind::bytes() | Kind::null()),
157 (Field::from("identity"), Kind::bytes() | Kind::null()),
158 (Field::from("user"), Kind::bytes() | Kind::null()),
159 (Field::from("timestamp"), Kind::timestamp() | Kind::null()),
160 (Field::from("message"), Kind::bytes() | Kind::null()),
161 (Field::from("method"), Kind::bytes() | Kind::null()),
162 (Field::from("path"), Kind::bytes() | Kind::null()),
163 (Field::from("protocol"), Kind::bytes() | Kind::null()),
164 (Field::from("status"), Kind::integer() | Kind::null()),
165 (Field::from("size"), Kind::integer() | Kind::null()),
166 ])
167}
168
#[cfg(test)]
mod tests {
    use super::*;

    use crate::btreemap;
    use chrono::prelude::*;

    test_function![
        parse_common_log => ParseCommonLog;

        // A complete line: every field is extracted and the `-0700` timestamp comes back as
        // the equivalent UTC instant.
        log_line_valid {
            args: func_args![value: r#"127.0.0.1 bob frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326"#],
            want: Ok(btreemap! {
                "host" => "127.0.0.1",
                "identity" => "bob",
                "user" => "frank",
                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2000-10-10T20:55:36Z").unwrap().into()),
                "message" => "GET /apache_pb.gif HTTP/1.0",
                "method" => "GET",
                "path" => "/apache_pb.gif",
                "protocol" => "HTTP/1.0",
                "size" => 2326,
                "status" => 200,
            }),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // Only `-` placeholders: the result is an empty object.
        log_line_valid_empty {
            args: func_args![value: "- - - - - - -"],
            want: Ok(BTreeMap::new()),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // Placeholders wrapped in the usual brackets and quotes are also accepted.
        log_line_valid_empty_variant {
            args: func_args![value: r#"- - - [-] "-" - -"#],
            want: Ok(BTreeMap::new()),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // A caller-supplied `timestamp_format` replaces the default pattern.
        log_line_valid_with_timestamp_format {
            args: func_args![
                value: r#"- - - [2000-10-10T20:55:36Z] "-" - -"#,
                timestamp_format: "%+",
            ],
            want: Ok(btreemap! {
                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2000-10-10T20:55:36Z").unwrap().into()),
            }),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // Text that does not look like a common log line is rejected.
        log_line_invalid {
            args: func_args![value: "not a common log line"],
            want: Err("failed parsing common log line"),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // A timestamp that does not fit the default pattern produces a descriptive error.
        log_line_invalid_timestamp {
            args: func_args![value: "- - - [1234] - - -"],
            want: Err("failed parsing timestamp 1234 using format %d/%b/%Y:%T %z: input contains invalid characters"),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }
    ];
}