1use crate::compiler::function::EnumVariant;
2use crate::compiler::prelude::*;
3use crate::value;
4use regex::Regex;
5use std::collections::BTreeMap;
6
7use super::log_util;
8use std::sync::LazyLock;
9
/// Timestamp format string that `time_format_for_format` returns for the `combined`,
/// `ingress_upstreaminfo` and `main` log formats.
static DEFAULT_TIMESTAMP_FORMAT_STR: &str = "%d/%b/%Y:%T %z";
/// `DEFAULT_TIMESTAMP_FORMAT_STR` wrapped in a lazily built `Value::Bytes`; it is the value
/// handed to `.default(...)` in `PARAMETERS`.
static DEFAULT_TIMESTAMP_FORMAT: LazyLock<Value> =
    LazyLock::new(|| Value::Bytes(Bytes::from(DEFAULT_TIMESTAMP_FORMAT_STR)));
13
/// Log format names paired with a short human-readable description.
///
/// The `value`s listed here are the same names that `variants()` returns.
static FORMAT_ENUM: &[EnumVariant] = &[
    EnumVariant {
        value: "combined",
        description: "Nginx combined format",
    },
    EnumVariant {
        value: "error",
        description: "Default Nginx error format",
    },
    EnumVariant {
        value: "ingress_upstreaminfo",
        description: "Provides detailed upstream information (Nginx Ingress Controller)",
    },
    EnumVariant {
        value: "main",
        description: "Nginx main format used by Docker images",
    },
];
32
33static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
34 vec to use for encoding the timestamp. The time is parsed
47in local time if the timestamp doesn't specify a timezone. The default format is `%d/%b/%Y:%T %z` for
48combined logs and `%Y/%m/%d %H:%M:%S` for error logs.",
49 )
50 .default(&DEFAULT_TIMESTAMP_FORMAT),
51 ]
52});
53
54fn parse_nginx_log(
55 bytes: &Value,
56 timestamp_format: Option<Value>,
57 format: &Bytes,
58 ctx: &Context,
59) -> Resolved {
60 let message = bytes.try_bytes_utf8_lossy()?;
61 let timestamp_format = match timestamp_format {
62 None => time_format_for_format(format.as_ref()),
63 Some(timestamp_format) => timestamp_format.try_bytes_utf8_lossy()?.to_string(),
64 };
65 let regex = regex_for_format(format.as_ref());
66 let captures = regex.captures(&message).ok_or("failed parsing log line")?;
67 log_util::log_fields(regex, &captures, ×tamp_format, *ctx.timezone())
68 .map(rename_referrer)
69 .map_err(Into::into)
70}
71
72fn variants() -> Vec<Value> {
73 vec![
74 value!("combined"),
75 value!("error"),
76 value!("ingress_upstreaminfo"),
77 value!("main"),
78 ]
79}
80
/// Function definition for `parse_nginx_log`; it carries no state of its own.
#[derive(Clone, Copy, Debug)]
pub struct ParseNginxLog;
83
84impl Function for ParseNginxLog {
85 fn identifier(&self) -> &'static str {
86 "parse_nginx_log"
87 }
88
89 fn usage(&self) -> &'static str {
90 "Parses Nginx access and error log lines. Lines can be in [`combined`](https://nginx.org/en/docs/http/ngx_http_log_module.html), [`ingress_upstreaminfo`](https://kubernetes.github.io/ingress-nginx/user-guide/nginx-configuration/log-format/), [`main`](https://hg.nginx.org/pkg-oss/file/tip/debian/debian/nginx.conf) or [`error`](https://github.com/nginx/nginx/blob/branches/stable-1.18/src/core/ngx_log.c#L102) format."
91 }
92
    /// Returns the string form of `Category::Parse`.
    fn category(&self) -> &'static str {
        Category::Parse.as_ref()
    }
96
97 fn internal_failure_reasons(&self) -> &'static [&'static str] {
98 &[
99 "`value` does not match the specified format.",
100 "`timestamp_format` is not a valid format string.",
101 "The timestamp in `value` fails to parse using the provided `timestamp_format`.",
102 ]
103 }
104
    /// Returns `kind::OBJECT` as the function's return kind.
    fn return_kind(&self) -> u16 {
        kind::OBJECT
    }
108
109 fn notices(&self) -> &'static [&'static str] {
110 &[
111 indoc! {"
112 Missing information in the log message may be indicated by `-`. These fields are
113 omitted in the result.
114 "},
115 indoc! {"
116 In case of `ingress_upstreaminfo` format the following fields may be safely omitted
117 in the log message: `remote_addr`, `remote_user`, `http_referer`, `http_user_agent`,
118 `proxy_alternative_upstream_name`, `upstream_addr`, `upstream_response_length`,
119 `upstream_response_time`, `upstream_status`.
120 "},
121 ]
122 }
123
    /// Returns the parameter list held in the `PARAMETERS` static, as a slice.
    fn parameters(&self) -> &'static [Parameter] {
        PARAMETERS.as_slice()
    }
127
128 fn compile(
129 &self,
130 state: &state::TypeState,
131 _ctx: &mut FunctionCompileContext,
132 arguments: ArgumentList,
133 ) -> Compiled {
134 let value = arguments.required("value");
135 let format = arguments
136 .required_enum("format", &variants(), state)?
137 .try_bytes()
138 .expect("format not bytes");
139
140 let timestamp_format = arguments.optional("timestamp_format");
141
142 Ok(ParseNginxLogFn {
143 value,
144 format,
145 timestamp_format,
146 }
147 .as_expr())
148 }
149
    /// Returns one documentation example per supported `format`, each pairing a
    /// `parse_nginx_log!` call with its expected result.
    fn examples(&self) -> &'static [Example] {
        &[
            // `combined` access log line, including the user and the trailing compression field.
            example! {
                title: "Parse via Nginx log format (combined)",
                source: indoc! {r#"
                    parse_nginx_log!(
                        s'172.17.0.1 - alice [01/Apr/2021:12:02:31 +0000] "POST /not-found HTTP/1.1" 404 153 "http://localhost/somewhere" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36" "2.75"',
                        "combined",
                    )
                "#},
                result: Ok(indoc! {r#"{
                    "agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36",
                    "client": "172.17.0.1",
                    "compression": "2.75",
                    "referer": "http://localhost/somewhere",
                    "request": "POST /not-found HTTP/1.1",
                    "size": 153,
                    "status": 404,
                    "timestamp": "2021-04-01T12:02:31Z",
                    "user": "alice"
                }"#}),
            },
            // `error` log line; its timestamp carries no timezone offset.
            example! {
                title: "Parse via Nginx log format (error)",
                source: indoc! {r#"
                    parse_nginx_log!(
                        s'2021/04/01 13:02:31 [error] 31#31: *1 open() "/usr/share/nginx/html/not-found" failed (2: No such file or directory), client: 172.17.0.1, server: localhost, request: "POST /not-found HTTP/1.1", host: "localhost:8081"',
                        "error"
                    )
                "#},
                result: Ok(indoc! {r#"{
                    "cid": 1,
                    "client": "172.17.0.1",
                    "host": "localhost:8081",
                    "message": "open() \"/usr/share/nginx/html/not-found\" failed (2: No such file or directory)",
                    "pid": 31,
                    "request": "POST /not-found HTTP/1.1",
                    "server": "localhost",
                    "severity": "error",
                    "tid": 31,
                    "timestamp": "2021-04-01T13:02:31Z"
                }"#}),
            },
            // `ingress_upstreaminfo` line with all upstream fields populated.
            example! {
                title: "Parse via Nginx log format (ingress_upstreaminfo)",
                source: indoc! {r#"
                    parse_nginx_log!(
                        s'0.0.0.0 - bob [18/Mar/2023:15:00:00 +0000] "GET /some/path HTTP/2.0" 200 12312 "https://10.0.0.1/some/referer" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" 462 0.050 [some-upstream-service-9000] [some-other-upstream-5000] 10.0.50.80:9000 19437 0.049 200 752178adb17130b291aefd8c386279e7',
                        "ingress_upstreaminfo"
                    )
                "#},
                result: Ok(indoc! {r#"{
                    "body_bytes_size": 12312,
                    "http_referer": "https://10.0.0.1/some/referer",
                    "http_user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
                    "proxy_alternative_upstream_name": "some-other-upstream-5000",
                    "proxy_upstream_name": "some-upstream-service-9000",
                    "remote_addr": "0.0.0.0",
                    "remote_user": "bob",
                    "req_id": "752178adb17130b291aefd8c386279e7",
                    "request": "GET /some/path HTTP/2.0",
                    "request_length": 462,
                    "request_time": 0.05,
                    "status": 200,
                    "timestamp": "2023-03-18T15:00:00Z",
                    "upstream_addr": "10.0.50.80:9000",
                    "upstream_response_length": 19437,
                    "upstream_response_time": 0.049,
                    "upstream_status": 200
                }"#}),
            },
            // `main` line whose last quoted field holds two forwarded-for addresses.
            example! {
                title: "Parse via Nginx log format (main)",
                source: indoc! {r#"
                    parse_nginx_log!(
                        s'172.24.0.3 - alice [31/Dec/2024:17:32:06 +0000] "GET / HTTP/1.1" 200 615 "https://domain.tld/path" "curl/8.11.1" "1.2.3.4, 10.10.1.1"',
                        "main"
                    )
                "#},
                result: Ok(indoc! {r#"{
                    "body_bytes_size": 615,
                    "http_referer": "https://domain.tld/path",
                    "http_user_agent": "curl/8.11.1",
                    "http_x_forwarded_for": "1.2.3.4, 10.10.1.1",
                    "remote_addr": "172.24.0.3",
                    "remote_user": "alice",
                    "request": "GET / HTTP/1.1",
                    "status": 200,
                    "timestamp": "2024-12-31T17:32:06Z"
                }"#}),
            },
        ]
    }
243}
244
245fn regex_for_format(format: &[u8]) -> &Regex {
246 match format {
247 b"combined" => &log_util::REGEX_NGINX_COMBINED_LOG,
248 b"ingress_upstreaminfo" => &log_util::REGEX_INGRESS_NGINX_UPSTREAMINFO_LOG,
249 b"main" => &log_util::REGEX_NGINX_MAIN_LOG,
250 b"error" => &log_util::REGEX_NGINX_ERROR_LOG,
251 _ => unreachable!(),
252 }
253}
254
255fn time_format_for_format(format: &[u8]) -> String {
256 match format {
257 b"combined" | b"ingress_upstreaminfo" | b"main" => DEFAULT_TIMESTAMP_FORMAT_STR.to_owned(),
258 b"error" => "%Y/%m/%d %H:%M:%S".to_owned(),
259 _ => unreachable!(),
260 }
261}
262
263fn rename_referrer(mut value: Value) -> Value {
264 if let Some(obj) = value.as_object_mut()
265 && let Some(referer) = obj.remove("referrer")
266 {
267 obj.insert("referer".into(), referer);
268 }
269 value
270}
271
/// Expression built by `ParseNginxLog::compile` for one `parse_nginx_log` call.
#[derive(Debug, Clone)]
struct ParseNginxLogFn {
    /// Expression yielding the log line to parse.
    value: Box<dyn Expression>,
    /// Log format name, already resolved to bytes when the expression is built.
    format: Bytes,
    /// Optional expression yielding a custom timestamp format.
    timestamp_format: Option<Box<dyn Expression>>,
}
278
279impl FunctionExpression for ParseNginxLogFn {
280 fn resolve(&self, ctx: &mut Context) -> Resolved {
281 let bytes = self.value.resolve(ctx)?;
282 let timestamp_format = self
283 .timestamp_format
284 .as_ref()
285 .map(|expr| expr.resolve(ctx))
286 .transpose()?;
287 let format = &self.format;
288
289 parse_nginx_log(&bytes, timestamp_format, format, ctx)
290 }
291
292 fn type_def(&self, _: &state::TypeState) -> TypeDef {
293 TypeDef::object(match self.format.as_ref() {
294 b"combined" => kind_combined(),
295 b"ingress_upstreaminfo" => kind_ingress_upstreaminfo(),
296 b"main" => kind_main(),
297 b"error" => kind_error(),
298 _ => unreachable!(),
299 })
300 .fallible()
301 }
302}
303
304fn kind_combined() -> BTreeMap<Field, Kind> {
305 BTreeMap::from([
306 ("client".into(), Kind::bytes()),
307 ("user".into(), Kind::bytes().or_null()),
308 ("timestamp".into(), Kind::timestamp()),
309 ("request".into(), Kind::bytes()),
310 ("status".into(), Kind::integer()),
311 ("size".into(), Kind::integer()),
312 ("referer".into(), Kind::bytes().or_null()),
313 ("agent".into(), Kind::bytes().or_null()),
314 ("compression".into(), Kind::bytes().or_null()),
315 ])
316}
317
318fn kind_ingress_upstreaminfo() -> BTreeMap<Field, Kind> {
319 BTreeMap::from([
320 ("remote_addr".into(), Kind::bytes().or_undefined()),
321 ("remote_user".into(), Kind::bytes().or_undefined()),
322 ("timestamp".into(), Kind::timestamp()),
323 ("request".into(), Kind::bytes()),
324 ("status".into(), Kind::integer()),
325 ("body_bytes_size".into(), Kind::integer()),
326 ("http_referer".into(), Kind::bytes().or_undefined()),
327 ("http_user_agent".into(), Kind::bytes().or_undefined()),
328 ("request_length".into(), Kind::integer()),
329 ("request_time".into(), Kind::float()),
330 ("proxy_upstream_name".into(), Kind::bytes()),
331 (
332 "proxy_alternative_upstream_name".into(),
333 Kind::bytes().or_undefined(),
334 ),
335 ("upstream_addr".into(), Kind::bytes()),
336 ("upstream_response_length".into(), Kind::integer()),
337 ("upstream_response_time".into(), Kind::float()),
338 ("upstream_status".into(), Kind::integer()),
339 ("req_id".into(), Kind::bytes()),
340 ])
341}
342
343fn kind_main() -> BTreeMap<Field, Kind> {
344 BTreeMap::from([
345 ("remote_addr".into(), Kind::bytes().or_undefined()),
346 ("remote_user".into(), Kind::bytes().or_undefined()),
347 ("timestamp".into(), Kind::timestamp()),
348 ("request".into(), Kind::bytes()),
349 ("status".into(), Kind::integer()),
350 ("body_bytes_size".into(), Kind::integer()),
351 ("http_referer".into(), Kind::bytes().or_undefined()),
352 ("http_user_agent".into(), Kind::bytes().or_undefined()),
353 ("http_x_forwarded_for".into(), Kind::bytes().or_undefined()),
354 ])
355}
356
357fn kind_error() -> BTreeMap<Field, Kind> {
358 BTreeMap::from([
359 ("timestamp".into(), Kind::timestamp()),
360 ("severity".into(), Kind::bytes()),
361 ("pid".into(), Kind::integer()),
362 ("tid".into(), Kind::integer()),
363 ("cid".into(), Kind::integer()),
364 ("message".into(), Kind::bytes()),
365 ("excess".into(), Kind::float().or_null()),
366 ("zone".into(), Kind::bytes().or_null()),
367 ("client".into(), Kind::bytes().or_null()),
368 ("server".into(), Kind::bytes().or_null()),
369 ("request".into(), Kind::bytes().or_null()),
370 ("upstream".into(), Kind::bytes().or_null()),
371 ("host".into(), Kind::bytes().or_null()),
372 ("port".into(), Kind::bytes().or_null()),
373 ])
374}
375
// Table-driven tests for `ParseNginxLog`. Each case supplies the call arguments (`args`), the
// expected outcome (`want`) and the expected type definition (`tdef`).
#[cfg(test)]
mod tests {
    use crate::btreemap;
    use chrono::prelude::*;

    use super::*;

    test_function![
        parse_combined_log => ParseNginxLog;

        // ---- `combined` format ----

        combined_line_valid {
            args: func_args![
                value: r#"172.17.0.1 - - [31/Mar/2021:12:04:07 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.75.0" "-""#,
                format: "combined"
            ],
            want: Ok(btreemap! {
                "client" => "172.17.0.1",
                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2021-03-31T12:04:07Z").unwrap().into()),
                "referer" => "-",
                "request" => "GET / HTTP/1.1",
                "status" => 200,
                "size" => 612,
                "agent" => "curl/7.75.0",
            }),
            tdef: TypeDef::object(kind_combined()).fallible(),
        }

        // The trailing compression field is absent from the line altogether.
        combined_line_valid_no_compression {
            args: func_args![
                value: r#"0.0.0.0 - - [23/Apr/2021:14:59:24 +0000] "GET /my-path/manifest.json HTTP/1.1" 200 504 "https://my-url.com/my-path" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36""#,
                format: "combined"
            ],
            want: Ok(btreemap! {
                "client" => "0.0.0.0",
                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2021-04-23T14:59:24Z").unwrap().into()),
                "request" => "GET /my-path/manifest.json HTTP/1.1",
                "status" => 200,
                "size" => 504,
                "referer" => "https://my-url.com/my-path",
                "agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36",
            }),
            tdef: TypeDef::object(kind_combined()).fallible(),
        }

        // Empty quoted fields are expected to come back as empty strings.
        combined_line_valid_empty_fields {
            args: func_args![
                value: r#"0.0.0.0 - - [04/Oct/2022:04:34:49 +0000] "" 400 0 "" """#,
                format: "combined"
            ],
            want: Ok(btreemap! {
                "client" => "0.0.0.0",
                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2022-10-04T04:34:49Z").unwrap().into()),
                "request" => "",
                "status" => 400,
                "size" => 0,
                "referer" => "",
                "agent" => "",
            }),
            tdef: TypeDef::object(kind_combined()).fallible(),
        }

        // The request field holds escaped binary garbage rather than an HTTP request line.
        combined_line_valid_bot_request {
            args: func_args![
                value: r#"0.0.0.0 - - [04/Oct/2022:03:07:27 +0000] "]&\xDF\xBDV\xE7\xBB<\x10;\xA2b}\xDFM\x1D" 400 150 "-" "-""#,
                format: "combined"
            ],
            want: Ok(btreemap! {
                "client" => "0.0.0.0",
                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2022-10-04T03:07:27Z").unwrap().into()),
                "request" => r"]&\xDF\xBDV\xE7\xBB<\x10;\xA2b}\xDFM\x1D",
                "status" => 400,
                "size" => 150,
                "referer" => "-",
                "agent" => "-",
            }),
            tdef: TypeDef::object(kind_combined()).fallible(),
        }

        combined_line_valid_empty_referer {
            args: func_args![
                value: r#"0.0.0.0 - - [04/Oct/2022:03:07:27 +0000] "]&\xDF\xBDV\xE7\xBB<\x10;\xA2b}\xDFM\x1D" 400 150 "" "-""#,
                format: "combined"
            ],
            want: Ok(btreemap! {
                "client" => "0.0.0.0",
                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2022-10-04T03:07:27Z").unwrap().into()),
                "request" => r"]&\xDF\xBDV\xE7\xBB<\x10;\xA2b}\xDFM\x1D",
                "status" => 400,
                "size" => 150,
                "referer" => "",
                "agent" => "-",
            }),
            tdef: TypeDef::object(kind_combined()).fallible(),
        }

        combined_line_valid_all_fields {
            args: func_args![
                value: r#"172.17.0.1 - alice [01/Apr/2021:12:02:31 +0000] "POST /not-found HTTP/1.1" 404 153 "http://localhost/somewhere" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36" "2.75""#,
                format: "combined"
            ],
            want: Ok(btreemap! {
                "client" => "172.17.0.1",
                "user" => "alice",
                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2021-04-01T12:02:31Z").unwrap().into()),
                "request" => "POST /not-found HTTP/1.1",
                "status" => 404,
                "size" => 153,
                "referer" => "http://localhost/somewhere",
                "agent" => "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36",
                "compression" => "2.75",
            }),
            tdef: TypeDef::object(kind_combined()).fallible(),
        }

        // ---- `ingress_upstreaminfo` format ----

        // No remote user and an empty `[]` alternative upstream name.
        ingress_nginx_upstreaminfo_valid_without_optional_fields {
            args: func_args![
                value: r#"0.0.0.0 - - [18/Mar/2023:15:00:00 +0000] "GET /some/path HTTP/2.0" 200 12312 "https://10.0.0.1/some/referer" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" 462 0.050 [some-upstream-service-9000] [] 10.0.50.80:9000 19437 0.049 200 752178adb17130b291aefd8c386279e7"#,
                format: "ingress_upstreaminfo"
            ],
            want: Ok(btreemap! {
                "remote_addr" => "0.0.0.0",
                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2023-03-18T15:00:00Z").unwrap().into()),
                "request" => "GET /some/path HTTP/2.0",
                "status" => 200,
                "body_bytes_size" => 12312,
                "http_referer" => "https://10.0.0.1/some/referer",
                "http_user_agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
                "request_length" => 462,
                "request_time" => 0.050,
                "proxy_upstream_name" => "some-upstream-service-9000",
                "upstream_addr" => "10.0.50.80:9000",
                "upstream_response_length" => 19437,
                "upstream_response_time" => 0.049,
                "upstream_status" => 200,
                "req_id" => "752178adb17130b291aefd8c386279e7",
            }),
            tdef: TypeDef::object(kind_ingress_upstreaminfo()).fallible(),
        }

        // All four upstream fields are `-` in the line.
        ingress_nginx_upstreaminfo_valid_missing_upstream {
            args: func_args![
                value: r#"0.0.0.0 - - [18/Mar/2023:15:00:00 +0000] "GET /some/path HTTP/2.0" 200 12312 "https://10.0.0.1/some/referer" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" 462 0.050 [some-upstream-service-9000] [] - - - - 752178adb17130b291aefd8c386279e7"#,
                format: "ingress_upstreaminfo"
            ],
            want: Ok(btreemap! {
                "remote_addr" => "0.0.0.0",
                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2023-03-18T15:00:00Z").unwrap().into()),
                "request" => "GET /some/path HTTP/2.0",
                "status" => 200,
                "body_bytes_size" => 12312,
                "http_referer" => "https://10.0.0.1/some/referer",
                "http_user_agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
                "request_length" => 462,
                "request_time" => 0.050,
                "upstream_addr" => "-",
                "proxy_upstream_name" => "some-upstream-service-9000",
                "req_id" => "752178adb17130b291aefd8c386279e7",
            }),
            tdef: TypeDef::object(kind_ingress_upstreaminfo()).fallible(),
        }

        ingress_nginx_upstreaminfo_valid_empty_referer {
            args: func_args![
                value: r#"0.0.0.0 - - [18/Mar/2023:15:00:00 +0000] "GET /some/path HTTP/2.0" 200 12312 "" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" 462 0.050 [some-upstream-service-9000] [] - - - - 752178adb17130b291aefd8c386279e7"#,
                format: "ingress_upstreaminfo"
            ],
            want: Ok(btreemap! {
                "remote_addr" => "0.0.0.0",
                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2023-03-18T15:00:00Z").unwrap().into()),
                "request" => "GET /some/path HTTP/2.0",
                "status" => 200,
                "body_bytes_size" => 12312,
                "http_referer" => "",
                "http_user_agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
                "request_length" => 462,
                "request_time" => 0.050,
                "upstream_addr" => "-",
                "proxy_upstream_name" => "some-upstream-service-9000",
                "req_id" => "752178adb17130b291aefd8c386279e7",
            }),
            tdef: TypeDef::object(kind_ingress_upstreaminfo()).fallible(),
        }

        ingress_nginx_upstreaminfo_valid_all_fields {
            args: func_args![
                value: r#"0.0.0.0 - bob [18/Mar/2023:15:00:00 +0000] "GET /some/path HTTP/2.0" 200 12312 "https://10.0.0.1/some/referer" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" 462 0.050 [some-upstream-service-9000] [some-other-upstream-5000] 10.0.50.80:9000 19437 0.049 200 752178adb17130b291aefd8c386279e7"#,
                format: "ingress_upstreaminfo"
            ],
            want: Ok(btreemap! {
                "remote_addr" => "0.0.0.0",
                "remote_user" => "bob",
                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2023-03-18T15:00:00Z").unwrap().into()),
                "request" => "GET /some/path HTTP/2.0",
                "status" => 200,
                "body_bytes_size" => 12312,
                "http_referer" => "https://10.0.0.1/some/referer",
                "http_user_agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
                "request_length" => 462,
                "request_time" => 0.050,
                "proxy_upstream_name" => "some-upstream-service-9000",
                "proxy_alternative_upstream_name" => "some-other-upstream-5000",
                "upstream_addr" => "10.0.50.80:9000",
                "upstream_response_length" => 19437,
                "upstream_response_time" => 0.049,
                "upstream_status" => 200,
                "req_id" => "752178adb17130b291aefd8c386279e7",
            }),
            tdef: TypeDef::object(kind_ingress_upstreaminfo()).fallible(),
        }

        // ---- `main` format ----

        // Referer and forwarded-for are both `-` and are expected to be left out.
        main_line_valid_no_proxy {
            args: func_args![
                value: r#"172.24.0.3 - - [31/Dec/2024:17:32:06 +0000] "GET / HTTP/1.1" 200 615 "-" "curl/8.11.1" "-""#,
                format: "main"
            ],
            want: Ok(btreemap! {
                "remote_addr" => "172.24.0.3",
                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2024-12-31T17:32:06Z").unwrap().into()),
                "request" => "GET / HTTP/1.1",
                "status" => 200,
                "body_bytes_size" => 615,
                "http_user_agent" => "curl/8.11.1",
            }),
            tdef: TypeDef::object(kind_main()).fallible(),
        }

        main_line_valid_single_proxy {
            args: func_args![
                value: r#"172.24.0.3 - - [31/Dec/2024:17:32:06 +0000] "GET / HTTP/1.1" 200 615 "-" "curl/8.11.1" "172.24.0.1""#,
                format: "main"
            ],
            want: Ok(btreemap! {
                "remote_addr" => "172.24.0.3",
                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2024-12-31T17:32:06Z").unwrap().into()),
                "request" => "GET / HTTP/1.1",
                "status" => 200,
                "body_bytes_size" => 615,
                "http_user_agent" => "curl/8.11.1",
                "http_x_forwarded_for" => "172.24.0.1",
            }),
            tdef: TypeDef::object(kind_main()).fallible(),
        }

        main_line_valid_two_proxies {
            args: func_args![
                value: r#"172.24.0.3 - - [31/Dec/2024:17:32:06 +0000] "GET / HTTP/1.1" 200 615 "-" "curl/8.11.1" "1.2.3.4, 10.10.1.1""#,
                format: "main"
            ],
            want: Ok(btreemap! {
                "remote_addr" => "172.24.0.3",
                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2024-12-31T17:32:06Z").unwrap().into()),
                "request" => "GET / HTTP/1.1",
                "status" => 200,
                "body_bytes_size" => 615,
                "http_user_agent" => "curl/8.11.1",
                "http_x_forwarded_for" => "1.2.3.4, 10.10.1.1",
            }),
            tdef: TypeDef::object(kind_main()).fallible(),
        }

        main_line_valid_all_fields {
            args: func_args![
                value: r#"172.24.0.2 - alice [03/Jan/2025:16:42:58 +0000] "GET / HTTP/1.1" 200 615 "http://domain.tld/path" "curl/8.11.1" "1.2.3.4, 10.10.1.1""#,
                format: "main"
            ],
            want: Ok(btreemap! {
                "remote_addr" => "172.24.0.2",
                "remote_user" => "alice",
                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2025-01-03T16:42:58Z").unwrap().into()),
                "request" => "GET / HTTP/1.1",
                "status" => 200,
                "body_bytes_size" => 615,
                "http_referer" => "http://domain.tld/path",
                "http_user_agent" => "curl/8.11.1",
                "http_x_forwarded_for" => "1.2.3.4, 10.10.1.1",
            }),
            tdef: TypeDef::object(kind_main()).fallible(),
        }

        // An error-log line must be rejected when parsed as `main`.
        main_line_invalid {
            args: func_args![
                value: r#"2025/01/03 16:41:26 [error] 31#31: *3 open() "/usr/share/nginx/html/favicon.ico" failed (2: No such file or directory), client: 172.24.0.2, server: localhost, request: "GET /favicon.ico HTTP/1.1", host: "localhost:4080", referrer: "http://localhost:4080/""#,
                format: "main"
            ],
            want: Err("failed parsing log line"),
            tdef: TypeDef::object(kind_main()).fallible(),
        }

        // ---- `error` format ----

        error_line_valid {
            args: func_args![
                value: r#"2021/04/01 13:02:31 [error] 31#31: *1 open() "/usr/share/nginx/html/not-found" failed (2: No such file or directory), client: 172.17.0.1, server: localhost, request: "POST /not-found HTTP/1.1", host: "localhost:8081""#,
                format: "error"
            ],
            want: Ok(btreemap! {
                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2021-04-01T13:02:31Z").unwrap().into()),
                "severity" => "error",
                "pid" => 31,
                "tid" => 31,
                "cid" => 1,
                "message" => "open() \"/usr/share/nginx/html/not-found\" failed (2: No such file or directory)",
                "client" => "172.17.0.1",
                "server" => "localhost",
                "request" => "POST /not-found HTTP/1.1",
                "host" => "localhost:8081",
            }),
            tdef: TypeDef::object(kind_error()).fallible(),
        }

        // The line says `referrer`; the result is expected to use the key `referer`.
        error_line_with_referrer {
            args: func_args![
                value: r#"2021/06/03 09:30:50 [error] 32#32: *6 open() "/usr/share/nginx/html/favicon.ico" failed (2: No such file or directory), client: 10.244.0.0, server: localhost, request: "GET /favicon.ico HTTP/1.1", host: "65.21.190.83:31256", referrer: "http://65.21.190.83:31256/""#,
                format: "error"
            ],
            want: Ok(btreemap! {
                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2021-06-03T09:30:50Z").unwrap().into()),
                "severity" => "error",
                "pid" => 32,
                "tid" => 32,
                "cid" => 6,
                "message" => "open() \"/usr/share/nginx/html/favicon.ico\" failed (2: No such file or directory)",
                "client" => "10.244.0.0",
                "server" => "localhost",
                "request" => "GET /favicon.ico HTTP/1.1",
                "host" => "65.21.190.83:31256",
                "referer" => "http://65.21.190.83:31256/",
            }),
            tdef: TypeDef::object(kind_error()).fallible(),
        }

        error_line_with_empty_referrer {
            args: func_args![
                value: r#"2021/06/03 09:30:50 [error] 32#32: *6 open() "/usr/share/nginx/html/favicon.ico" failed (2: No such file or directory), client: 10.244.0.0, server: localhost, request: "GET /favicon.ico HTTP/1.1", host: "65.21.190.83:31256", referrer: """#,
                format: "error"
            ],
            want: Ok(btreemap! {
                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2021-06-03T09:30:50Z").unwrap().into()),
                "severity" => "error",
                "pid" => 32,
                "tid" => 32,
                "cid" => 6,
                "message" => "open() \"/usr/share/nginx/html/favicon.ico\" failed (2: No such file or directory)",
                "client" => "10.244.0.0",
                "server" => "localhost",
                "request" => "GET /favicon.ico HTTP/1.1",
                "host" => "65.21.190.83:31256",
                "referer" => "",
            }),
            tdef: TypeDef::object(kind_error()).fallible(),
        }

        // A line without a connection id or any trailing key/value details.
        error_line_starting {
            args: func_args![
                value: "2021/06/17 19:25:59 [notice] 133309#133309: signal process started",
                format: "error"
            ],
            want: Ok(btreemap! {
                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2021-06-17T19:25:59Z").unwrap().into()),
                "severity" => "notice",
                "pid" => 133_309,
                "tid" => 133_309,
                "message" => "signal process started",
            }),
            tdef: TypeDef::object(kind_error()).fallible(),
        }

        error_line_with_empty_values {
            args: func_args![
                value: r#"2023/09/08 13:50:28 [warn] 3#3: *531 an upstream response is buffered to a temporary file /var/lib/nginx/tmp/fastcgi/6/03/0000000036 while reading upstream, client: 10.224.1.1, server: , request: "GET / HTTP/1.1", upstream: "fastcgi://127.0.0.1:9000", host: "", referrer: """#,
                format: "error"
            ],
            want: Ok(btreemap! {
                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2023-09-08T13:50:28Z").unwrap().into()),
                "severity" => "warn",
                "pid" => 3,
                "tid" => 3,
                "cid" => 531,
                "message" => "an upstream response is buffered to a temporary file /var/lib/nginx/tmp/fastcgi/6/03/0000000036 while reading upstream",
                "client" => "10.224.1.1",
                "server" => "",
                "request" => "GET / HTTP/1.1",
                "upstream" => "fastcgi://127.0.0.1:9000",
                "host" => "",
                "referer" => "",
            }),
            tdef: TypeDef::object(kind_error()).fallible(),
        }

        error_line_with_upstream {
            args: func_args![
                value: r#"2022/04/15 08:16:13 [error] 7164#7164: *20 connect() failed (113: No route to host) while connecting to upstream, client: 10.244.0.0, server: test.local, request: "GET / HTTP/2.0", upstream: "http://127.0.0.1:80/""#,
                format: "error"
            ],
            want: Ok(btreemap! {
                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2022-04-15T08:16:13Z").unwrap().into()),
                "severity" => "error",
                "pid" => 7164,
                "tid" => 7164,
                "cid" => 20,
                "message" => "connect() failed (113: No route to host) while connecting to upstream",
                "client" => "10.244.0.0",
                "server" => "test.local",
                "request" => "GET / HTTP/2.0",
                "upstream" => "http://127.0.0.1:80/",
            }),
            tdef: TypeDef::object(kind_error()).fallible(),
        }

        // Rate-limit message: `excess` and `zone` are expected as separate fields.
        error_rate_limit {
            args: func_args![
                value: r#"2022/05/30 20:56:22 [error] 7164#7164: *38068741 limiting requests, excess: 50.416 by zone "api_access_token", client: 10.244.0.0, server: test.local, request: "GET / HTTP/2.0", host: "127.0.0.1:8080""#,
                format: "error"
            ],
            want: Ok(btreemap! {
                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2022-05-30T20:56:22Z").unwrap().into()),
                "severity" => "error",
                "pid" => 7164,
                "tid" => 7164,
                "cid" => 38_068_741,
                "message" => "limiting requests",
                "excess" => 50.416,
                "zone" => "api_access_token",
                "client" => "10.244.0.0",
                "server" => "test.local",
                "request" => "GET / HTTP/2.0",
                "host" => "127.0.0.1:8080",
            }),
            tdef: TypeDef::object(kind_error()).fallible(),
        }

        // Same as above, but with a comma after the `excess` value.
        error_rate_delaying {
            args: func_args![
                value: r#"2022/05/30 20:56:22 [error] 7164#7164: *38068741 delaying requests, excess: 50.416, by zone "api_access_token", client: 10.244.0.0, server: test.local, request: "GET / HTTP/2.0", host: "127.0.0.1:8080""#,
                format: "error"
            ],
            want: Ok(btreemap! {
                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2022-05-30T20:56:22Z").unwrap().into()),
                "severity" => "error",
                "pid" => 7164,
                "tid" => 7164,
                "cid" => 38_068_741,
                "message" => "delaying requests",
                "excess" => 50.416,
                "zone" => "api_access_token",
                "client" => "10.244.0.0",
                "server" => "test.local",
                "request" => "GET / HTTP/2.0",
                "host" => "127.0.0.1:8080",
            }),
            tdef: TypeDef::object(kind_error()).fallible(),
        }

        // A comma inside the message text must not end the message early.
        error_message_with_comma {
            args: func_args![
                value: r#"2022/05/30 20:56:22 [info] 3134#0: *99247 epoll_wait() reported that client prematurely closed connection, so upstream connection is closed too (104: Connection reset by peer) while reading upstream, client: 10.244.0.0, server: example.org, request: "GET / HTTP/1.1", upstream: "fastcgi://unix:/run/php-fpm/php8.3-fpm.sock:", host: "example:8080""#,
                format: "error"
            ],
            want: Ok(btreemap! {
                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2022-05-30T20:56:22Z").unwrap().into()),
                "severity" => "info",
                "pid" => 3134,
                "tid" => 0,
                "cid" => 99_247,
                "message" => "epoll_wait() reported that client prematurely closed connection, so upstream connection is closed too (104: Connection reset by peer) while reading upstream",
                "client" => "10.244.0.0",
                "server" => "example.org",
                "request" => "GET / HTTP/1.1",
                "host" => "example:8080",
                "upstream" => "fastcgi://unix:/run/php-fpm/php8.3-fpm.sock:",
            }),
            tdef: TypeDef::object(kind_error()).fallible(),
        }
    ];
}