vrl/stdlib/
parse_nginx_log.rs

1use crate::compiler::function::EnumVariant;
2use crate::compiler::prelude::*;
3use crate::value;
4use regex::Regex;
5use std::collections::BTreeMap;
6
7use super::log_util;
8use std::sync::LazyLock;
9
10static DEFAULT_TIMESTAMP_FORMAT_STR: &str = "%d/%b/%Y:%T %z";
11static DEFAULT_TIMESTAMP_FORMAT: LazyLock<Value> =
12    LazyLock::new(|| Value::Bytes(Bytes::from(DEFAULT_TIMESTAMP_FORMAT_STR)));
13
14static FORMAT_ENUM: &[EnumVariant] = &[
15    EnumVariant {
16        value: "combined",
17        description: "Nginx combined format",
18    },
19    EnumVariant {
20        value: "error",
21        description: "Default Nginx error format",
22    },
23    EnumVariant {
24        value: "ingress_upstreaminfo",
25        description: "Provides detailed upstream information (Nginx Ingress Controller)",
26    },
27    EnumVariant {
28        value: "main",
29        description: "Nginx main format used by Docker images",
30    },
31];
32
33static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
34    vec![
35        Parameter::required("value", kind::BYTES, "The string to parse."),
36        Parameter::required(
37            "format",
38            kind::BYTES,
39            "The format to use for parsing the log.",
40        )
41        .enum_variants(FORMAT_ENUM),
42        Parameter::optional(
43            "timestamp_format",
44            kind::BYTES,
45            "
46The [date/time format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html#specifiers) to use for encoding the timestamp. The time is parsed
47in local time if the timestamp doesn't specify a timezone. The default format is `%d/%b/%Y:%T %z` for
48combined logs and `%Y/%m/%d %H:%M:%S` for error logs.",
49        )
50        .default(&DEFAULT_TIMESTAMP_FORMAT),
51    ]
52});
53
54fn parse_nginx_log(
55    bytes: &Value,
56    timestamp_format: Option<Value>,
57    format: &Bytes,
58    ctx: &Context,
59) -> Resolved {
60    let message = bytes.try_bytes_utf8_lossy()?;
61    let timestamp_format = match timestamp_format {
62        None => time_format_for_format(format.as_ref()),
63        Some(timestamp_format) => timestamp_format.try_bytes_utf8_lossy()?.to_string(),
64    };
65    let regex = regex_for_format(format.as_ref());
66    let captures = regex.captures(&message).ok_or("failed parsing log line")?;
67    log_util::log_fields(regex, &captures, &timestamp_format, *ctx.timezone())
68        .map(rename_referrer)
69        .map_err(Into::into)
70}
71
72fn variants() -> Vec<Value> {
73    vec![
74        value!("combined"),
75        value!("error"),
76        value!("ingress_upstreaminfo"),
77        value!("main"),
78    ]
79}
80
/// Marker type that implements the `parse_nginx_log` function.
#[derive(Debug, Clone, Copy)]
pub struct ParseNginxLog;
83
84impl Function for ParseNginxLog {
85    fn identifier(&self) -> &'static str {
86        "parse_nginx_log"
87    }
88
89    fn usage(&self) -> &'static str {
90        "Parses Nginx access and error log lines. Lines can be in [`combined`](https://nginx.org/en/docs/http/ngx_http_log_module.html), [`ingress_upstreaminfo`](https://kubernetes.github.io/ingress-nginx/user-guide/nginx-configuration/log-format/), [`main`](https://hg.nginx.org/pkg-oss/file/tip/debian/debian/nginx.conf) or [`error`](https://github.com/nginx/nginx/blob/branches/stable-1.18/src/core/ngx_log.c#L102) format."
91    }
92
93    fn category(&self) -> &'static str {
94        Category::Parse.as_ref()
95    }
96
97    fn internal_failure_reasons(&self) -> &'static [&'static str] {
98        &[
99            "`value` does not match the specified format.",
100            "`timestamp_format` is not a valid format string.",
101            "The timestamp in `value` fails to parse using the provided `timestamp_format`.",
102        ]
103    }
104
105    fn return_kind(&self) -> u16 {
106        kind::OBJECT
107    }
108
109    fn notices(&self) -> &'static [&'static str] {
110        &[
111            indoc! {"
112                Missing information in the log message may be indicated by `-`. These fields are
113                omitted in the result.
114            "},
115            indoc! {"
116                In case of `ingress_upstreaminfo` format the following fields may be safely omitted
117                in the log message: `remote_addr`, `remote_user`, `http_referer`, `http_user_agent`,
118                `proxy_alternative_upstream_name`, `upstream_addr`, `upstream_response_length`,
119                `upstream_response_time`, `upstream_status`.
120            "},
121        ]
122    }
123
124    fn parameters(&self) -> &'static [Parameter] {
125        PARAMETERS.as_slice()
126    }
127
128    fn compile(
129        &self,
130        state: &state::TypeState,
131        _ctx: &mut FunctionCompileContext,
132        arguments: ArgumentList,
133    ) -> Compiled {
134        let value = arguments.required("value");
135        let format = arguments
136            .required_enum("format", &variants(), state)?
137            .try_bytes()
138            .expect("format not bytes");
139
140        let timestamp_format = arguments.optional("timestamp_format");
141
142        Ok(ParseNginxLogFn {
143            value,
144            format,
145            timestamp_format,
146        }
147        .as_expr())
148    }
149
150    fn examples(&self) -> &'static [Example] {
151        &[
152            example! {
153                title: "Parse via Nginx log format (combined)",
154                source: indoc! {r#"
155                    parse_nginx_log!(
156                        s'172.17.0.1 - alice [01/Apr/2021:12:02:31 +0000] "POST /not-found HTTP/1.1" 404 153 "http://localhost/somewhere" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36" "2.75"',
157                        "combined",
158                    )
159                "#},
160                result: Ok(indoc! {r#"{
161                    "agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36",
162                    "client": "172.17.0.1",
163                    "compression": "2.75",
164                    "referer": "http://localhost/somewhere",
165                    "request": "POST /not-found HTTP/1.1",
166                    "size": 153,
167                    "status": 404,
168                    "timestamp": "2021-04-01T12:02:31Z",
169                    "user": "alice"
170                }"#}),
171            },
172            example! {
173                title: "Parse via Nginx log format (error)",
174                source: indoc! {r#"
175                    parse_nginx_log!(
176                        s'2021/04/01 13:02:31 [error] 31#31: *1 open() "/usr/share/nginx/html/not-found" failed (2: No such file or directory), client: 172.17.0.1, server: localhost, request: "POST /not-found HTTP/1.1", host: "localhost:8081"',
177                        "error"
178                    )
179                "#},
180                result: Ok(indoc! {r#"{
181                    "cid": 1,
182                    "client": "172.17.0.1",
183                    "host": "localhost:8081",
184                    "message": "open() \"/usr/share/nginx/html/not-found\" failed (2: No such file or directory)",
185                    "pid": 31,
186                    "request": "POST /not-found HTTP/1.1",
187                    "server": "localhost",
188                    "severity": "error",
189                    "tid": 31,
190                    "timestamp": "2021-04-01T13:02:31Z"
191                }"#}),
192            },
193            example! {
194                title: "Parse via Nginx log format (ingress_upstreaminfo)",
195                source: indoc! {r#"
196                    parse_nginx_log!(
197                        s'0.0.0.0 - bob [18/Mar/2023:15:00:00 +0000] "GET /some/path HTTP/2.0" 200 12312 "https://10.0.0.1/some/referer" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" 462 0.050 [some-upstream-service-9000] [some-other-upstream-5000] 10.0.50.80:9000 19437 0.049 200 752178adb17130b291aefd8c386279e7',
198                        "ingress_upstreaminfo"
199                    )
200                "#},
201                result: Ok(indoc! {r#"{
202                    "body_bytes_size": 12312,
203                    "http_referer": "https://10.0.0.1/some/referer",
204                    "http_user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
205                    "proxy_alternative_upstream_name": "some-other-upstream-5000",
206                    "proxy_upstream_name": "some-upstream-service-9000",
207                    "remote_addr": "0.0.0.0",
208                    "remote_user": "bob",
209                    "req_id": "752178adb17130b291aefd8c386279e7",
210                    "request": "GET /some/path HTTP/2.0",
211                    "request_length": 462,
212                    "request_time": 0.05,
213                    "status": 200,
214                    "timestamp": "2023-03-18T15:00:00Z",
215                    "upstream_addr": "10.0.50.80:9000",
216                    "upstream_response_length": 19437,
217                    "upstream_response_time": 0.049,
218                    "upstream_status": 200
219                }"#}),
220            },
221            example! {
222                title: "Parse via Nginx log format (main)",
223                source: indoc! {r#"
224                    parse_nginx_log!(
225                        s'172.24.0.3 - alice [31/Dec/2024:17:32:06 +0000] "GET / HTTP/1.1" 200 615 "https://domain.tld/path" "curl/8.11.1" "1.2.3.4, 10.10.1.1"',
226                        "main"
227                    )
228                "#},
229                result: Ok(indoc! {r#"{
230                    "body_bytes_size": 615,
231                    "http_referer": "https://domain.tld/path",
232                    "http_user_agent": "curl/8.11.1",
233                    "http_x_forwarded_for": "1.2.3.4, 10.10.1.1",
234                    "remote_addr": "172.24.0.3",
235                    "remote_user": "alice",
236                    "request": "GET / HTTP/1.1",
237                    "status": 200,
238                    "timestamp": "2024-12-31T17:32:06Z"
239                }"#}),
240            },
241        ]
242    }
243}
244
245fn regex_for_format(format: &[u8]) -> &Regex {
246    match format {
247        b"combined" => &log_util::REGEX_NGINX_COMBINED_LOG,
248        b"ingress_upstreaminfo" => &log_util::REGEX_INGRESS_NGINX_UPSTREAMINFO_LOG,
249        b"main" => &log_util::REGEX_NGINX_MAIN_LOG,
250        b"error" => &log_util::REGEX_NGINX_ERROR_LOG,
251        _ => unreachable!(),
252    }
253}
254
255fn time_format_for_format(format: &[u8]) -> String {
256    match format {
257        b"combined" | b"ingress_upstreaminfo" | b"main" => DEFAULT_TIMESTAMP_FORMAT_STR.to_owned(),
258        b"error" => "%Y/%m/%d %H:%M:%S".to_owned(),
259        _ => unreachable!(),
260    }
261}
262
263fn rename_referrer(mut value: Value) -> Value {
264    if let Some(obj) = value.as_object_mut()
265        && let Some(referer) = obj.remove("referrer")
266    {
267        obj.insert("referer".into(), referer);
268    }
269    value
270}
271
272#[derive(Debug, Clone)]
273struct ParseNginxLogFn {
274    value: Box<dyn Expression>,
275    format: Bytes,
276    timestamp_format: Option<Box<dyn Expression>>,
277}
278
279impl FunctionExpression for ParseNginxLogFn {
280    fn resolve(&self, ctx: &mut Context) -> Resolved {
281        let bytes = self.value.resolve(ctx)?;
282        let timestamp_format = self
283            .timestamp_format
284            .as_ref()
285            .map(|expr| expr.resolve(ctx))
286            .transpose()?;
287        let format = &self.format;
288
289        parse_nginx_log(&bytes, timestamp_format, format, ctx)
290    }
291
292    fn type_def(&self, _: &state::TypeState) -> TypeDef {
293        TypeDef::object(match self.format.as_ref() {
294            b"combined" => kind_combined(),
295            b"ingress_upstreaminfo" => kind_ingress_upstreaminfo(),
296            b"main" => kind_main(),
297            b"error" => kind_error(),
298            _ => unreachable!(),
299        })
300        .fallible()
301    }
302}
303
304fn kind_combined() -> BTreeMap<Field, Kind> {
305    BTreeMap::from([
306        ("client".into(), Kind::bytes()),
307        ("user".into(), Kind::bytes().or_null()),
308        ("timestamp".into(), Kind::timestamp()),
309        ("request".into(), Kind::bytes()),
310        ("status".into(), Kind::integer()),
311        ("size".into(), Kind::integer()),
312        ("referer".into(), Kind::bytes().or_null()),
313        ("agent".into(), Kind::bytes().or_null()),
314        ("compression".into(), Kind::bytes().or_null()),
315    ])
316}
317
318fn kind_ingress_upstreaminfo() -> BTreeMap<Field, Kind> {
319    BTreeMap::from([
320        ("remote_addr".into(), Kind::bytes().or_undefined()),
321        ("remote_user".into(), Kind::bytes().or_undefined()),
322        ("timestamp".into(), Kind::timestamp()),
323        ("request".into(), Kind::bytes()),
324        ("status".into(), Kind::integer()),
325        ("body_bytes_size".into(), Kind::integer()),
326        ("http_referer".into(), Kind::bytes().or_undefined()),
327        ("http_user_agent".into(), Kind::bytes().or_undefined()),
328        ("request_length".into(), Kind::integer()),
329        ("request_time".into(), Kind::float()),
330        ("proxy_upstream_name".into(), Kind::bytes()),
331        (
332            "proxy_alternative_upstream_name".into(),
333            Kind::bytes().or_undefined(),
334        ),
335        ("upstream_addr".into(), Kind::bytes()),
336        ("upstream_response_length".into(), Kind::integer()),
337        ("upstream_response_time".into(), Kind::float()),
338        ("upstream_status".into(), Kind::integer()),
339        ("req_id".into(), Kind::bytes()),
340    ])
341}
342
343fn kind_main() -> BTreeMap<Field, Kind> {
344    BTreeMap::from([
345        ("remote_addr".into(), Kind::bytes().or_undefined()),
346        ("remote_user".into(), Kind::bytes().or_undefined()),
347        ("timestamp".into(), Kind::timestamp()),
348        ("request".into(), Kind::bytes()),
349        ("status".into(), Kind::integer()),
350        ("body_bytes_size".into(), Kind::integer()),
351        ("http_referer".into(), Kind::bytes().or_undefined()),
352        ("http_user_agent".into(), Kind::bytes().or_undefined()),
353        ("http_x_forwarded_for".into(), Kind::bytes().or_undefined()),
354    ])
355}
356
357fn kind_error() -> BTreeMap<Field, Kind> {
358    BTreeMap::from([
359        ("timestamp".into(), Kind::timestamp()),
360        ("severity".into(), Kind::bytes()),
361        ("pid".into(), Kind::integer()),
362        ("tid".into(), Kind::integer()),
363        ("cid".into(), Kind::integer()),
364        ("message".into(), Kind::bytes()),
365        ("excess".into(), Kind::float().or_null()),
366        ("zone".into(), Kind::bytes().or_null()),
367        ("client".into(), Kind::bytes().or_null()),
368        ("server".into(), Kind::bytes().or_null()),
369        ("request".into(), Kind::bytes().or_null()),
370        ("upstream".into(), Kind::bytes().or_null()),
371        ("host".into(), Kind::bytes().or_null()),
372        ("port".into(), Kind::bytes().or_null()),
373    ])
374}
375
376#[cfg(test)]
377mod tests {
378    use crate::btreemap;
379    use chrono::prelude::*;
380
381    use super::*;
382
383    test_function![
384        parse_combined_log => ParseNginxLog;
385
386        combined_line_valid {
387            args: func_args![
388                value: r#"172.17.0.1 - - [31/Mar/2021:12:04:07 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.75.0" "-""#,
389                format: "combined"
390            ],
391            want: Ok(btreemap! {
392                "client" => "172.17.0.1",
393                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2021-03-31T12:04:07Z").unwrap().into()),
394                "referer" => "-",
395                "request" => "GET / HTTP/1.1",
396                "status" => 200,
397                "size" => 612,
398                "agent" => "curl/7.75.0",
399            }),
400            tdef: TypeDef::object(kind_combined()).fallible(),
401        }
402
403        combined_line_valid_no_compression {
404            args: func_args![
405                value: r#"0.0.0.0 - - [23/Apr/2021:14:59:24 +0000] "GET /my-path/manifest.json HTTP/1.1" 200 504 "https://my-url.com/my-path" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36""#,
406                format: "combined"
407            ],
408            want: Ok(btreemap! {
409                "client" => "0.0.0.0",
410                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2021-04-23T14:59:24Z").unwrap().into()),
411                "request" => "GET /my-path/manifest.json HTTP/1.1",
412                "status" => 200,
413                "size" => 504,
414                "referer" => "https://my-url.com/my-path",
415                "agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36",
416            }),
417            tdef: TypeDef::object(kind_combined()).fallible(),
418        }
419
420        combined_line_valid_empty_fields {
421            args: func_args![
422                value: r#"0.0.0.0 - - [04/Oct/2022:04:34:49 +0000] "" 400 0 "" """#,
423                format: "combined"
424            ],
425            want: Ok(btreemap! {
426                "client" => "0.0.0.0",
427                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2022-10-04T04:34:49Z").unwrap().into()),
428                "request" => "",
429                "status" => 400,
430                "size" => 0,
431                "referer" => "",
432                "agent" => "",
433            }),
434            tdef: TypeDef::object(kind_combined()).fallible(),
435        }
436
437        combined_line_valid_bot_request {
438            args: func_args![
439                value: r#"0.0.0.0 - - [04/Oct/2022:03:07:27 +0000] "]&\xDF\xBDV\xE7\xBB<\x10;\xA2b}\xDFM\x1D" 400 150 "-" "-""#,
440                format: "combined"
441            ],
442            want: Ok(btreemap! {
443                "client" => "0.0.0.0",
444                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2022-10-04T03:07:27Z").unwrap().into()),
445                "request" => r"]&\xDF\xBDV\xE7\xBB<\x10;\xA2b}\xDFM\x1D",
446                "status" => 400,
447                "size" => 150,
448                "referer" => "-",
449                "agent" => "-",
450            }),
451            tdef: TypeDef::object(kind_combined()).fallible(),
452        }
453
454        combined_line_valid_empty_referer {
455            args: func_args![
456                value: r#"0.0.0.0 - - [04/Oct/2022:03:07:27 +0000] "]&\xDF\xBDV\xE7\xBB<\x10;\xA2b}\xDFM\x1D" 400 150 "" "-""#,
457                format: "combined"
458            ],
459            want: Ok(btreemap! {
460                "client" => "0.0.0.0",
461                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2022-10-04T03:07:27Z").unwrap().into()),
462                "request" => r"]&\xDF\xBDV\xE7\xBB<\x10;\xA2b}\xDFM\x1D",
463                "status" => 400,
464                "size" => 150,
465                "referer" => "",
466                "agent" => "-",
467            }),
468            tdef: TypeDef::object(kind_combined()).fallible(),
469        }
470
471        combined_line_valid_all_fields {
472            args: func_args![
473                value: r#"172.17.0.1 - alice [01/Apr/2021:12:02:31 +0000] "POST /not-found HTTP/1.1" 404 153 "http://localhost/somewhere" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36" "2.75""#,
474                format: "combined"
475            ],
476            want: Ok(btreemap! {
477                "client" => "172.17.0.1",
478                "user" => "alice",
479                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2021-04-01T12:02:31Z").unwrap().into()),
480                "request" => "POST /not-found HTTP/1.1",
481                "status" => 404,
482                "size" => 153,
483                "referer" => "http://localhost/somewhere",
484                "agent" => "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36",
485                "compression" => "2.75",
486            }),
487            tdef: TypeDef::object(kind_combined()).fallible(),
488        }
489
490        ingress_nginx_upstreaminfo_valid_without_optional_fields {
491            args: func_args![
492                value: r#"0.0.0.0 - - [18/Mar/2023:15:00:00 +0000] "GET /some/path HTTP/2.0" 200 12312 "https://10.0.0.1/some/referer" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" 462 0.050 [some-upstream-service-9000] [] 10.0.50.80:9000 19437 0.049 200 752178adb17130b291aefd8c386279e7"#,
493                format: "ingress_upstreaminfo"
494            ],
495            want: Ok(btreemap! {
496                "remote_addr" => "0.0.0.0",
497                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2023-03-18T15:00:00Z").unwrap().into()),
498                "request" => "GET /some/path HTTP/2.0",
499                "status" => 200,
500                "body_bytes_size" => 12312,
501                "http_referer" => "https://10.0.0.1/some/referer",
502                "http_user_agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
503                "request_length" => 462,
504                "request_time" => 0.050,
505                "proxy_upstream_name" => "some-upstream-service-9000",
506                "upstream_addr" => "10.0.50.80:9000",
507                "upstream_response_length" => 19437,
508                "upstream_response_time" => 0.049,
509                "upstream_status" => 200,
510                "req_id" => "752178adb17130b291aefd8c386279e7",
511            }),
512            tdef: TypeDef::object(kind_ingress_upstreaminfo()).fallible(),
513        }
514
515        ingress_nginx_upstreaminfo_valid_missing_upstream {
516            args: func_args![
517                value: r#"0.0.0.0 - - [18/Mar/2023:15:00:00 +0000] "GET /some/path HTTP/2.0" 200 12312 "https://10.0.0.1/some/referer" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" 462 0.050 [some-upstream-service-9000] [] - - - - 752178adb17130b291aefd8c386279e7"#,
518                format: "ingress_upstreaminfo"
519            ],
520            want: Ok(btreemap! {
521                "remote_addr" => "0.0.0.0",
522                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2023-03-18T15:00:00Z").unwrap().into()),
523                "request" => "GET /some/path HTTP/2.0",
524                "status" => 200,
525                "body_bytes_size" => 12312,
526                "http_referer" => "https://10.0.0.1/some/referer",
527                "http_user_agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
528                "request_length" => 462,
529                "request_time" => 0.050,
530                "upstream_addr" => "-",
531                "proxy_upstream_name" => "some-upstream-service-9000",
532                "req_id" => "752178adb17130b291aefd8c386279e7",
533            }),
534            tdef: TypeDef::object(kind_ingress_upstreaminfo()).fallible(),
535        }
536
537        ingress_nginx_upstreaminfo_valid_empty_referer {
538            args: func_args![
539                value: r#"0.0.0.0 - - [18/Mar/2023:15:00:00 +0000] "GET /some/path HTTP/2.0" 200 12312 "" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" 462 0.050 [some-upstream-service-9000] [] - - - - 752178adb17130b291aefd8c386279e7"#,
540                format: "ingress_upstreaminfo"
541            ],
542            want: Ok(btreemap! {
543                "remote_addr" => "0.0.0.0",
544                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2023-03-18T15:00:00Z").unwrap().into()),
545                "request" => "GET /some/path HTTP/2.0",
546                "status" => 200,
547                "body_bytes_size" => 12312,
548                "http_referer" => "",
549                "http_user_agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
550                "request_length" => 462,
551                "request_time" => 0.050,
552                "upstream_addr" => "-",
553                "proxy_upstream_name" => "some-upstream-service-9000",
554                "req_id" => "752178adb17130b291aefd8c386279e7",
555            }),
556            tdef: TypeDef::object(kind_ingress_upstreaminfo()).fallible(),
557        }
558
559        ingress_nginx_upstreaminfo_valid_all_fields {
560            args: func_args![
561                value: r#"0.0.0.0 - bob [18/Mar/2023:15:00:00 +0000] "GET /some/path HTTP/2.0" 200 12312 "https://10.0.0.1/some/referer" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" 462 0.050 [some-upstream-service-9000] [some-other-upstream-5000] 10.0.50.80:9000 19437 0.049 200 752178adb17130b291aefd8c386279e7"#,
562                format: "ingress_upstreaminfo"
563            ],
564            want: Ok(btreemap! {
565                "remote_addr" => "0.0.0.0",
566                "remote_user" => "bob",
567                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2023-03-18T15:00:00Z").unwrap().into()),
568                "request" => "GET /some/path HTTP/2.0",
569                "status" => 200,
570                "body_bytes_size" => 12312,
571                "http_referer" => "https://10.0.0.1/some/referer",
572                "http_user_agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
573                "request_length" => 462,
574                "request_time" => 0.050,
575                "proxy_upstream_name" => "some-upstream-service-9000",
576                "proxy_alternative_upstream_name" => "some-other-upstream-5000",
577                "upstream_addr" => "10.0.50.80:9000",
578                "upstream_response_length" => 19437,
579                "upstream_response_time" => 0.049,
580                "upstream_status" => 200,
581                "req_id" => "752178adb17130b291aefd8c386279e7",
582            }),
583            tdef: TypeDef::object(kind_ingress_upstreaminfo()).fallible(),
584        }
585
586        main_line_valid_no_proxy {
587            args: func_args![
588                value: r#"172.24.0.3 - - [31/Dec/2024:17:32:06 +0000] "GET / HTTP/1.1" 200 615 "-" "curl/8.11.1" "-""#,
589                format: "main"
590            ],
591            want: Ok(btreemap! {
592                "remote_addr" => "172.24.0.3",
593                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2024-12-31T17:32:06Z").unwrap().into()),
594                "request" => "GET / HTTP/1.1",
595                "status" => 200,
596                "body_bytes_size" => 615,
597                "http_user_agent" => "curl/8.11.1",
598            }),
599            tdef: TypeDef::object(kind_main()).fallible(),
600        }
601
602        main_line_valid_single_proxy {
603            args: func_args![
604                value: r#"172.24.0.3 - - [31/Dec/2024:17:32:06 +0000] "GET / HTTP/1.1" 200 615 "-" "curl/8.11.1" "172.24.0.1""#,
605                format: "main"
606            ],
607            want: Ok(btreemap! {
608                "remote_addr" => "172.24.0.3",
609                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2024-12-31T17:32:06Z").unwrap().into()),
610                "request" => "GET / HTTP/1.1",
611                "status" => 200,
612                "body_bytes_size" => 615,
613                "http_user_agent" => "curl/8.11.1",
614                "http_x_forwarded_for" => "172.24.0.1",
615            }),
616            tdef: TypeDef::object(kind_main()).fallible(),
617        }
618
619        main_line_valid_two_proxies {
620            args: func_args![
621                value: r#"172.24.0.3 - - [31/Dec/2024:17:32:06 +0000] "GET / HTTP/1.1" 200 615 "-" "curl/8.11.1" "1.2.3.4, 10.10.1.1""#,
622                format: "main"
623            ],
624            want: Ok(btreemap! {
625                "remote_addr" => "172.24.0.3",
626                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2024-12-31T17:32:06Z").unwrap().into()),
627                "request" => "GET / HTTP/1.1",
628                "status" => 200,
629                "body_bytes_size" => 615,
630                "http_user_agent" => "curl/8.11.1",
631                "http_x_forwarded_for" => "1.2.3.4, 10.10.1.1",
632            }),
633            tdef: TypeDef::object(kind_main()).fallible(),
634        }
635
636        main_line_valid_all_fields {
637            args: func_args![
638                value: r#"172.24.0.2 - alice [03/Jan/2025:16:42:58 +0000] "GET / HTTP/1.1" 200 615 "http://domain.tld/path" "curl/8.11.1" "1.2.3.4, 10.10.1.1""#,
639                format: "main"
640            ],
641            want: Ok(btreemap! {
642                "remote_addr" => "172.24.0.2",
643                "remote_user" => "alice",
644                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2025-01-03T16:42:58Z").unwrap().into()),
645                "request" => "GET / HTTP/1.1",
646                "status" => 200,
647                "body_bytes_size" => 615,
648                "http_referer" => "http://domain.tld/path",
649                "http_user_agent" => "curl/8.11.1",
650                "http_x_forwarded_for" => "1.2.3.4, 10.10.1.1",
651            }),
652            tdef: TypeDef::object(kind_main()).fallible(),
653        }
654
655        main_line_invalid {
656            args: func_args![
657                value: r#"2025/01/03 16:41:26 [error] 31#31: *3 open() "/usr/share/nginx/html/favicon.ico" failed (2: No such file or directory), client: 172.24.0.2, server: localhost, request: "GET /favicon.ico HTTP/1.1", host: "localhost:4080", referrer: "http://localhost:4080/""#,
658                format: "main"
659            ],
660            want: Err("failed parsing log line"),
661            tdef: TypeDef::object(kind_main()).fallible(),
662        }
663
664        error_line_valid {
               // Baseline `error` format case: `31#31` is expected to split into `pid` and
               // `tid`, `*1` becomes `cid`, and the trailing `client`/`server`/`request`/`host`
               // pairs become fields with the surrounding double quotes removed.
               // The input timestamp has no zone; the expected value is the same wall-clock
               // time in UTC (presumably the test context's timezone is UTC — TODO confirm).
665            args: func_args![
666                value: r#"2021/04/01 13:02:31 [error] 31#31: *1 open() "/usr/share/nginx/html/not-found" failed (2: No such file or directory), client: 172.17.0.1, server: localhost, request: "POST /not-found HTTP/1.1", host: "localhost:8081""#,
667                format: "error"
668            ],
669            want: Ok(btreemap! {
670                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2021-04-01T13:02:31Z").unwrap().into()),
671                "severity" => "error",
672                "pid" => 31,
673                "tid" => 31,
674                "cid" => 1,
675                "message" => "open() \"/usr/share/nginx/html/not-found\" failed (2: No such file or directory)",
676                "client" => "172.17.0.1",
677                "server" => "localhost",
678                "request" => "POST /not-found HTTP/1.1",
679                "host" => "localhost:8081",
680            }),
681            tdef: TypeDef::object(kind_error()).fallible(),
682        }
683
684        error_line_with_referrer {
               // Same shape as the baseline error line plus a trailing `referrer: "..."` pair.
               // Note the spelling difference: the log line says `referrer`, while the
               // expected output key is `referer`.
685            args: func_args![
686                value: r#"2021/06/03 09:30:50 [error] 32#32: *6 open() "/usr/share/nginx/html/favicon.ico" failed (2: No such file or directory), client: 10.244.0.0, server: localhost, request: "GET /favicon.ico HTTP/1.1", host: "65.21.190.83:31256", referrer: "http://65.21.190.83:31256/""#,
687                format: "error"
688            ],
689            want: Ok(btreemap! {
690                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2021-06-03T09:30:50Z").unwrap().into()),
691                "severity" => "error",
692                "pid" => 32,
693                "tid" => 32,
694                "cid" => 6,
695                "message" => "open() \"/usr/share/nginx/html/favicon.ico\" failed (2: No such file or directory)",
696                "client" => "10.244.0.0",
697                "server" => "localhost",
698                "request" => "GET /favicon.ico HTTP/1.1",
699                "host" => "65.21.190.83:31256",
700                "referer" => "http://65.21.190.83:31256/",
701            }),
702            tdef: TypeDef::object(kind_error()).fallible(),
703        }
704
705        error_line_with_empty_referrer {
               // The line ends with `referrer: ""`. The expected output still contains the
               // `referer` key, mapped to an empty string rather than being omitted.
706            args: func_args![
707                value: r#"2021/06/03 09:30:50 [error] 32#32: *6 open() "/usr/share/nginx/html/favicon.ico" failed (2: No such file or directory), client: 10.244.0.0, server: localhost, request: "GET /favicon.ico HTTP/1.1", host: "65.21.190.83:31256", referrer: """#,
708                format: "error"
709            ],
710            want: Ok(btreemap! {
711                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2021-06-03T09:30:50Z").unwrap().into()),
712                "severity" => "error",
713                "pid" => 32,
714                "tid" => 32,
715                "cid" => 6,
716                "message" => "open() \"/usr/share/nginx/html/favicon.ico\" failed (2: No such file or directory)",
717                "client" => "10.244.0.0",
718                "server" => "localhost",
719                "request" => "GET /favicon.ico HTTP/1.1",
720                "host" => "65.21.190.83:31256",
721                "referer" => "",
722            }),
723            tdef: TypeDef::object(kind_error()).fallible(),
724        }
725
726        error_line_starting {
               // Minimal error-format line: there is no `*N` connection id and no trailing
               // `key: value` pairs, so the expected object holds only `timestamp`,
               // `severity`, `pid`, `tid` and `message` (no `cid`).
727            args: func_args![
728                value: "2021/06/17 19:25:59 [notice] 133309#133309: signal process started",
729                format: "error"
730            ],
731            want: Ok(btreemap! {
732                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2021-06-17T19:25:59Z").unwrap().into()),
733                "severity" => "notice",
734                "pid" => 133_309,
735                "tid" => 133_309,
736                "message" => "signal process started",
737            }),
738            tdef: TypeDef::object(kind_error()).fallible(),
739        }
740
741        error_line_with_empty_values {
               // Several trailing fields are empty in the input: `server: ` (unquoted, nothing
               // before the comma), `host: ""` and `referrer: ""`. Each is expected to appear
               // in the output as an empty string; the non-empty `upstream` is kept as-is.
742            args: func_args![
743                value: r#"2023/09/08 13:50:28 [warn] 3#3: *531 an upstream response is buffered to a temporary file /var/lib/nginx/tmp/fastcgi/6/03/0000000036 while reading upstream, client: 10.224.1.1, server: , request: "GET / HTTP/1.1", upstream: "fastcgi://127.0.0.1:9000", host: "", referrer: """#,
744                format: "error"
745            ],
746            want: Ok(btreemap! {
747                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2023-09-08T13:50:28Z").unwrap().into()),
748                "severity" => "warn",
749                "pid" => 3,
750                "tid" => 3,
751                "cid" => 531,
752                "message" => "an upstream response is buffered to a temporary file /var/lib/nginx/tmp/fastcgi/6/03/0000000036 while reading upstream",
753                "client" => "10.224.1.1",
754                "server" => "",
755                "request" => "GET / HTTP/1.1",
756                "upstream" => "fastcgi://127.0.0.1:9000",
757                "host" => "",
758                "referer" => "",
759            }),
760            tdef: TypeDef::object(kind_error()).fallible(),
761        }
762
763        error_line_with_upstream {
               // The line carries an `upstream: "..."` pair and ends there: no `host` or
               // `referrer` follows, and neither key appears in the expected output.
764            args: func_args![
765                value: r#"2022/04/15 08:16:13 [error] 7164#7164: *20 connect() failed (113: No route to host) while connecting to upstream, client: 10.244.0.0, server: test.local, request: "GET / HTTP/2.0", upstream: "http://127.0.0.1:80/""#,
766                format: "error"
767            ],
768            want: Ok(btreemap! {
769                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2022-04-15T08:16:13Z").unwrap().into()),
770                "severity" => "error",
771                "pid" => 7164,
772                "tid" => 7164,
773                "cid" => 20,
774                "message" => "connect() failed (113: No route to host) while connecting to upstream",
775                "client" => "10.244.0.0",
776                "server" => "test.local",
777                "request" => "GET / HTTP/2.0",
778                "upstream" => "http://127.0.0.1:80/",
779            }),
780            tdef: TypeDef::object(kind_error()).fallible(),
781        }
782
783        error_rate_limit {
               // Rate-limiting message of the form `limiting requests, excess: <n> by zone "<z>"`.
               // The expected output keeps only "limiting requests" as `message` and lifts
               // `excess` (as a float) and `zone` (unquoted) into their own fields.
784            args: func_args![
785                value: r#"2022/05/30 20:56:22 [error] 7164#7164: *38068741 limiting requests, excess: 50.416 by zone "api_access_token", client: 10.244.0.0, server: test.local, request: "GET / HTTP/2.0", host: "127.0.0.1:8080""#,
786                format: "error"
787            ],
788            want: Ok(btreemap! {
789                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2022-05-30T20:56:22Z").unwrap().into()),
790                "severity" => "error",
791                "pid" => 7164,
792                "tid" => 7164,
793                "cid" => 38_068_741,
794                "message" => "limiting requests",
795                "excess" => 50.416,
796                "zone" => "api_access_token",
797                "client" => "10.244.0.0",
798                "server" => "test.local",
799                "request" => "GET / HTTP/2.0",
800                "host" => "127.0.0.1:8080",
801            }),
802            tdef: TypeDef::object(kind_error()).fallible(),
803        }
804
805        error_rate_delaying {
               // Variant of the rate-limit case: the message is "delaying requests" and the
               // input has a comma after the excess value (`excess: 50.416, by zone ...`).
               // The expected `excess` and `zone` fields are the same as in `error_rate_limit`.
806            args: func_args![
807                value: r#"2022/05/30 20:56:22 [error] 7164#7164: *38068741 delaying requests, excess: 50.416, by zone "api_access_token", client: 10.244.0.0, server: test.local, request: "GET / HTTP/2.0", host: "127.0.0.1:8080""#,
808                format: "error"
809            ],
810            want: Ok(btreemap! {
811                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2022-05-30T20:56:22Z").unwrap().into()),
812                "severity" => "error",
813                "pid" => 7164,
814                "tid" => 7164,
815                "cid" => 38_068_741,
816                "message" => "delaying requests",
817                "excess" => 50.416,
818                "zone" => "api_access_token",
819                "client" => "10.244.0.0",
820                "server" => "test.local",
821                "request" => "GET / HTTP/2.0",
822                "host" => "127.0.0.1:8080",
823            }),
824            tdef: TypeDef::object(kind_error()).fallible(),
825        }
826
827        error_message_with_comma {
               // The free-text message itself contains a comma ("... closed connection, so
               // upstream ..."). The expected `message` keeps the full text up to `, client:`
               // instead of being cut at the first comma.
               // This is also the one case here where pid and tid differ (`3134#0`).
828            args: func_args![
829                value: r#"2022/05/30 20:56:22 [info] 3134#0: *99247 epoll_wait() reported that client prematurely closed connection, so upstream connection is closed too (104: Connection reset by peer) while reading upstream, client: 10.244.0.0, server: example.org, request: "GET / HTTP/1.1", upstream: "fastcgi://unix:/run/php-fpm/php8.3-fpm.sock:", host: "example:8080""#,
830                format: "error"
831            ],
832            want: Ok(btreemap! {
833                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2022-05-30T20:56:22Z").unwrap().into()),
834                "severity" => "info",
835                "pid" => 3134,
836                "tid" => 0,
837                "cid" => 99_247,
838                "message" => "epoll_wait() reported that client prematurely closed connection, so upstream connection is closed too (104: Connection reset by peer) while reading upstream",
839                "client" => "10.244.0.0",
840                "server" => "example.org",
841                "request" => "GET / HTTP/1.1",
842                "host" => "example:8080",
843                "upstream" => "fastcgi://unix:/run/php-fpm/php8.3-fpm.sock:",
844            }),
845            tdef: TypeDef::object(kind_error()).fallible(),
846        }
847    ];
848}