vrl/stdlib/
parse_common_log.rs

1use super::log_util;
2use crate::compiler::prelude::*;
3use std::collections::BTreeMap;
4use std::sync::LazyLock;
5
6static DEFAULT_TIMESTAMP_FORMAT: LazyLock<Value> =
7    LazyLock::new(|| Value::Bytes(Bytes::from("%d/%b/%Y:%T %z")));
8
9static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
10    vec![
11        Parameter::required("value", kind::BYTES, "The string to parse."),
12        Parameter::optional("timestamp_format", kind::BYTES, "The [date/time format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) to use for
13encoding the timestamp.")
14            .default(&DEFAULT_TIMESTAMP_FORMAT),
15    ]
16});
17
18fn parse_common_log(bytes: &Value, timestamp_format: &Value, ctx: &Context) -> Resolved {
19    let message = bytes.try_bytes_utf8_lossy()?;
20    let timestamp_format = timestamp_format.try_bytes_utf8_lossy()?.to_string();
21
22    log_util::parse_message(
23        &log_util::REGEX_APACHE_COMMON_LOG,
24        &message,
25        &timestamp_format,
26        *ctx.timezone(),
27        "common",
28    )
29    .map_err(Into::into)
30}
31
/// Marker type for the `parse_common_log` function; it carries no state.
#[derive(Debug, Clone, Copy)]
pub struct ParseCommonLog;
34
35impl Function for ParseCommonLog {
36    fn identifier(&self) -> &'static str {
37        "parse_common_log"
38    }
39
40    fn usage(&self) -> &'static str {
41        "Parses the `value` using the [Common Log Format](https://httpd.apache.org/docs/current/logs.html#common) (CLF)."
42    }
43
44    fn category(&self) -> &'static str {
45        Category::Parse.as_ref()
46    }
47
48    fn internal_failure_reasons(&self) -> &'static [&'static str] {
49        &[
50            "`value` does not match the Common Log Format.",
51            "`timestamp_format` is not a valid format string.",
52            "The timestamp in `value` fails to parse using the provided `timestamp_format`.",
53        ]
54    }
55
56    fn return_kind(&self) -> u16 {
57        kind::OBJECT
58    }
59
60    fn notices(&self) -> &'static [&'static str] {
61        &[
62            "Missing information in the log message may be indicated by `-`. These fields are omitted in the result.",
63        ]
64    }
65
66    fn parameters(&self) -> &'static [Parameter] {
67        PARAMETERS.as_slice()
68    }
69
70    fn compile(
71        &self,
72        _state: &state::TypeState,
73        _ctx: &mut FunctionCompileContext,
74        arguments: ArgumentList,
75    ) -> Compiled {
76        let value = arguments.required("value");
77        let timestamp_format = arguments.optional("timestamp_format");
78
79        Ok(ParseCommonLogFn {
80            value,
81            timestamp_format,
82        }
83        .as_expr())
84    }
85
86    fn examples(&self) -> &'static [Example] {
87        &[
88            example! {
89                title: "Parse using Common Log Format (with default timestamp format)",
90                source: r#"parse_common_log!(s'127.0.0.1 bob frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326')"#,
91                result: Ok(indoc! {
92                    r#"{
93                        "host":"127.0.0.1",
94                        "identity":"bob",
95                        "message":"GET /apache_pb.gif HTTP/1.0",
96                        "method":"GET",
97                        "path":"/apache_pb.gif",
98                        "protocol":"HTTP/1.0",
99                        "size":2326,
100                        "status":200,
101                        "timestamp":"2000-10-10T20:55:36Z",
102                        "user":"frank"
103                    }"#
104                }),
105            },
106            example! {
107                title: "Parse using Common Log Format (with custom timestamp format)",
108                source: indoc! {r#"
109                    parse_common_log!(
110                        s'127.0.0.1 bob frank [2000-10-10T20:55:36Z] "GET /apache_pb.gif HTTP/1.0" 200 2326',
111                        "%+"
112                    )
113                "#},
114                result: Ok(indoc! {
115                    r#"{
116                        "host":"127.0.0.1",
117                        "identity":"bob",
118                        "message":"GET /apache_pb.gif HTTP/1.0",
119                        "method":"GET",
120                        "path":"/apache_pb.gif",
121                        "protocol":"HTTP/1.0",
122                        "size":2326,
123                        "status":200,
124                        "timestamp":"2000-10-10T20:55:36Z",
125                        "user":"frank"
126                    }"#
127                }),
128            },
129        ]
130    }
131}
132
133#[derive(Debug, Clone)]
134struct ParseCommonLogFn {
135    value: Box<dyn Expression>,
136    timestamp_format: Option<Box<dyn Expression>>,
137}
138
139impl FunctionExpression for ParseCommonLogFn {
140    fn resolve(&self, ctx: &mut Context) -> Resolved {
141        let bytes = self.value.resolve(ctx)?;
142        let timestamp_format = self
143            .timestamp_format
144            .map_resolve_with_default(ctx, || DEFAULT_TIMESTAMP_FORMAT.clone())?;
145
146        parse_common_log(&bytes, &timestamp_format, ctx)
147    }
148
149    fn type_def(&self, _: &state::TypeState) -> TypeDef {
150        TypeDef::object(inner_kind()).fallible()
151    }
152}
153
154fn inner_kind() -> BTreeMap<Field, Kind> {
155    BTreeMap::from([
156        (Field::from("host"), Kind::bytes() | Kind::null()),
157        (Field::from("identity"), Kind::bytes() | Kind::null()),
158        (Field::from("user"), Kind::bytes() | Kind::null()),
159        (Field::from("timestamp"), Kind::timestamp() | Kind::null()),
160        (Field::from("message"), Kind::bytes() | Kind::null()),
161        (Field::from("method"), Kind::bytes() | Kind::null()),
162        (Field::from("path"), Kind::bytes() | Kind::null()),
163        (Field::from("protocol"), Kind::bytes() | Kind::null()),
164        (Field::from("status"), Kind::integer() | Kind::null()),
165        (Field::from("size"), Kind::integer() | Kind::null()),
166    ])
167}
168
#[cfg(test)]
mod tests {
    use super::*;

    use crate::btreemap;
    use chrono::prelude::*;

    /// Timestamp expected from every fixture that carries one.
    fn expected_timestamp() -> Value {
        let parsed = DateTime::parse_from_rfc3339("2000-10-10T20:55:36Z").unwrap();
        Value::Timestamp(parsed.into())
    }

    /// Type definition asserted by every case.
    fn expected_tdef() -> TypeDef {
        TypeDef::object(inner_kind()).fallible()
    }

    test_function![
        parse_common_log => ParseCommonLog;

        // Fully populated line using the default timestamp format.
        log_line_valid {
            args: func_args![value: r#"127.0.0.1 bob frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326"#],
            want: Ok(btreemap! {
                "host" => "127.0.0.1",
                "identity" => "bob",
                "user" => "frank",
                "timestamp" => expected_timestamp(),
                "message" => "GET /apache_pb.gif HTTP/1.0",
                "method" => "GET",
                "path" => "/apache_pb.gif",
                "protocol" => "HTTP/1.0",
                "status" => 200,
                "size" => 2326,
            }),
            tdef: expected_tdef(),
        }

        // Every field is `-`, so the result is an empty object.
        log_line_valid_empty {
            args: func_args![value: "- - - - - - -"],
            want: Ok(BTreeMap::new()),
            tdef: expected_tdef(),
        }

        // Same as above, with the timestamp bracketed and the request quoted.
        log_line_valid_empty_variant {
            args: func_args![value: r#"- - - [-] "-" - -"#],
            want: Ok(BTreeMap::new()),
            tdef: expected_tdef(),
        }

        // Only a timestamp is present, parsed with a caller-supplied format.
        log_line_valid_with_timestamp_format {
            args: func_args![value: r#"- - - [2000-10-10T20:55:36Z] "-" - -"#,
                             timestamp_format: "%+",
            ],
            want: Ok(btreemap! {
                "timestamp" => expected_timestamp(),
            }),
            tdef: expected_tdef(),
        }

        // Input that does not match the Common Log Format at all.
        log_line_invalid {
            args: func_args![value: "not a common log line"],
            want: Err("failed parsing common log line"),
            tdef: expected_tdef(),
        }

        // Line matches, but the timestamp does not fit the default format.
        log_line_invalid_timestamp {
            args: func_args![value: "- - - [1234] - - -"],
            want: Err("failed parsing timestamp 1234 using format %d/%b/%Y:%T %z: input contains invalid characters"),
            tdef: expected_tdef(),
        }
    ];
}