vrl/stdlib/
parse_tokens.rs

1use crate::compiler::prelude::*;
2use crate::core::tokenize;
3
4fn parse_tokens(value: &Value) -> Resolved {
5    let string = value.try_bytes_utf8_lossy()?;
6    let tokens: Value = tokenize::parse(&string)
7        .into_iter()
8        .map(|token| match token {
9            "" | "-" => Value::Null,
10            _ => token.to_owned().into(),
11        })
12        .collect::<Vec<_>>()
13        .into();
14    Ok(tokens)
15}
16
/// VRL function definition for `parse_tokens`.
#[derive(Clone, Copy, Debug)]
pub struct ParseTokens;
19
impl Function for ParseTokens {
    /// Name the function is invoked by in VRL source.
    fn identifier(&self) -> &'static str {
        "parse_tokens"
    }

    /// User-facing description shown in generated documentation.
    fn usage(&self) -> &'static str {
        indoc! {r#"
            Parses the `value` in token format. A token is considered to be one of the following:

            * A word surrounded by whitespace.
            * Text delimited by double quotes: `".."`. Quotes can be included in the token if they are escaped by a backslash (`\`).
            * Text delimited by square brackets: `[..]`. Closing square brackets can be included in the token if they are escaped by a backslash (`\`).
        "#}
    }

    fn category(&self) -> &'static str {
        Category::Parse.as_ref()
    }

    /// Reasons this function can fail at runtime (surfaced in docs/errors).
    fn internal_failure_reasons(&self) -> &'static [&'static str] {
        &["`value` is not a properly formatted tokenized string."]
    }

    /// On success the function always yields an array.
    fn return_kind(&self) -> u16 {
        kind::ARRAY
    }

    fn notices(&self) -> &'static [&'static str] {
        &[indoc! {"
            All token values are returned as strings. We recommend manually coercing values to
            desired types as you see fit.
        "}]
    }

    fn examples(&self) -> &'static [Example] {
        &[example! {
            title: "Parse tokens",
            source: r#"parse_tokens(s'A sentence "with \"a\" sentence inside" and [some brackets]')"#,
            result: Ok(
                r#"["A", "sentence", "with \\\"a\\\" sentence inside", "and", "some brackets"]"#,
            ),
        }]
    }

    /// Compiles a call site into the `ParseTokensFn` expression node.
    fn compile(
        &self,
        _state: &state::TypeState,
        _ctx: &mut FunctionCompileContext,
        arguments: ArgumentList,
    ) -> Compiled {
        let value = arguments.required("value");

        Ok(ParseTokensFn { value }.as_expr())
    }

    /// Single required parameter: the string to tokenize.
    fn parameters(&self) -> &'static [Parameter] {
        const PARAMETERS: &[Parameter] = &[Parameter::required(
            "value",
            kind::BYTES,
            "The string to tokenize.",
        )];
        PARAMETERS
    }
}
84
/// Compiled expression node backing the `parse_tokens` function call.
#[derive(Debug, Clone)]
struct ParseTokensFn {
    // Expression that resolves to the string to tokenize.
    value: Box<dyn Expression>,
}
89
90impl FunctionExpression for ParseTokensFn {
91    fn resolve(&self, ctx: &mut Context) -> Resolved {
92        let value = self.value.resolve(ctx)?;
93        parse_tokens(&value)
94    }
95
96    fn type_def(&self, _: &state::TypeState) -> TypeDef {
97        TypeDef::array(Collection::from_unknown(Kind::bytes()))
98    }
99}
100
#[cfg(test)]
mod tests {
    use super::*;

    // Exercises parse_tokens against an Apache-style access log line; the two
    // bare `-` fields are expected to come back as null.
    test_function![
        parse_tokens => ParseTokens;

        parses {
            args: func_args![value: "217.250.207.207 - - [07/Sep/2020:16:38:00 -0400] \"DELETE /deliverables/next-generation/user-centric HTTP/1.1\" 205 11881"],
            want: Ok(vec![
                            "217.250.207.207".into(),
                            Value::Null,
                            Value::Null,
                            "07/Sep/2020:16:38:00 -0400".into(),
                            "DELETE /deliverables/next-generation/user-centric HTTP/1.1".into(),
                            "205".into(),
                            "11881".into(),

                    ]),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }
    ];
}