vrl/datadog/search/
grammar.rs

1#![allow(clippy::upper_case_acronyms)]
2use itertools::Itertools;
3use pest::iterators::Pair;
4use pest_derive::Parser;
5
6use crate::datadog_search_syntax::BooleanType;
7
8use super::node::{Comparison, ComparisonValue, QueryNode, Range};
9
10#[derive(Debug, Parser)]
11#[grammar = "src/datadog/search/grammar.pest"]
12pub struct EventPlatformQuery;
13
/// Pseudo-field name meaning "no specific attribute": a bare `*` searched
/// against this field is turned into a match-all node by the visitor.
pub const DEFAULT_FIELD: &str = "_default_";
/// Pseudo-field name whose term or phrase value names an attribute that must
/// exist (`_exists_:attr`).
const EXISTS_FIELD: &str = "_exists_";
/// Pseudo-field name whose term or phrase value names an attribute that must
/// be absent (`_missing_:attr`).
const MISSING_FIELD: &str = "_missing_";
17
18/// The QueryVisitor is responsible for going through the output of our
19/// parser and consuming the various tokens produced, digesting them and
20/// converting them into QueryNodes.  As per the name, we're doing this
21/// via a Visitor pattern and walking our way through the syntax tree.
22pub struct QueryVisitor;
23
24impl QueryVisitor {
25    pub fn visit_queryroot(token: Pair<Rule>, default_field: &str) -> QueryNode {
26        let contents = token.into_inner().next().unwrap();
27        match contents.as_rule() {
28            Rule::query => Self::visit_query(contents, default_field),
29            // A queryroot will only ever contain a query
30            _ => unreachable!(),
31        }
32    }
33
34    fn visit_query(token: Pair<Rule>, default_field: &str) -> QueryNode {
35        let contents = token.into_inner();
36        let mut is_not: bool = false;
37
38        //  AND takes precedence over OR.
39        // We will combine each consecutive clause in an AND group,
40        // and create a new and_group every time we encounter an OR.
41        // Finally, we will combine all the and_groups with OR.
42
43        let mut and_groups: Vec<QueryNode> = Vec::new();
44
45        let mut and_group: Vec<QueryNode> = Vec::new();
46
47        for node in contents {
48            let query_node: Option<QueryNode> = match node.as_rule() {
49                Rule::multiterm => Some(Self::visit_multiterm(node, default_field)),
50                Rule::conjunction => {
51                    let inner = node.into_inner().next().unwrap();
52                    match inner.as_rule() {
53                        Rule::AND => (),
54                        Rule::OR => {
55                            // close the current and_group and create a new one
56                            and_groups.push(QueryNode::new_boolean(BooleanType::And, and_group));
57                            and_group = Vec::new();
58                        }
59                        _ => unreachable!(),
60                    };
61                    None
62                }
63                Rule::modifiers => {
64                    let inner = node.into_inner().next().unwrap();
65                    match inner.as_rule() {
66                        Rule::PLUS => (),
67                        Rule::NOT => {
68                            is_not = true;
69                        }
70                        _ => unreachable!(),
71                    };
72                    None
73                }
74                Rule::clause => Some(Self::visit_clause(node, default_field)),
75                _ => unreachable!(),
76            };
77            // If we found a clause to add to our list, add it
78            if let Some(mut n) = query_node {
79                if is_not {
80                    is_not = false;
81
82                    n = QueryNode::NegatedNode { node: Box::new(n) }
83                }
84
85                and_group.push(n);
86            }
87        }
88
89        and_groups.push(QueryNode::new_boolean(BooleanType::And, and_group));
90        let query_node = QueryNode::new_boolean(BooleanType::Or, and_groups);
91
92        if let QueryNode::NegatedNode { node } = query_node {
93            // if the node is a negated MatchAllDocs, return MatchNoDocs
94            if let QueryNode::MatchAllDocs = *node {
95                return QueryNode::MatchNoDocs;
96            }
97            return QueryNode::NegatedNode { node };
98        }
99
100        query_node
101    }
102
103    fn visit_multiterm(token: Pair<Rule>, default_field: &str) -> QueryNode {
104        let contents = token.into_inner();
105        let mut terms: Vec<String> = Vec::new();
106        for node in contents {
107            match node.as_rule() {
108                // Can probably get a bit more suave with string allocation here but meh.
109                Rule::TERM => terms.push(Self::visit_term(node)),
110                _ => unreachable!(),
111            }
112        }
113        QueryNode::AttributeTerm {
114            attr: String::from(default_field),
115            value: terms.join(" "),
116        }
117    }
118
119    fn visit_clause(clause: Pair<Rule>, default_field: &str) -> QueryNode {
120        let mut field: Option<&str> = None;
121        for item in clause.into_inner() {
122            // As per the parser, a clause will only ever contain:
123            // matchall, field, value, query.
124            match item.as_rule() {
125                Rule::matchall => return QueryNode::MatchAllDocs,
126                Rule::field => {
127                    field = Some(Self::visit_field(item));
128                }
129                Rule::value => {
130                    // As per the parser, value can only ever be one of:
131                    // STAR, PHRASE, TERM, TERM_PREFIX, TERM_GLOB, range, comparison.
132                    let value_contents = item.into_inner().next().unwrap();
133                    match ((field.unwrap_or(default_field)), value_contents.as_rule()) {
134                        (EXISTS_FIELD, Rule::TERM) => {
135                            return QueryNode::AttributeExists {
136                                attr: Self::visit_term(value_contents),
137                            };
138                        }
139                        (EXISTS_FIELD, Rule::PHRASE) => {
140                            return QueryNode::AttributeExists {
141                                attr: Self::visit_phrase(value_contents),
142                            };
143                        }
144                        (MISSING_FIELD, Rule::TERM) => {
145                            return QueryNode::AttributeMissing {
146                                attr: Self::visit_term(value_contents),
147                            };
148                        }
149                        (MISSING_FIELD, Rule::PHRASE) => {
150                            return QueryNode::AttributeMissing {
151                                attr: Self::visit_phrase(value_contents),
152                            };
153                        }
154                        (DEFAULT_FIELD, Rule::STAR) => return QueryNode::MatchAllDocs,
155                        (f, Rule::STAR) => {
156                            return QueryNode::AttributeWildcard {
157                                attr: unescape(f),
158                                wildcard: String::from("*"),
159                            };
160                        }
161                        (f, Rule::TERM) => {
162                            return QueryNode::AttributeTerm {
163                                attr: unescape(f),
164                                value: Self::visit_term(value_contents),
165                            };
166                        }
167                        (f, Rule::PHRASE) => {
168                            return QueryNode::QuotedAttribute {
169                                attr: unescape(f),
170                                phrase: Self::visit_phrase(value_contents),
171                            };
172                        }
173                        (f, Rule::TERM_PREFIX) => {
174                            return QueryNode::AttributePrefix {
175                                attr: unescape(f),
176                                prefix: Self::visit_prefix(value_contents),
177                            };
178                        }
179                        (f, Rule::TERM_GLOB) => {
180                            return QueryNode::AttributeWildcard {
181                                attr: unescape(f),
182                                wildcard: Self::visit_wildcard(value_contents),
183                            };
184                        }
185                        (f, Rule::range) => {
186                            let range_values = value_contents.into_inner();
187
188                            // There should always be 4; brackets + 2 range values.
189                            let (lower_inclusive, lower, upper, upper_inclusive) =
190                                match range_values
191                                    .map(Self::visit_range_value)
192                                    .collect_tuple()
193                                    .expect("should be exactly 4 range values")
194                                {
195                                    (
196                                        Range::Comparison(lc),
197                                        Range::Value(lv),
198                                        Range::Value(rv),
199                                        Range::Comparison(rc),
200                                    ) => match (lc, rc) {
201                                        (Comparison::Gte, Comparison::Lte) => (true, lv, rv, true),
202                                        (Comparison::Gt, Comparison::Lt) => (false, lv, rv, false),
203                                        _ => panic!("invalid range comparison"),
204                                    },
205                                    _ => panic!("invalid range value"),
206                                };
207
208                            return QueryNode::AttributeRange {
209                                attr: unescape(f),
210                                lower,
211                                lower_inclusive,
212                                upper,
213                                upper_inclusive,
214                            };
215                        }
216                        (f, Rule::comparison) => {
217                            let mut compiter = value_contents.into_inner();
218                            let comparator = Self::visit_operator(
219                                compiter.next().unwrap().into_inner().next().unwrap(),
220                            );
221                            let comparison_value = compiter.next().unwrap();
222                            let value = match comparison_value.as_rule() {
223                                Rule::TERM => {
224                                    ComparisonValue::String(Self::visit_term(comparison_value))
225                                }
226                                Rule::PHRASE => {
227                                    ComparisonValue::String(Self::visit_phrase(comparison_value))
228                                }
229                                Rule::NUMERIC_TERM => comparison_value.as_str().into(),
230                                _ => unreachable!(),
231                            };
232                            return QueryNode::AttributeComparison {
233                                attr: unescape(f),
234                                comparator,
235                                value,
236                            };
237                        }
238                        // We've covered all the cases, so this should never happen
239                        _ => unreachable!(),
240                    }
241                }
242                Rule::query => return Self::visit_query(item, field.unwrap_or(default_field)),
243                // We've covered all the cases, so this should never happen
244                _ => unreachable!(),
245            }
246        }
247        QueryNode::MatchAllDocs
248    }
249
250    fn visit_operator(token: Pair<Rule>) -> Comparison {
251        match token.as_rule() {
252            Rule::GT => Comparison::Gt,
253            Rule::GT_EQ => Comparison::Gte,
254            Rule::LT => Comparison::Lt,
255            Rule::LT_EQ => Comparison::Lte,
256            Rule::LBRACKET => Comparison::Gt,
257            Rule::RBRACKET => Comparison::Lt,
258            _ => unreachable!(),
259        }
260    }
261
262    fn visit_range_value(token: Pair<Rule>) -> Range {
263        match token.as_rule() {
264            Rule::RANGE_VALUE => Range::Value(token.as_str().into()),
265            Rule::LBRACKET => Range::Comparison(Comparison::Gt),
266            Rule::LSQRBRACKET => Range::Comparison(Comparison::Gte),
267            Rule::RBRACKET => Range::Comparison(Comparison::Lt),
268            Rule::RSQRBRACKET => Range::Comparison(Comparison::Lte),
269            _ => unreachable!(),
270        }
271    }
272
273    fn visit_term(token: Pair<Rule>) -> String {
274        unescape(token.as_str())
275    }
276
277    fn visit_prefix(token: Pair<Rule>) -> String {
278        let prefix_string = token.as_str();
279        unescape(&prefix_string[..prefix_string.len() - 1])
280    }
281
282    fn visit_wildcard(token: Pair<Rule>) -> String {
283        unescape(token.as_str())
284    }
285
286    fn visit_phrase(token: Pair<Rule>) -> String {
287        let quoted_string = token.as_str();
288        unescape(&quoted_string[1..quoted_string.len() - 1])
289    }
290
291    fn visit_field(token: Pair<'_, Rule>) -> &str {
292        let inner = token.into_inner().next().unwrap();
293        if let Rule::TERM = inner.as_rule() {
294            return inner.as_str();
295        }
296        "BROKEN"
297    }
298}
299
/// Strips backslash escapes from `input`, returning a newly allocated string.
///
/// A backslash is dropped and the character right after it is copied
/// verbatim, whatever that character is (including another backslash).  No
/// distinction is made between characters that REQUIRE escaping and those
/// that don't.  A lone backslash at the very end of `input` has nothing to
/// escape and is silently discarded.
pub fn unescape(input: &str) -> String {
    // Unescaping will only ever make a string shorter, so the input length
    // is a safe upper bound for the allocation.
    let mut unescaped = String::with_capacity(input.len());
    let mut remaining = input.chars();

    while let Some(current) = remaining.next() {
        if current != '\\' {
            unescaped.push(current);
        } else if let Some(escaped) = remaining.next() {
            // Whatever follows the backslash is taken literally.
            unescaped.push(escaped);
        }
        // TODO:  Check for unterminated escape sequence and signal a problem
        // (a backslash with nothing after it currently just disappears).
    }

    unescaped
}