vrl/datadog/search/
node.rs

1use regex::Regex;
2use serde::{Deserialize, Deserializer, Serialize, Serializer};
3use std::sync::LazyLock;
4
5use super::grammar::{DEFAULT_FIELD, unescape};
6
/// The relational operators a query may apply when comparing a value.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Comparison {
    /// Strictly greater than (`>`).
    Gt,
    /// Strictly less than (`<`).
    Lt,
    /// Greater than or equal to (`>=`).
    Gte,
    /// Less than or equal to (`<=`).
    Lte,
}

impl Comparison {
    /// Returns the operator symbol for this comparison as used in Lucene query formatting.
    pub fn as_lucene(&self) -> String {
        let symbol = match self {
            Self::Gt => ">",
            Self::Lt => "<",
            Self::Gte => ">=",
            Self::Lte => "<=",
        };
        symbol.to_owned()
    }
}
31
/// The value side of a comparison: a string, an integer, a floating point number,
/// or `Unbounded` when the comparison has no terminating value.
#[derive(Clone, Debug, PartialEq)]
pub enum ComparisonValue {
    /// No terminating value; displayed as `*`.
    Unbounded,
    /// A textual value.
    String(String),
    /// A signed 64-bit integer value.
    Integer(i64),
    /// A 64-bit floating point value.
    Float(f64),
}

impl std::fmt::Display for ComparisonValue {
    /// Writes the raw value; `Unbounded` is written as `*`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Unbounded => f.write_str("*"),
            Self::String(text) => f.write_str(text),
            Self::Integer(int) => write!(f, "{int}"),
            Self::Float(float) => write!(f, "{float}"),
        }
    }
}
53
54impl ComparisonValue {
55    /// Returns a string representing this value in Lucene query formatting
56    pub fn to_lucene(&self) -> String {
57        match self {
58            Self::String(s) => QueryNode::lucene_escape(s),
59            Self::Integer(num) => num.to_string(),
60            Self::Float(num) => num.to_string(),
61            Self::Unbounded => "*".to_string(),
62        }
63    }
64}
65
66impl<T: AsRef<str>> From<T> for ComparisonValue {
67    fn from(s: T) -> Self {
68        let v = escape_quotes(unescape(s.as_ref()));
69
70        if v == "*" {
71            ComparisonValue::Unbounded
72        } else if let Ok(v) = v.parse::<i64>() {
73            ComparisonValue::Integer(v)
74        } else if let Ok(v) = v.parse::<f64>() {
75            ComparisonValue::Float(v)
76        } else {
77            ComparisonValue::String(v)
78        }
79    }
80}
81
82/// This enum represents the tokens in a range, including "greater than (or equal to)"
83/// for the left bracket, "less than (or equal to) in the right bracket, and range values.
84#[derive(Clone, Debug, PartialEq)]
85pub enum Range {
86    Comparison(Comparison),
87    Value(ComparisonValue),
88}
89
/// The Boolean operation (AND or OR) used to combine a group of QueryNodes.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum BooleanType {
    /// Logical conjunction.
    And,
    /// Logical disjunction.
    Or,
}
96
97/// QueryNodes represent specific search criteria to be enforced.
98#[derive(Clone, Debug, PartialEq)]
99pub enum QueryNode {
100    /// Match all documents.
101    MatchAllDocs,
102    /// Match no documents.
103    MatchNoDocs,
104    /// Validate existence of an attribute within a document.
105    AttributeExists { attr: String },
106    /// Validate lack of an attribute within a document.
107    AttributeMissing { attr: String },
108    /// Match an attribute against a specific range of values.
109    AttributeRange {
110        attr: String,
111        lower: ComparisonValue,
112        lower_inclusive: bool,
113        upper: ComparisonValue,
114        upper_inclusive: bool,
115    },
116    /// Compare an attribute against a single value (greater/less than operations).
117    AttributeComparison {
118        attr: String,
119        comparator: Comparison,
120        value: ComparisonValue,
121    },
122    /// Search for an attribute that matches a specific term.
123    AttributeTerm { attr: String, value: String },
124    /// Search for an attribute that matches a specific quoted phrase.
125    QuotedAttribute { attr: String, phrase: String },
126    /// Search for an attribute whose value matches against a specific prefix.
127    AttributePrefix { attr: String, prefix: String },
128    /// Search for an attribute that matches a wildcard or glob string.
129    AttributeWildcard { attr: String, wildcard: String },
130    /// Container node denoting negation of the QueryNode within.
131    NegatedNode { node: Box<QueryNode> },
132    /// Container node for compound Boolean operations.
133    Boolean {
134        oper: BooleanType,
135        nodes: Vec<QueryNode>,
136    },
137}
138
139impl QueryNode {
140    /// Returns a string representing this node in Lucene query formatting.
141    pub fn to_lucene(&self) -> String {
142        // TODO:  I'm using push_string here and there are more efficient string building methods if we care about performance here (we won't)
143        match self {
144            QueryNode::MatchAllDocs => String::from("*:*"),
145            QueryNode::MatchNoDocs => String::from("-*:*"),
146            QueryNode::AttributeExists { attr } => format!("_exists_:{attr}"),
147            QueryNode::AttributeMissing { attr } => format!("_missing_:{attr}"),
148            QueryNode::AttributeRange {
149                attr,
150                lower,
151                lower_inclusive,
152                upper,
153                upper_inclusive,
154            } => {
155                let lower_bracket = if *lower_inclusive { "[" } else { "{" };
156                let upper_bracket = if *upper_inclusive { "]" } else { "}" };
157                Self::is_default_attr(attr)
158                    + &format!(
159                        "{}{} TO {}{}",
160                        lower_bracket,
161                        lower.to_lucene(),
162                        upper.to_lucene(),
163                        upper_bracket
164                    )
165            }
166            QueryNode::AttributeComparison {
167                attr,
168                comparator,
169                value,
170            } => {
171                Self::is_default_attr(attr)
172                    + &format!("{}{}", comparator.as_lucene(), value.to_lucene())
173            }
174            QueryNode::AttributeTerm { attr, value } => {
175                Self::is_default_attr(attr) + &Self::lucene_escape(value)
176            }
177            QueryNode::QuotedAttribute { attr, phrase } => {
178                Self::is_default_attr(attr) + &format!("\"{}\"", &Self::quoted_escape(phrase))
179            }
180            QueryNode::AttributePrefix { attr, prefix } => {
181                Self::is_default_attr(attr) + &format!("{}*", &Self::lucene_escape(prefix))
182            }
183            QueryNode::AttributeWildcard { attr, wildcard } => {
184                Self::is_default_attr(attr) + wildcard
185            }
186            QueryNode::NegatedNode { node } => {
187                if matches!(
188                    **node,
189                    QueryNode::NegatedNode { .. } | QueryNode::Boolean { .. }
190                ) {
191                    format!("NOT ({})", node.to_lucene())
192                } else {
193                    format!("NOT {}", node.to_lucene())
194                }
195            }
196            QueryNode::Boolean {
197                oper: BooleanType::And,
198                nodes,
199                ..
200            } => {
201                if nodes.is_empty() {
202                    return String::from("*:*");
203                }
204                let mut output = String::new();
205                for n in nodes {
206                    if !output.is_empty() {
207                        // Put in ' AND ' if this isn't the first node we wrote
208                        output.push_str(" AND ");
209                    }
210                    if let QueryNode::NegatedNode { node } = n {
211                        output.push_str("NOT ");
212                        let qstr = if let QueryNode::Boolean { .. } = **node {
213                            format!("({})", node.to_lucene())
214                        } else {
215                            node.to_lucene()
216                        };
217                        output.push_str(&qstr);
218                    } else {
219                        let qstr = if let QueryNode::Boolean { .. } = n {
220                            format!("({})", n.to_lucene())
221                        } else {
222                            n.to_lucene()
223                        };
224                        output.push_str(&qstr);
225                    }
226                }
227                output
228            }
229            QueryNode::Boolean {
230                oper: BooleanType::Or,
231                nodes,
232                ..
233            } => {
234                if nodes.is_empty() {
235                    return String::from("-*:*");
236                }
237                let mut output = String::new();
238                for n in nodes {
239                    if !output.is_empty() {
240                        output.push_str(" OR ");
241                    }
242                    let qstr = if let QueryNode::Boolean { .. } = n {
243                        format!("({})", n.to_lucene())
244                    } else {
245                        n.to_lucene()
246                    };
247                    output.push_str(&qstr);
248                }
249                output
250            }
251        }
252    }
253
254    pub fn lucene_escape(input: &str) -> String {
255        let mut output = String::with_capacity(input.len());
256        for c in input.chars() {
257            // : + - = && || > < ! ( ) { } [ ] ^ " ~ * ? : \ /
258            if matches!(
259                c,
260                ':' | '+'
261                    | '-'
262                    | '='
263                    | '>'
264                    | '<'
265                    | '!'
266                    | '('
267                    | ')'
268                    | '{'
269                    | '}'
270                    | '['
271                    | ']'
272                    | '^'
273                    | '"'
274                    | '~'
275                    | '*'
276                    | '?'
277                    | '\\'
278                    | '/'
279            ) {
280                output.push('\\');
281            }
282            // TODO:  We're not catching '&&' and '||' but....does anyone do this?
283            output.push(c);
284        }
285        output
286    }
287
288    fn quoted_escape(input: &str) -> String {
289        let mut output = String::with_capacity(input.len());
290        for c in input.chars() {
291            if matches!(c, '"' | '\\') {
292                output.push('\\');
293            }
294            // TODO:  We're not catching '&&' and '||' but....does anyone do this?
295            output.push(c);
296        }
297        output
298    }
299
300    fn is_default_attr(attr: &str) -> String {
301        if attr == DEFAULT_FIELD {
302            String::new()
303        } else {
304            format!("{attr}:")
305        }
306    }
307
308    /// Group a list of nodes into a single node, using the given conjunction.
309    /// If the group has only one node, return a clone of that node.
310    pub fn new_boolean(conjunction: BooleanType, nodes: Vec<QueryNode>) -> QueryNode {
311        if nodes.len() == 1 {
312            return nodes.into_iter().next().expect("Known to have length 1");
313        }
314
315        QueryNode::Boolean {
316            oper: conjunction,
317            nodes,
318        }
319    }
320}
321
322impl<'de> Deserialize<'de> for QueryNode {
323    fn deserialize<D>(deserializer: D) -> Result<QueryNode, D::Error>
324    where
325        D: Deserializer<'de>,
326    {
327        use serde::de::Error;
328
329        let s = String::deserialize(deserializer)?;
330
331        s.parse::<QueryNode>()
332            .map_err(|e| D::Error::custom(e.to_string()))
333    }
334}
335
336impl Serialize for QueryNode {
337    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
338    where
339        S: Serializer,
340    {
341        serializer.serialize_str(self.to_lucene().as_str())
342    }
343}
344
345static ESCAPE_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new("^\"(.+)\"$").unwrap());
346
347/// Escapes surrounding `"` quotes when distinguishing between quoted terms isn't needed.
348fn escape_quotes<T: AsRef<str>>(value: T) -> String {
349    ESCAPE_RE.replace_all(value.as_ref(), "$1").to_string()
350}
351
#[cfg(test)]
mod tests {
    use super::*;

    /// A node serializes to a JSON string containing its Lucene form.
    #[test]
    fn query_node_serializes_to_string() {
        let node = QueryNode::AttributeExists {
            attr: "something".into(),
        };

        let json = serde_json::to_string(&node).unwrap();

        assert_eq!(json, r#""_exists_:something""#);
    }

    /// A JSON string holding a query deserializes into the matching node.
    #[test]
    fn query_node_deserializes_from_string() {
        let expected = QueryNode::AttributeMissing {
            attr: "something_else".into(),
        };

        let parsed: QueryNode = serde_json::from_str(r#""_missing_:something_else""#).unwrap();

        assert_eq!(parsed, expected);
    }
}