// enrichment/lib.rs

1#![deny(warnings)]
2
3pub mod find_enrichment_table_records;
4pub mod get_enrichment_table_record;
5pub mod tables;
6
7#[cfg(test)]
8mod test_util;
9mod vrl_util;
10
use dyn_clone::DynClone;
use indoc::indoc;
use snafu::Snafu;
pub use tables::{TableRegistry, TableSearch};
use vrl::{
    compiler::Function,
    value::{ObjectMap, Value},
};

/// Opaque handle identifying an index on an enrichment table.
///
/// [`Table::add_index`] returns one of these, and the lookup methods on
/// [`Table`] accept it through their optional `index` argument. The wrapped
/// `usize` is public; how it is interpreted is up to the table implementation.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct IndexHandle(pub usize);

23#[derive(Clone, Debug, PartialEq, Eq)]
24pub enum Condition<'a> {
25    /// Condition exactly matches the field value.
26    Equals { field: &'a str, value: Value },
27    /// The date in the field is between from and to (inclusive).
28    BetweenDates {
29        field: &'a str,
30        from: chrono::DateTime<chrono::Utc>,
31        to: chrono::DateTime<chrono::Utc>,
32    },
33    /// The date in the field is greater than or equal to `from`.
34    FromDate {
35        field: &'a str,
36        from: chrono::DateTime<chrono::Utc>,
37    },
38    /// The date in the field is less than or equal to `to`.
39    ToDate {
40        field: &'a str,
41        to: chrono::DateTime<chrono::Utc>,
42    },
43}
44
/// Case sensitivity mode, passed to both lookups and index creation on a
/// [`Table`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Case {
    /// Letter case is significant when comparing values.
    Sensitive,
    /// Letter case is ignored when comparing values.
    Insensitive,
}

51#[derive(Clone, Debug, PartialEq, Eq, Snafu)]
52#[snafu(visibility(pub(crate)))]
53pub enum Error {
54    #[snafu(display("No rows found"))]
55    NoRowsFound,
56    #[snafu(display("More than one row found"))]
57    MoreThanOneRowFound,
58    #[snafu(display("Field(s) '{}' missing from dataset", fields.join(", ")))]
59    MissingDatasetFields { fields: Vec<String> },
60    #[snafu(display("Column contains invalid UTF-8: {source}"))]
61    InvalidUtfInColumn { source: std::str::Utf8Error },
62    #[snafu(display("Failed to encode value: {details}"))]
63    FailedToEncodeValue { details: String },
64    #[snafu(display("Only one condition is allowed"))]
65    OnlyOneConditionAllowed,
66    #[snafu(display("Only equality condition is allowed"))]
67    OnlyEqualityConditionAllowed,
68    #[snafu(display("{kind} condition must be specified"))]
69    MissingCondition { kind: &'static str },
70    #[snafu(display("{field} field is required"))]
71    MissingRequiredField { field: &'static str },
72    #[snafu(display("Only one field is allowed"))]
73    OnlyOneFieldAllowed,
74    #[snafu(display("Invalid address: {source}"))]
75    InvalidAddress { source: std::net::AddrParseError },
76    #[snafu(transparent)]
77    Internal { source: InternalError },
78    #[snafu(display("Table {table} not loaded"))]
79    TableNotLoaded { table: String },
80}
81
82#[derive(Clone, Debug, PartialEq, Eq, Snafu)]
83pub enum InternalError {
84    #[snafu(display("finish_load called prematurely"))]
85    FinishLoadCalled,
86    #[snafu(display("finish_load not called"))]
87    FinishLoadNotCalled,
88    // Unreachable in normal operation: we only decode values that were serialized by us.
89    #[snafu(display("Failed to decode value from memory table: {details}"))]
90    FailedToDecode { details: String },
91}
92
93impl From<Error> for vrl::prelude::ExpressionError {
94    fn from(error: Error) -> Self {
95        vrl::prelude::ExpressionError::Error {
96            message: error.to_string(),
97            labels: vec![],
98            notes: vec![],
99        }
100    }
101}
102
103/// Enrichment tables represent additional data sources that can be used to enrich the event data
104/// passing through Vector.
105pub trait Table: DynClone {
106    /// Search the enrichment table data with the given condition.
107    /// All conditions must match (AND).
108    ///
109    /// # Errors
110    /// Errors if no rows, or more than 1 row is found.
111    fn find_table_row<'a>(
112        &self,
113        case: Case,
114        condition: &'a [Condition<'a>],
115        select: Option<&[String]>,
116        wildcard: Option<&Value>,
117        index: Option<IndexHandle>,
118    ) -> Result<ObjectMap, Error>;
119
120    /// Search the enrichment table data with the given condition.
121    /// All conditions must match (AND).
122    /// Can return multiple matched records
123    fn find_table_rows<'a>(
124        &self,
125        case: Case,
126        condition: &'a [Condition<'a>],
127        select: Option<&[String]>,
128        wildcard: Option<&Value>,
129        index: Option<IndexHandle>,
130    ) -> Result<Vec<ObjectMap>, Error>;
131
132    /// Hints to the enrichment table what data is going to be searched to allow it to index the
133    /// data in advance.
134    ///
135    /// # Errors
136    /// Errors if the fields are not in the table.
137    fn add_index(&mut self, case: Case, fields: &[&str]) -> Result<IndexHandle, Error>;
138
139    /// Returns a list of the field names that are in each index
140    fn index_fields(&self) -> Vec<(Case, Vec<String>)>;
141
142    /// Returns true if the underlying data has changed and the table needs reloading.
143    fn needs_reload(&self) -> bool;
144}
145
146dyn_clone::clone_trait_object!(Table);
147
148pub fn vrl_functions() -> Vec<Box<dyn Function>> {
149    vec![
150        Box::new(get_enrichment_table_record::GetEnrichmentTableRecord) as _,
151        Box::new(find_enrichment_table_records::FindEnrichmentTableRecords) as _,
152    ]
153}
154
155pub(crate) const ENRICHMENT_TABLE_EXPLAINER: &str = indoc! {r#"
156    For `file` enrichment tables, this condition needs to be a VRL object in which
157    the key-value pairs indicate a field to search mapped to a value to search in that field.
158    This function returns the rows that match the provided condition(s). _All_ fields need to
159    match for rows to be returned; if any fields do not match, then no rows are returned.
160
161    There are three forms of search criteria:
162
163    1. **Exact match search**. The given field must match the value exactly. Case sensitivity
164       can be specified using the `case_sensitive` argument. An exact match search can use an
165       index directly into the dataset, which should make this search fairly "cheap" from a
166       performance perspective.
167
168    2. **Wildcard match search**. The given fields specified by the exact match search may also
169        be matched exactly to the value provided to the `wildcard` parameter.
170        A wildcard match search can also use an index directly into the dataset.
171
172    3. **Date range search**. The given field must be greater than or equal to the `from` date
173       and/or less than or equal to the `to` date. A date range search involves
174       sequentially scanning through the rows that have been located using any exact match
175       criteria. This can be an expensive operation if there are many rows returned by any exact
176       match criteria. Therefore, use date ranges as the _only_ criteria when the enrichment
177       data set is very small.
178
179    For `geoip` and `mmdb` enrichment tables, this condition needs to be a VRL object with a single key-value pair
180    whose value needs to be a valid IP address. Example: `{"ip": .ip }`. If a return field is expected
181    and without a value, `null` is used. This table can return the following fields:
182
183    * ISP databases:
184        * `autonomous_system_number`
185        * `autonomous_system_organization`
186        * `isp`
187        * `organization`
188
189    * City databases:
190        * `city_name`
191        * `continent_code`
192        * `country_code`
193        * `country_name`
194        * `region_code`
195        * `region_name`
196        * `metro_code`
197        * `latitude`
198        * `longitude`
199        * `postal_code`
200        * `timezone`
201
202    * Connection-Type databases:
203        * `connection_type`
204
205    To use this function, you need to update your configuration to
206    include an
207    [`enrichment_tables`](/docs/reference/configuration/global-options/#enrichment_tables)
208    parameter.
209"#};