vector/enrichment_tables/
geoip.rs

1//! Handles enrichment tables for `type = geoip`.
2//! Enrichment data is loaded from one of the MaxMind GeoIP databases,
3//! [MaxMind GeoIP2][maxmind] or [GeoLite2 binary city database][geolite].
4//!
5//! [maxmind]: https://dev.maxmind.com/geoip/geoip2/downloadable
6//! [geolite]: https://dev.maxmind.com/geoip/geoip2/geolite2/#Download_Access
7use std::{collections::BTreeMap, fs, net::IpAddr, path::PathBuf, sync::Arc, time::SystemTime};
8
9use maxminddb::{
10    Reader,
11    geoip2::{AnonymousIp, City, ConnectionType, Isp},
12};
13use ordered_float::NotNan;
14use vector_lib::{
15    configurable::configurable_component,
16    enrichment::{Case, Condition, IndexHandle, Table},
17};
18use vrl::value::{ObjectMap, Value};
19
20use crate::config::{EnrichmentTableConfig, GenerateConfig};
21
/// The MaxMind database products that the `geoip` enrichment table can read.
///
/// MaxMind GeoIP database files have a type field we can use to recognize specific
/// products. The `TryFrom<&str>` implementation maps that type string to a variant and
/// returns an error for an unknown type.
#[derive(Copy, Clone, Debug)]
pub enum DatabaseKind {
    /// Autonomous system database (type string `GeoLite2-ASN`).
    Asn,
    /// ISP database (type string `GeoIP2-ISP`).
    Isp,
    /// Connection type database (type string `GeoIP2-Connection-Type`).
    ConnectionType,
    /// City database (type string `GeoIP2-City` or `GeoLite2-City`).
    City,
    /// Anonymous IP database (type string `GeoIP2-Anonymous-IP`).
    AnonymousIp,
}
33
34impl TryFrom<&str> for DatabaseKind {
35    type Error = ();
36
37    fn try_from(value: &str) -> Result<Self, Self::Error> {
38        match value {
39            "GeoLite2-ASN" => Ok(Self::Asn),
40            "GeoIP2-ISP" => Ok(Self::Isp),
41            "GeoIP2-Connection-Type" => Ok(Self::ConnectionType),
42            "GeoIP2-City" | "GeoLite2-City" => Ok(Self::City),
43            "GeoIP2-Anonymous-IP" => Ok(Self::AnonymousIp),
44            _ => Err(()),
45        }
46    }
47}
48
49/// Configuration for the `geoip` enrichment table.
50#[derive(Clone, Debug, Eq, PartialEq)]
51#[configurable_component(enrichment_table("geoip"))]
52pub struct GeoipConfig {
53    /// Path to the [MaxMind GeoIP2][geoip2] or [GeoLite2 binary city database file][geolite2]
54    /// (**GeoLite2-City.mmdb**).
55    ///
56    /// Other databases, such as the country database, are not supported.
57    /// `mmdb` enrichment table can be used for other databases.
58    ///
59    /// [geoip2]: https://dev.maxmind.com/geoip/geoip2/downloadable
60    /// [geolite2]: https://dev.maxmind.com/geoip/geoip2/geolite2/#Download_Access
61    pub path: PathBuf,
62
63    /// The locale to use when querying the database.
64    ///
65    /// MaxMind includes localized versions of some of the fields within their database, such as
66    /// country name. This setting can control which of those localized versions are returned by the
67    /// transform.
68    ///
69    /// More information on which portions of the geolocation data are localized, and what languages
70    /// are available, can be found [here][locale_docs].
71    ///
72    /// [locale_docs]: https://support.maxmind.com/hc/en-us/articles/4414877149467-IP-Geolocation-Data#h_01FRRGRYTGZB29ERDBZCX3MR8Q
73    #[serde(default = "default_locale")]
74    pub locale: String,
75}
76
/// Returns the locale used when the configuration does not specify one (`"en"`).
fn default_locale() -> String {
    // At the time of writing, the valid locales are: "de", "en", "es", "fr", "ja", "pt-BR",
    // "ru", and "zh-CN".
    //
    // The up-to-date list of locales, along with more information, is published at
    // https://dev.maxmind.com/geoip/docs/databases/city-and-country?lang=en.

    // TODO: could we detect the system locale and use that as the default locale if it matches one
    // of the available locales in the dataset, and then fallback to "en" otherwise?
    String::from("en")
}
88
89impl GenerateConfig for GeoipConfig {
90    fn generate_config() -> toml::Value {
91        toml::Value::try_from(Self {
92            path: "/path/to/GeoLite2-City.mmdb".into(),
93            locale: default_locale(),
94        })
95        .unwrap()
96    }
97}
98
99impl EnrichmentTableConfig for GeoipConfig {
100    async fn build(
101        &self,
102        _: &crate::config::GlobalOptions,
103    ) -> crate::Result<Box<dyn Table + Send + Sync>> {
104        Ok(Box::new(Geoip::new(self.clone())?))
105    }
106}
107
108#[derive(Clone)]
109/// A struct that implements [vector_lib::enrichment::Table] to handle loading enrichment data from a GeoIP database.
110pub struct Geoip {
111    config: GeoipConfig,
112    dbreader: Arc<maxminddb::Reader<Vec<u8>>>,
113    dbkind: DatabaseKind,
114    last_modified: SystemTime,
115}
116
117impl Geoip {
118    /// Creates a new GeoIP struct from the provided config.
119    pub fn new(config: GeoipConfig) -> crate::Result<Self> {
120        let dbreader = Arc::new(Reader::open_readfile(&config.path)?);
121        let dbkind =
122            DatabaseKind::try_from(dbreader.metadata.database_type.as_str()).map_err(|_| {
123                format!(
124                    "Unsupported MMDB database type ({}). Use `mmdb` enrichment table instead.",
125                    dbreader.metadata.database_type
126                )
127            })?;
128
129        // Check if we can read database with dummy Ip.
130        let ip = IpAddr::V4(std::net::Ipv4Addr::UNSPECIFIED);
131        let result = match dbkind {
132            DatabaseKind::Asn | DatabaseKind::Isp => dbreader.lookup::<Isp>(ip).map(|_| ()),
133            DatabaseKind::ConnectionType => dbreader.lookup::<ConnectionType>(ip).map(|_| ()),
134            DatabaseKind::City => dbreader.lookup::<City>(ip).map(|_| ()),
135            DatabaseKind::AnonymousIp => dbreader.lookup::<AnonymousIp>(ip).map(|_| ()),
136        };
137
138        match result {
139            Ok(_) => Ok(Geoip {
140                last_modified: fs::metadata(&config.path)?.modified()?,
141                dbreader,
142                dbkind,
143                config,
144            }),
145            Err(error) => Err(error.into()),
146        }
147    }
148
149    fn lookup(&self, ip: IpAddr, select: Option<&[String]>) -> Option<ObjectMap> {
150        let mut map = ObjectMap::new();
151        let mut add_field = |key: &str, value: Option<Value>| {
152            if select
153                .map(|fields| fields.iter().any(|field| field == key))
154                .unwrap_or(true)
155            {
156                map.insert(key.into(), value.unwrap_or(Value::Null));
157            }
158        };
159
160        macro_rules! add_field {
161            ($k:expr_2021, $v:expr_2021) => {
162                add_field($k, $v.map(Into::into))
163            };
164        }
165
166        match self.dbkind {
167            DatabaseKind::Asn | DatabaseKind::Isp => {
168                let data = self.dbreader.lookup::<Isp>(ip).ok()??;
169
170                add_field!("autonomous_system_number", data.autonomous_system_number);
171                add_field!(
172                    "autonomous_system_organization",
173                    data.autonomous_system_organization
174                );
175                add_field!("isp", data.isp);
176                add_field!("organization", data.organization);
177            }
178            DatabaseKind::City => {
179                let data = self.dbreader.lookup::<City>(ip).ok()??;
180
181                add_field!(
182                    "city_name",
183                    self.take_translation(data.city.as_ref().and_then(|c| c.names.as_ref()))
184                );
185
186                add_field!("continent_code", data.continent.and_then(|c| c.code));
187
188                let country = data.country.as_ref();
189                add_field!("country_code", country.and_then(|country| country.iso_code));
190                add_field!(
191                    "country_name",
192                    self.take_translation(country.and_then(|c| c.names.as_ref()))
193                );
194
195                let location = data.location.as_ref();
196                add_field!("timezone", location.and_then(|location| location.time_zone));
197                add_field!(
198                    "latitude",
199                    location
200                        .and_then(|location| location.latitude)
201                        .map(|latitude| Value::Float(
202                            NotNan::new(latitude).expect("latitude cannot be Nan")
203                        ))
204                );
205                add_field!(
206                    "longitude",
207                    location
208                        .and_then(|location| location.longitude)
209                        .map(|longitude| NotNan::new(longitude).expect("longitude cannot be Nan"))
210                );
211                add_field!(
212                    "metro_code",
213                    location.and_then(|location| location.metro_code)
214                );
215
216                // last subdivision is most specific per https://github.com/maxmind/GeoIP2-java/blob/39385c6ce645374039450f57208b886cf87ade47/src/main/java/com/maxmind/geoip2/model/AbstractCityResponse.java#L96-L107
217                let subdivision = data.subdivisions.as_ref().and_then(|s| s.last());
218                add_field!(
219                    "region_name",
220                    self.take_translation(subdivision.and_then(|s| s.names.as_ref()))
221                );
222                add_field!(
223                    "region_code",
224                    subdivision.and_then(|subdivision| subdivision.iso_code)
225                );
226                add_field!("postal_code", data.postal.and_then(|p| p.code));
227            }
228            DatabaseKind::ConnectionType => {
229                let data = self.dbreader.lookup::<ConnectionType>(ip).ok()??;
230
231                add_field!("connection_type", data.connection_type);
232            }
233            DatabaseKind::AnonymousIp => {
234                let data = self.dbreader.lookup::<AnonymousIp>(ip).ok()??;
235
236                add_field!("is_anonymous", data.is_anonymous);
237                add_field!("is_anonymous_vpn", data.is_anonymous_vpn);
238                add_field!("is_hosting_provider", data.is_hosting_provider);
239                add_field!("is_public_proxy", data.is_public_proxy);
240                add_field!("is_residential_proxy", data.is_residential_proxy);
241                add_field!("is_tor_exit_node", data.is_tor_exit_node);
242            }
243        }
244
245        Some(map)
246    }
247
248    fn take_translation<'a>(
249        &self,
250        translations: Option<&BTreeMap<&str, &'a str>>,
251    ) -> Option<&'a str> {
252        translations
253            .and_then(|translations| translations.get(&*self.config.locale))
254            .copied()
255    }
256}
257
258impl Table for Geoip {
259    /// Search the enrichment table data with the given condition.
260    /// All conditions must match (AND).
261    ///
262    /// # Errors
263    /// Errors if no rows, or more than 1 row is found.
264    fn find_table_row<'a>(
265        &self,
266        case: Case,
267        condition: &'a [Condition<'a>],
268        select: Option<&[String]>,
269        wildcard: Option<&Value>,
270        index: Option<IndexHandle>,
271    ) -> Result<ObjectMap, String> {
272        let mut rows = self.find_table_rows(case, condition, select, wildcard, index)?;
273
274        match rows.pop() {
275            Some(row) if rows.is_empty() => Ok(row),
276            Some(_) => Err("More than 1 row found".to_string()),
277            None => Err("IP not found".to_string()),
278        }
279    }
280
281    /// Search the enrichment table data with the given condition.
282    /// All conditions must match (AND).
283    /// Can return multiple matched records
284    fn find_table_rows<'a>(
285        &self,
286        _: Case,
287        condition: &'a [Condition<'a>],
288        select: Option<&[String]>,
289        _wildcard: Option<&Value>,
290        _: Option<IndexHandle>,
291    ) -> Result<Vec<ObjectMap>, String> {
292        match condition.first() {
293            Some(_) if condition.len() > 1 => Err("Only one condition is allowed".to_string()),
294            Some(Condition::Equals { value, .. }) => {
295                let ip = value
296                    .to_string_lossy()
297                    .parse::<IpAddr>()
298                    .map_err(|_| "Invalid IP address".to_string())?;
299                Ok(self
300                    .lookup(ip, select)
301                    .map(|values| vec![values])
302                    .unwrap_or_default())
303            }
304            Some(_) => Err("Only equality condition is allowed".to_string()),
305            None => Err("IP condition must be specified".to_string()),
306        }
307    }
308
309    /// Hints to the enrichment table what data is going to be searched to allow it to index the
310    /// data in advance.
311    ///
312    /// # Errors
313    /// Errors if the fields are not in the table.
314    fn add_index(&mut self, _: Case, fields: &[&str]) -> Result<IndexHandle, String> {
315        match fields.len() {
316            0 => Err("IP field is required".to_string()),
317            1 => Ok(IndexHandle(0)),
318            _ => Err("Only one field is allowed".to_string()),
319        }
320    }
321
322    /// Returns a list of the field names that are in each index
323    fn index_fields(&self) -> Vec<(Case, Vec<String>)> {
324        Vec::new()
325    }
326
327    /// Returns true if the underlying data has changed and the table needs reloading.
328    fn needs_reload(&self) -> bool {
329        matches!(fs::metadata(&self.config.path)
330            .and_then(|metadata| metadata.modified()),
331            Ok(modified) if modified > self.last_modified)
332    }
333}
334
335impl std::fmt::Debug for Geoip {
336    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
337        write!(
338            f,
339            "Geoip {} database {})",
340            self.config.locale,
341            self.config.path.display()
342        )
343    }
344}
345
#[cfg(test)]
mod tests {
    use super::*;

    // Test databases used by the lookups below.
    const CITY_DB: &str = "tests/data/GeoIP2-City-Test.mmdb";
    const ISP_DB: &str = "tests/data/GeoIP2-ISP-Test.mmdb";
    const ASN_DB: &str = "tests/data/GeoLite2-ASN-Test.mmdb";
    const CONNECTION_TYPE_DB: &str = "tests/data/GeoIP2-Connection-Type-Test.mmdb";
    const ANONYMOUS_IP_DB: &str = "tests/data/GeoIP2-Anonymous-IP-Test.mmdb";

    // An address for which the lookups below expect no result.
    const UNKNOWN_IP: &str = "10.1.12.1";

    /// Builds an `ObjectMap` from a list of `(key, value)` pairs.
    fn object<const N: usize>(entries: [(&str, Value); N]) -> ObjectMap {
        entries
            .into_iter()
            .map(|(key, value)| (key.into(), value))
            .collect()
    }

    /// Looks up `ip` in `database` and returns every field of the matched row, if any.
    fn find(ip: &str, database: &str) -> Option<ObjectMap> {
        find_select(ip, database, None)
    }

    /// Looks up `ip` in `database`, restricting the returned fields to `select` when given.
    fn find_select(ip: &str, database: &str, select: Option<&[String]>) -> Option<ObjectMap> {
        let table = Geoip::new(GeoipConfig {
            path: database.into(),
            locale: default_locale(),
        })
        .unwrap();

        let condition = [Condition::Equals {
            field: "ip",
            value: ip.into(),
        }];

        table
            .find_table_rows(Case::Insensitive, &condition, select, None, None)
            .unwrap()
            .pop()
    }

    #[test]
    fn city_lookup() {
        let values = find("2.125.160.216", CITY_DB).unwrap();

        let expected = object([
            ("city_name", Value::from("Boxford")),
            ("country_code", Value::from("GB")),
            ("continent_code", Value::from("EU")),
            ("country_name", Value::from("United Kingdom")),
            ("region_code", Value::from("WBK")),
            ("region_name", Value::from("West Berkshire")),
            ("timezone", Value::from("Europe/London")),
            ("latitude", Value::from(51.75)),
            ("longitude", Value::from(-1.25)),
            ("postal_code", Value::from("OX1")),
            ("metro_code", Value::Null),
        ]);

        assert_eq!(values, expected);
    }

    #[test]
    fn city_partial_lookup() {
        let fields = ["latitude".to_string(), "longitude".to_string()];
        let values = find_select("2.125.160.216", CITY_DB, Some(fields.as_slice())).unwrap();

        let expected = object([
            ("latitude", Value::from(51.75)),
            ("longitude", Value::from(-1.25)),
        ]);

        assert_eq!(values, expected);
    }

    #[test]
    fn city_lookup_partial_results() {
        let values = find("67.43.156.9", CITY_DB).unwrap();

        let expected = object([
            ("city_name", Value::Null),
            ("country_code", Value::from("BT")),
            ("country_name", Value::from("Bhutan")),
            ("continent_code", Value::from("AS")),
            ("region_code", Value::Null),
            ("region_name", Value::Null),
            ("timezone", Value::from("Asia/Thimphu")),
            ("latitude", Value::from(27.5)),
            ("longitude", Value::from(90.5)),
            ("postal_code", Value::Null),
            ("metro_code", Value::Null),
        ]);

        assert_eq!(values, expected);
    }

    #[test]
    fn city_lookup_no_results() {
        assert!(find(UNKNOWN_IP, CITY_DB).is_none());
    }

    #[test]
    fn isp_lookup() {
        let values = find("208.192.1.2", ISP_DB).unwrap();

        let expected = object([
            ("autonomous_system_number", Value::from(701i64)),
            (
                "autonomous_system_organization",
                Value::from("MCI Communications Services, Inc. d/b/a Verizon Business"),
            ),
            ("isp", Value::from("Verizon Business")),
            ("organization", Value::from("Verizon Business")),
        ]);

        assert_eq!(values, expected);
    }

    #[test]
    fn isp_lookup_partial_results() {
        let values = find("2600:7000::1", ASN_DB).unwrap();

        let expected = object([
            ("autonomous_system_number", Value::from(6939i64)),
            (
                "autonomous_system_organization",
                Value::from("Hurricane Electric, Inc."),
            ),
            ("isp", Value::Null),
            ("organization", Value::Null),
        ]);

        assert_eq!(values, expected);
    }

    #[test]
    fn isp_lookup_no_results() {
        assert!(find(UNKNOWN_IP, ASN_DB).is_none());
    }

    #[test]
    fn connection_type_lookup_success() {
        let values = find("201.243.200.1", CONNECTION_TYPE_DB).unwrap();

        let expected = object([("connection_type", Value::from("Corporate"))]);

        assert_eq!(values, expected);
    }

    #[test]
    fn connection_type_lookup_missing() {
        assert!(find(UNKNOWN_IP, CONNECTION_TYPE_DB).is_none());
    }

    #[test]
    fn custom_mmdb_type_error() {
        let config = GeoipConfig {
            path: "tests/data/custom-type.mmdb".into(),
            locale: default_locale(),
        };

        assert!(Geoip::new(config).is_err());
    }

    #[test]
    fn anonymous_ip_lookup() {
        let values = find("101.99.92.179", ANONYMOUS_IP_DB).unwrap();

        let expected = object([
            ("is_anonymous", Value::from(true)),
            ("is_anonymous_vpn", Value::from(true)),
            ("is_hosting_provider", Value::from(true)),
            ("is_tor_exit_node", Value::from(true)),
            ("is_public_proxy", Value::Null),
            ("is_residential_proxy", Value::Null),
        ]);

        assert_eq!(values, expected);
    }

    #[test]
    fn anonymous_ip_lookup_no_results() {
        assert!(find(UNKNOWN_IP, ANONYMOUS_IP_DB).is_none());
    }
}