vector/enrichment_tables/
geoip.rs

//! Handles enrichment tables for `type = geoip`.
//! Enrichment data is loaded from one of the MaxMind GeoIP databases,
//! [MaxMind GeoIP2][maxmind] or [GeoLite2 binary city database][geolite].
//!
//! [maxmind]: https://dev.maxmind.com/geoip/geoip2/downloadable
//! [geolite]: https://dev.maxmind.com/geoip/geoip2/geolite2/#Download_Access
7use std::{collections::BTreeMap, fs, net::IpAddr, path::PathBuf, sync::Arc, time::SystemTime};
8
9use maxminddb::{
10    geoip2::{AnonymousIp, City, ConnectionType, Isp},
11    Reader,
12};
13use ordered_float::NotNan;
14use vector_lib::configurable::configurable_component;
15use vector_lib::enrichment::{Case, Condition, IndexHandle, Table};
16use vrl::value::{ObjectMap, Value};
17
18use crate::config::{EnrichmentTableConfig, GenerateConfig};
19
/// The MaxMind database products this enrichment table can read.
///
/// MaxMind GeoIP database files have a type field we can use to recognize specific
/// products. If it is an unknown type, an error will be returned (see the
/// `TryFrom<&str>` implementation for the exact type strings that are accepted).
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DatabaseKind {
    /// Autonomous system database (`GeoLite2-ASN`). Records are read with the same
    /// record type as [`DatabaseKind::Isp`].
    Asn,
    /// ISP database (`GeoIP2-ISP`).
    Isp,
    /// Connection type database (`GeoIP2-Connection-Type`).
    ConnectionType,
    /// City database (`GeoIP2-City` or `GeoLite2-City`).
    City,
    /// Anonymous IP database (`GeoIP2-Anonymous-IP`).
    AnonymousIp,
}
31
32impl TryFrom<&str> for DatabaseKind {
33    type Error = ();
34
35    fn try_from(value: &str) -> Result<Self, Self::Error> {
36        match value {
37            "GeoLite2-ASN" => Ok(Self::Asn),
38            "GeoIP2-ISP" => Ok(Self::Isp),
39            "GeoIP2-Connection-Type" => Ok(Self::ConnectionType),
40            "GeoIP2-City" | "GeoLite2-City" => Ok(Self::City),
41            "GeoIP2-Anonymous-IP" => Ok(Self::AnonymousIp),
42            _ => Err(()),
43        }
44    }
45}
46
/// Configuration for the `geoip` enrichment table.
#[derive(Clone, Debug, Eq, PartialEq)]
#[configurable_component(enrichment_table("geoip"))]
pub struct GeoipConfig {
    /// Path to the [MaxMind GeoIP2][geoip2] or [GeoLite2 binary city database file][geolite2]
    /// (**GeoLite2-City.mmdb**).
    ///
    /// Other databases, such as the country database, are not supported.
    /// The `mmdb` enrichment table can be used for other databases.
    ///
    /// [geoip2]: https://dev.maxmind.com/geoip/geoip2/downloadable
    /// [geolite2]: https://dev.maxmind.com/geoip/geoip2/geolite2/#Download_Access
    pub path: PathBuf,

    /// The locale to use when querying the database.
    ///
    /// MaxMind includes localized versions of some of the fields within their database, such as
    /// country name. This setting controls which of those localized versions are returned by the
    /// enrichment table.
    ///
    /// More information on which portions of the geolocation data are localized, and what languages
    /// are available, can be found [here][locale_docs].
    ///
    /// [locale_docs]: https://support.maxmind.com/hc/en-us/articles/4414877149467-IP-Geolocation-Data#h_01FRRGRYTGZB29ERDBZCX3MR8Q
    // When the option is omitted, serde falls back to `default_locale()`.
    #[serde(default = "default_locale")]
    pub locale: String,
}
74
/// Returns the locale used when the configuration does not name one: `"en"`.
fn default_locale() -> String {
    // Valid locales at the time of writing are: "de", "en", "es", "fr", "ja", "pt-BR", "ru", and
    // "zh-CN".
    //
    // More information, including the up-to-date list of locales, can be found at
    // https://dev.maxmind.com/geoip/docs/databases/city-and-country?lang=en.

    // TODO: could we detect the system locale and use that as the default locale if it matches one
    // of the available locales in the dataset, and then fallback to "en" otherwise?
    String::from("en")
}
86
87impl GenerateConfig for GeoipConfig {
88    fn generate_config() -> toml::Value {
89        toml::Value::try_from(Self {
90            path: "/path/to/GeoLite2-City.mmdb".into(),
91            locale: default_locale(),
92        })
93        .unwrap()
94    }
95}
96
97impl EnrichmentTableConfig for GeoipConfig {
98    async fn build(
99        &self,
100        _: &crate::config::GlobalOptions,
101    ) -> crate::Result<Box<dyn Table + Send + Sync>> {
102        Ok(Box::new(Geoip::new(self.clone())?))
103    }
104}
105
/// A struct that implements [vector_lib::enrichment::Table] to handle loading enrichment data from a GeoIP database.
#[derive(Clone)]
pub struct Geoip {
    /// Configuration the table was built from (database path and locale).
    config: GeoipConfig,
    /// Reader over the database contents; shared through an `Arc` so clones of the table
    /// reuse the same reader.
    dbreader: Arc<maxminddb::Reader<Vec<u8>>>,
    /// Database product, detected from the database's metadata type field in `Geoip::new`.
    dbkind: DatabaseKind,
    /// Modification time of the database file recorded when the table was built; compared
    /// against the file's current modification time in `needs_reload`.
    last_modified: SystemTime,
}
114
115impl Geoip {
116    /// Creates a new GeoIP struct from the provided config.
117    pub fn new(config: GeoipConfig) -> crate::Result<Self> {
118        let dbreader = Arc::new(Reader::open_readfile(&config.path)?);
119        let dbkind =
120            DatabaseKind::try_from(dbreader.metadata.database_type.as_str()).map_err(|_| {
121                format!(
122                    "Unsupported MMDB database type ({}). Use `mmdb` enrichment table instead.",
123                    dbreader.metadata.database_type
124                )
125            })?;
126
127        // Check if we can read database with dummy Ip.
128        let ip = IpAddr::V4(std::net::Ipv4Addr::UNSPECIFIED);
129        let result = match dbkind {
130            DatabaseKind::Asn | DatabaseKind::Isp => dbreader.lookup::<Isp>(ip).map(|_| ()),
131            DatabaseKind::ConnectionType => dbreader.lookup::<ConnectionType>(ip).map(|_| ()),
132            DatabaseKind::City => dbreader.lookup::<City>(ip).map(|_| ()),
133            DatabaseKind::AnonymousIp => dbreader.lookup::<AnonymousIp>(ip).map(|_| ()),
134        };
135
136        match result {
137            Ok(_) => Ok(Geoip {
138                last_modified: fs::metadata(&config.path)?.modified()?,
139                dbreader,
140                dbkind,
141                config,
142            }),
143            Err(error) => Err(error.into()),
144        }
145    }
146
147    fn lookup(&self, ip: IpAddr, select: Option<&[String]>) -> Option<ObjectMap> {
148        let mut map = ObjectMap::new();
149        let mut add_field = |key: &str, value: Option<Value>| {
150            if select
151                .map(|fields| fields.iter().any(|field| field == key))
152                .unwrap_or(true)
153            {
154                map.insert(key.into(), value.unwrap_or(Value::Null));
155            }
156        };
157
158        macro_rules! add_field {
159            ($k:expr_2021, $v:expr_2021) => {
160                add_field($k, $v.map(Into::into))
161            };
162        }
163
164        match self.dbkind {
165            DatabaseKind::Asn | DatabaseKind::Isp => {
166                let data = self.dbreader.lookup::<Isp>(ip).ok()??;
167
168                add_field!("autonomous_system_number", data.autonomous_system_number);
169                add_field!(
170                    "autonomous_system_organization",
171                    data.autonomous_system_organization
172                );
173                add_field!("isp", data.isp);
174                add_field!("organization", data.organization);
175            }
176            DatabaseKind::City => {
177                let data = self.dbreader.lookup::<City>(ip).ok()??;
178
179                add_field!(
180                    "city_name",
181                    self.take_translation(data.city.as_ref().and_then(|c| c.names.as_ref()))
182                );
183
184                add_field!("continent_code", data.continent.and_then(|c| c.code));
185
186                let country = data.country.as_ref();
187                add_field!("country_code", country.and_then(|country| country.iso_code));
188                add_field!(
189                    "country_name",
190                    self.take_translation(country.and_then(|c| c.names.as_ref()))
191                );
192
193                let location = data.location.as_ref();
194                add_field!("timezone", location.and_then(|location| location.time_zone));
195                add_field!(
196                    "latitude",
197                    location
198                        .and_then(|location| location.latitude)
199                        .map(|latitude| Value::Float(
200                            NotNan::new(latitude).expect("latitude cannot be Nan")
201                        ))
202                );
203                add_field!(
204                    "longitude",
205                    location
206                        .and_then(|location| location.longitude)
207                        .map(|longitude| NotNan::new(longitude).expect("longitude cannot be Nan"))
208                );
209                add_field!(
210                    "metro_code",
211                    location.and_then(|location| location.metro_code)
212                );
213
214                // last subdivision is most specific per https://github.com/maxmind/GeoIP2-java/blob/39385c6ce645374039450f57208b886cf87ade47/src/main/java/com/maxmind/geoip2/model/AbstractCityResponse.java#L96-L107
215                let subdivision = data.subdivisions.as_ref().and_then(|s| s.last());
216                add_field!(
217                    "region_name",
218                    self.take_translation(subdivision.and_then(|s| s.names.as_ref()))
219                );
220                add_field!(
221                    "region_code",
222                    subdivision.and_then(|subdivision| subdivision.iso_code)
223                );
224                add_field!("postal_code", data.postal.and_then(|p| p.code));
225            }
226            DatabaseKind::ConnectionType => {
227                let data = self.dbreader.lookup::<ConnectionType>(ip).ok()??;
228
229                add_field!("connection_type", data.connection_type);
230            }
231            DatabaseKind::AnonymousIp => {
232                let data = self.dbreader.lookup::<AnonymousIp>(ip).ok()??;
233
234                add_field!("is_anonymous", data.is_anonymous);
235                add_field!("is_anonymous_vpn", data.is_anonymous_vpn);
236                add_field!("is_hosting_provider", data.is_hosting_provider);
237                add_field!("is_public_proxy", data.is_public_proxy);
238                add_field!("is_residential_proxy", data.is_residential_proxy);
239                add_field!("is_tor_exit_node", data.is_tor_exit_node);
240            }
241        }
242
243        Some(map)
244    }
245
246    fn take_translation<'a>(
247        &self,
248        translations: Option<&BTreeMap<&str, &'a str>>,
249    ) -> Option<&'a str> {
250        translations
251            .and_then(|translations| translations.get(&*self.config.locale))
252            .copied()
253    }
254}
255
256impl Table for Geoip {
257    /// Search the enrichment table data with the given condition.
258    /// All conditions must match (AND).
259    ///
260    /// # Errors
261    /// Errors if no rows, or more than 1 row is found.
262    fn find_table_row<'a>(
263        &self,
264        case: Case,
265        condition: &'a [Condition<'a>],
266        select: Option<&[String]>,
267        wildcard: Option<&Value>,
268        index: Option<IndexHandle>,
269    ) -> Result<ObjectMap, String> {
270        let mut rows = self.find_table_rows(case, condition, select, wildcard, index)?;
271
272        match rows.pop() {
273            Some(row) if rows.is_empty() => Ok(row),
274            Some(_) => Err("More than 1 row found".to_string()),
275            None => Err("IP not found".to_string()),
276        }
277    }
278
279    /// Search the enrichment table data with the given condition.
280    /// All conditions must match (AND).
281    /// Can return multiple matched records
282    fn find_table_rows<'a>(
283        &self,
284        _: Case,
285        condition: &'a [Condition<'a>],
286        select: Option<&[String]>,
287        _wildcard: Option<&Value>,
288        _: Option<IndexHandle>,
289    ) -> Result<Vec<ObjectMap>, String> {
290        match condition.first() {
291            Some(_) if condition.len() > 1 => Err("Only one condition is allowed".to_string()),
292            Some(Condition::Equals { value, .. }) => {
293                let ip = value
294                    .to_string_lossy()
295                    .parse::<IpAddr>()
296                    .map_err(|_| "Invalid IP address".to_string())?;
297                Ok(self
298                    .lookup(ip, select)
299                    .map(|values| vec![values])
300                    .unwrap_or_default())
301            }
302            Some(_) => Err("Only equality condition is allowed".to_string()),
303            None => Err("IP condition must be specified".to_string()),
304        }
305    }
306
307    /// Hints to the enrichment table what data is going to be searched to allow it to index the
308    /// data in advance.
309    ///
310    /// # Errors
311    /// Errors if the fields are not in the table.
312    fn add_index(&mut self, _: Case, fields: &[&str]) -> Result<IndexHandle, String> {
313        match fields.len() {
314            0 => Err("IP field is required".to_string()),
315            1 => Ok(IndexHandle(0)),
316            _ => Err("Only one field is allowed".to_string()),
317        }
318    }
319
320    /// Returns a list of the field names that are in each index
321    fn index_fields(&self) -> Vec<(Case, Vec<String>)> {
322        Vec::new()
323    }
324
325    /// Returns true if the underlying data has changed and the table needs reloading.
326    fn needs_reload(&self) -> bool {
327        matches!(fs::metadata(&self.config.path)
328            .and_then(|metadata| metadata.modified()),
329            Ok(modified) if modified > self.last_modified)
330    }
331}
332
333impl std::fmt::Debug for Geoip {
334    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
335        write!(
336            f,
337            "Geoip {} database {})",
338            self.config.locale,
339            self.config.path.display()
340        )
341    }
342}
343
#[cfg(test)]
mod tests {
    use super::*;

    // Test databases, relative to the crate root.
    const CITY_DB: &str = "tests/data/GeoIP2-City-Test.mmdb";
    const ISP_DB: &str = "tests/data/GeoIP2-ISP-Test.mmdb";
    const ASN_DB: &str = "tests/data/GeoLite2-ASN-Test.mmdb";
    const CONNECTION_TYPE_DB: &str = "tests/data/GeoIP2-Connection-Type-Test.mmdb";
    const ANONYMOUS_IP_DB: &str = "tests/data/GeoIP2-Anonymous-IP-Test.mmdb";

    // An address that none of the test databases has a record for.
    const UNKNOWN_IP: &str = "10.1.12.1";

    #[test]
    fn city_lookup() {
        let expected = row([
            ("city_name", "Boxford".into()),
            ("country_code", "GB".into()),
            ("continent_code", "EU".into()),
            ("country_name", "United Kingdom".into()),
            ("region_code", "WBK".into()),
            ("region_name", "West Berkshire".into()),
            ("timezone", "Europe/London".into()),
            ("latitude", Value::from(51.75)),
            ("longitude", Value::from(-1.25)),
            ("postal_code", "OX1".into()),
            ("metro_code", Value::Null),
        ]);

        let values = find("2.125.160.216", CITY_DB).unwrap();

        assert_eq!(values, expected);
    }

    #[test]
    fn city_partial_lookup() {
        let select = ["latitude".to_string(), "longitude".to_string()];
        let expected = row([
            ("latitude", Value::from(51.75)),
            ("longitude", Value::from(-1.25)),
        ]);

        let values = find_select("2.125.160.216", CITY_DB, Some(select.as_slice())).unwrap();

        assert_eq!(values, expected);
    }

    #[test]
    fn city_lookup_partial_results() {
        let expected = row([
            ("city_name", Value::Null),
            ("country_code", "BT".into()),
            ("country_name", "Bhutan".into()),
            ("continent_code", "AS".into()),
            ("region_code", Value::Null),
            ("region_name", Value::Null),
            ("timezone", "Asia/Thimphu".into()),
            ("latitude", Value::from(27.5)),
            ("longitude", Value::from(90.5)),
            ("postal_code", Value::Null),
            ("metro_code", Value::Null),
        ]);

        let values = find("67.43.156.9", CITY_DB).unwrap();

        assert_eq!(values, expected);
    }

    #[test]
    fn city_lookup_no_results() {
        assert!(find(UNKNOWN_IP, CITY_DB).is_none());
    }

    #[test]
    fn isp_lookup() {
        let expected = row([
            ("autonomous_system_number", 701i64.into()),
            (
                "autonomous_system_organization",
                "MCI Communications Services, Inc. d/b/a Verizon Business".into(),
            ),
            ("isp", "Verizon Business".into()),
            ("organization", "Verizon Business".into()),
        ]);

        let values = find("208.192.1.2", ISP_DB).unwrap();

        assert_eq!(values, expected);
    }

    #[test]
    fn isp_lookup_partial_results() {
        let expected = row([
            ("autonomous_system_number", 6939i64.into()),
            (
                "autonomous_system_organization",
                "Hurricane Electric, Inc.".into(),
            ),
            ("isp", Value::Null),
            ("organization", Value::Null),
        ]);

        let values = find("2600:7000::1", ASN_DB).unwrap();

        assert_eq!(values, expected);
    }

    #[test]
    fn isp_lookup_no_results() {
        assert!(find(UNKNOWN_IP, ASN_DB).is_none());
    }

    #[test]
    fn connection_type_lookup_success() {
        let expected = row([("connection_type", "Corporate".into())]);

        let values = find("201.243.200.1", CONNECTION_TYPE_DB).unwrap();

        assert_eq!(values, expected);
    }

    #[test]
    fn connection_type_lookup_missing() {
        assert!(find(UNKNOWN_IP, CONNECTION_TYPE_DB).is_none());
    }

    #[test]
    fn custom_mmdb_type_error() {
        let config = GeoipConfig {
            path: "tests/data/custom-type.mmdb".into(),
            locale: default_locale(),
        };

        assert!(Geoip::new(config).is_err());
    }

    #[test]
    fn anonymous_ip_lookup() {
        let expected = row([
            ("is_anonymous", true.into()),
            ("is_anonymous_vpn", true.into()),
            ("is_hosting_provider", true.into()),
            ("is_tor_exit_node", true.into()),
            ("is_public_proxy", Value::Null),
            ("is_residential_proxy", Value::Null),
        ]);

        let values = find("101.99.92.179", ANONYMOUS_IP_DB).unwrap();

        assert_eq!(values, expected);
    }

    #[test]
    fn anonymous_ip_lookup_no_results() {
        assert!(find(UNKNOWN_IP, ANONYMOUS_IP_DB).is_none());
    }

    /// Builds an expected row from `(field name, value)` pairs.
    fn row<const N: usize>(fields: [(&str, Value); N]) -> ObjectMap {
        let mut map = ObjectMap::new();
        for (key, value) in fields {
            map.insert(key.into(), value);
        }
        map
    }

    /// Looks up `ip` in `database`, returning every field of the record.
    fn find(ip: &str, database: &str) -> Option<ObjectMap> {
        find_select(ip, database, None)
    }

    /// Looks up `ip` in `database` with the default locale, optionally restricted to the
    /// `select`ed fields, and returns the last row found (if any).
    fn find_select(ip: &str, database: &str, select: Option<&[String]>) -> Option<ObjectMap> {
        let table = Geoip::new(GeoipConfig {
            path: database.into(),
            locale: default_locale(),
        })
        .unwrap();

        let condition = [Condition::Equals {
            field: "ip",
            value: ip.into(),
        }];

        let mut rows = table
            .find_table_rows(Case::Insensitive, &condition, select, None, None)
            .unwrap();

        rows.pop()
    }
}