1use std::{fs, net::IpAddr, path::PathBuf, sync::Arc, time::SystemTime};
8
9use maxminddb::{
10 Reader,
11 geoip2::{AnonymousIp, City, ConnectionType, Isp, Names},
12};
13use ordered_float::NotNan;
14use serde::Deserialize;
15use vector_lib::{
16 configurable::configurable_component,
17 enrichment::{Case, Condition, IndexHandle, Table},
18};
19use vrl::value::{ObjectMap, Value};
20
21use crate::config::{EnrichmentTableConfig, GenerateConfig};
22
/// The MaxMind database products this table knows how to decode.
///
/// The kind is derived from the `database_type` string stored in the database
/// metadata (see the `TryFrom<&str>` implementation) and decides which record
/// type lookups are decoded into and which fields a lookup emits.
#[derive(Copy, Clone, Debug)]
pub enum DatabaseKind {
    /// Selected for `GeoLite2-ASN` databases; decoded with the same record type as [`Self::Isp`].
    Asn,
    /// Selected for `GeoIP2-ISP` databases.
    Isp,
    /// Selected for `GeoIP2-Connection-Type` databases.
    ConnectionType,
    /// Selected for `GeoIP2-City` and `GeoLite2-City` databases.
    City,
    /// Selected for `GeoIP2-Anonymous-IP` databases.
    AnonymousIp,
}
34
35impl TryFrom<&str> for DatabaseKind {
36 type Error = ();
37
38 fn try_from(value: &str) -> Result<Self, Self::Error> {
39 match value {
40 "GeoLite2-ASN" => Ok(Self::Asn),
41 "GeoIP2-ISP" => Ok(Self::Isp),
42 "GeoIP2-Connection-Type" => Ok(Self::ConnectionType),
43 "GeoIP2-City" | "GeoLite2-City" => Ok(Self::City),
44 "GeoIP2-Anonymous-IP" => Ok(Self::AnonymousIp),
45 _ => Err(()),
46 }
47 }
48}
49
/// Configuration for the `geoip` enrichment table.
#[derive(Clone, Debug, Eq, PartialEq)]
#[configurable_component(enrichment_table("geoip"))]
pub struct GeoipConfig {
    /// Path to the MaxMind database file to load (for example a GeoLite2-City `.mmdb` file).
    ///
    /// The file's modification time is also what the table compares against
    /// when deciding whether it needs to be reloaded.
    pub path: PathBuf,

    /// Locale used to pick the translated city, country and region names.
    ///
    /// The values recognised by this table are `en`, `de`, `es`, `fr`, `ja`,
    /// `pt-BR`, `ru` and `zh-CN`; with any other value the translated name
    /// fields are emitted as null. Defaults to `en`.
    #[serde(default = "default_locale")]
    pub locale: String,
}
77
/// Returns the locale used when the configuration does not specify one: `en`.
fn default_locale() -> String {
    String::from("en")
}
89
90impl GenerateConfig for GeoipConfig {
91 fn generate_config() -> toml::Value {
92 toml::Value::try_from(Self {
93 path: "/path/to/GeoLite2-City.mmdb".into(),
94 locale: default_locale(),
95 })
96 .unwrap()
97 }
98}
99
100impl EnrichmentTableConfig for GeoipConfig {
101 async fn build(
102 &self,
103 _: &crate::config::GlobalOptions,
104 ) -> crate::Result<Box<dyn Table + Send + Sync>> {
105 Ok(Box::new(Geoip::new(self.clone())?))
106 }
107}
108
109#[derive(Clone)]
110pub struct Geoip {
112 config: GeoipConfig,
113 dbreader: Arc<maxminddb::Reader<Vec<u8>>>,
114 dbkind: DatabaseKind,
115 last_modified: SystemTime,
116}
117
118fn lookup_value<'de, A: Deserialize<'de>>(
119 dbreader: &'de Reader<Vec<u8>>,
120 address: IpAddr,
121) -> crate::Result<Option<A>> {
122 let result = dbreader.lookup(address)?;
123 Ok(result.decode::<A>()?)
124}
125
126impl Geoip {
127 pub fn new(config: GeoipConfig) -> crate::Result<Self> {
129 let dbreader = Arc::new(Reader::open_readfile(&config.path)?);
130 let dbkind =
131 DatabaseKind::try_from(dbreader.metadata.database_type.as_str()).map_err(|_| {
132 format!(
133 "Unsupported MMDB database type ({}). Use `mmdb` enrichment table instead.",
134 dbreader.metadata.database_type
135 )
136 })?;
137
138 let ip = IpAddr::V4(std::net::Ipv4Addr::UNSPECIFIED);
140 match dbkind {
141 DatabaseKind::Asn | DatabaseKind::Isp => lookup_value::<Isp>(&dbreader, ip).map(|_| ()),
143 DatabaseKind::ConnectionType => {
144 lookup_value::<ConnectionType>(&dbreader, ip).map(|_| ())
145 }
146 DatabaseKind::City => lookup_value::<City>(&dbreader, ip).map(|_| ()),
147 DatabaseKind::AnonymousIp => lookup_value::<AnonymousIp>(&dbreader, ip).map(|_| ()),
148 }?;
149
150 Ok(Geoip {
151 last_modified: fs::metadata(&config.path)?.modified()?,
152 dbreader,
153 dbkind,
154 config,
155 })
156 }
157
158 fn lookup(&self, ip: IpAddr, select: Option<&[String]>) -> Option<ObjectMap> {
159 let mut map = ObjectMap::new();
160 let mut add_field = |key: &str, value: Option<Value>| {
161 if select
162 .map(|fields| fields.iter().any(|field| field == key))
163 .unwrap_or(true)
164 {
165 map.insert(key.into(), value.unwrap_or(Value::Null));
166 }
167 };
168
169 macro_rules! add_field {
170 ($k:expr_2021, $v:expr_2021) => {
171 add_field($k, $v.map(Into::into))
172 };
173 }
174
175 match self.dbkind {
176 DatabaseKind::Asn | DatabaseKind::Isp => {
177 let data = lookup_value::<Isp>(&self.dbreader, ip).ok()??;
178
179 add_field!("autonomous_system_number", data.autonomous_system_number);
180 add_field!(
181 "autonomous_system_organization",
182 data.autonomous_system_organization
183 );
184 add_field!("isp", data.isp);
185 add_field!("organization", data.organization);
186 }
187 DatabaseKind::City => {
188 let data: City = lookup_value::<City>(&self.dbreader, ip).ok()??;
189
190 add_field!("city_name", self.take_translation(&data.city.names));
191
192 add_field!("continent_code", data.continent.code);
193
194 let country = data.country;
195 add_field!("country_code", country.iso_code);
196 add_field!("country_name", self.take_translation(&country.names));
197
198 let location = data.location;
199 add_field!("timezone", location.time_zone);
200 add_field!(
201 "latitude",
202 location.latitude.map(|latitude| Value::Float(
203 NotNan::new(latitude).expect("latitude cannot be Nan")
204 ))
205 );
206 add_field!(
207 "longitude",
208 location
209 .longitude
210 .map(|longitude| NotNan::new(longitude).expect("longitude cannot be Nan"))
211 );
212 add_field!("metro_code", location.metro_code);
213
214 let subdivision = data.subdivisions.last();
216 add_field!(
217 "region_name",
218 subdivision.map(|s| self.take_translation(&s.names))
219 );
220
221 add_field!(
222 "region_code",
223 subdivision.and_then(|subdivision| subdivision.iso_code)
224 );
225 add_field!("postal_code", data.postal.code);
226 }
227 DatabaseKind::ConnectionType => {
228 let data = lookup_value::<ConnectionType>(&self.dbreader, ip).ok()??;
229
230 add_field!("connection_type", data.connection_type);
231 }
232 DatabaseKind::AnonymousIp => {
233 let data = lookup_value::<AnonymousIp>(&self.dbreader, ip).ok()??;
234
235 add_field!("is_anonymous", data.is_anonymous);
236 add_field!("is_anonymous_vpn", data.is_anonymous_vpn);
237 add_field!("is_hosting_provider", data.is_hosting_provider);
238 add_field!("is_public_proxy", data.is_public_proxy);
239 add_field!("is_residential_proxy", data.is_residential_proxy);
240 add_field!("is_tor_exit_node", data.is_tor_exit_node);
241 }
242 }
243
244 Some(map)
245 }
246
247 fn take_translation<'a>(&self, translations: &'a Names<'a>) -> Option<&'a str> {
248 match self.config.locale.as_ref() {
249 "en" => translations.english,
250 "de" => translations.german,
251 "es" => translations.spanish,
252 "fr" => translations.french,
253 "ja" => translations.japanese,
254 "pt-BR" => translations.brazilian_portuguese,
255 "ru" => translations.russian,
256 "zh-CN" => translations.simplified_chinese,
257 _ => None,
258 }
259 }
260}
261
262impl Table for Geoip {
263 fn find_table_row<'a>(
269 &self,
270 case: Case,
271 condition: &'a [Condition<'a>],
272 select: Option<&[String]>,
273 wildcard: Option<&Value>,
274 index: Option<IndexHandle>,
275 ) -> Result<ObjectMap, String> {
276 let mut rows = self.find_table_rows(case, condition, select, wildcard, index)?;
277
278 match rows.pop() {
279 Some(row) if rows.is_empty() => Ok(row),
280 Some(_) => Err("More than 1 row found".to_string()),
281 None => Err("IP not found".to_string()),
282 }
283 }
284
285 fn find_table_rows<'a>(
289 &self,
290 _: Case,
291 condition: &'a [Condition<'a>],
292 select: Option<&[String]>,
293 _wildcard: Option<&Value>,
294 _: Option<IndexHandle>,
295 ) -> Result<Vec<ObjectMap>, String> {
296 match condition.first() {
297 Some(_) if condition.len() > 1 => Err("Only one condition is allowed".to_string()),
298 Some(Condition::Equals { value, .. }) => {
299 let ip = value
300 .to_string_lossy()
301 .parse::<IpAddr>()
302 .map_err(|_| "Invalid IP address".to_string())?;
303 Ok(self
304 .lookup(ip, select)
305 .map(|values| vec![values])
306 .unwrap_or_default())
307 }
308 Some(_) => Err("Only equality condition is allowed".to_string()),
309 None => Err("IP condition must be specified".to_string()),
310 }
311 }
312
313 fn add_index(&mut self, _: Case, fields: &[&str]) -> Result<IndexHandle, String> {
319 match fields.len() {
320 0 => Err("IP field is required".to_string()),
321 1 => Ok(IndexHandle(0)),
322 _ => Err("Only one field is allowed".to_string()),
323 }
324 }
325
326 fn index_fields(&self) -> Vec<(Case, Vec<String>)> {
328 Vec::new()
329 }
330
331 fn needs_reload(&self) -> bool {
333 matches!(fs::metadata(&self.config.path)
334 .and_then(|metadata| metadata.modified()),
335 Ok(modified) if modified > self.last_modified)
336 }
337}
338
339impl std::fmt::Debug for Geoip {
340 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
341 write!(
342 f,
343 "Geoip {} database {})",
344 self.config.locale,
345 self.config.path.display()
346 )
347 }
348}
349
#[cfg(test)]
mod tests {
    use super::*;

    // Test databases used by more than one test.
    const CITY_DB: &str = "tests/data/GeoIP2-City-Test.mmdb";
    const ASN_DB: &str = "tests/data/GeoLite2-ASN-Test.mmdb";
    const CONNECTION_TYPE_DB: &str = "tests/data/GeoIP2-Connection-Type-Test.mmdb";
    const ANONYMOUS_IP_DB: &str = "tests/data/GeoIP2-Anonymous-IP-Test.mmdb";

    // Address the tests expect to be absent from every test database.
    const MISSING_IP: &str = "10.1.12.1";

    /// Builds an `ObjectMap` from `(key, value)` pairs.
    fn object<const N: usize>(fields: [(&str, Value); N]) -> ObjectMap {
        let mut map = ObjectMap::new();
        for (key, value) in fields {
            map.insert(key.into(), value);
        }
        map
    }

    #[test]
    fn city_lookup() {
        let values = find("2.125.160.216", CITY_DB).unwrap();

        let expected = object([
            ("city_name", Value::from("Boxford")),
            ("country_code", Value::from("GB")),
            ("continent_code", Value::from("EU")),
            ("country_name", Value::from("United Kingdom")),
            ("region_code", Value::from("WBK")),
            ("region_name", Value::from("West Berkshire")),
            ("timezone", Value::from("Europe/London")),
            ("latitude", Value::from(51.75)),
            ("longitude", Value::from(-1.25)),
            ("postal_code", Value::from("OX1")),
            ("metro_code", Value::Null),
        ]);

        assert_eq!(values, expected);
    }

    #[test]
    fn city_partial_lookup() {
        let fields = ["latitude".to_string(), "longitude".to_string()];
        let values = find_select("2.125.160.216", CITY_DB, Some(&fields[..])).unwrap();

        let expected = object([
            ("latitude", Value::from(51.75)),
            ("longitude", Value::from(-1.25)),
        ]);

        assert_eq!(values, expected);
    }

    #[test]
    fn city_lookup_partial_results() {
        let values = find("67.43.156.9", CITY_DB).unwrap();

        let expected = object([
            ("city_name", Value::Null),
            ("country_code", Value::from("BT")),
            ("country_name", Value::from("Bhutan")),
            ("continent_code", Value::from("AS")),
            ("region_code", Value::Null),
            ("region_name", Value::Null),
            ("timezone", Value::from("Asia/Thimphu")),
            ("latitude", Value::from(27.5)),
            ("longitude", Value::from(90.5)),
            ("postal_code", Value::Null),
            ("metro_code", Value::Null),
        ]);

        assert_eq!(values, expected);
    }

    #[test]
    fn city_lookup_no_results() {
        assert!(find(MISSING_IP, CITY_DB).is_none());
    }

    #[test]
    fn isp_lookup() {
        let values = find("208.192.1.2", "tests/data/GeoIP2-ISP-Test.mmdb").unwrap();

        let expected = object([
            ("autonomous_system_number", Value::from(701i64)),
            (
                "autonomous_system_organization",
                Value::from("MCI Communications Services, Inc. d/b/a Verizon Business"),
            ),
            ("isp", Value::from("Verizon Business")),
            ("organization", Value::from("Verizon Business")),
        ]);

        assert_eq!(values, expected);
    }

    #[test]
    fn isp_lookup_partial_results() {
        let values = find("2600:7000::1", ASN_DB).unwrap();

        let expected = object([
            ("autonomous_system_number", Value::from(6939i64)),
            (
                "autonomous_system_organization",
                Value::from("Hurricane Electric, Inc."),
            ),
            ("isp", Value::Null),
            ("organization", Value::Null),
        ]);

        assert_eq!(values, expected);
    }

    #[test]
    fn isp_lookup_no_results() {
        assert!(find(MISSING_IP, ASN_DB).is_none());
    }

    #[test]
    fn connection_type_lookup_success() {
        let values = find("201.243.200.1", CONNECTION_TYPE_DB).unwrap();

        let expected = object([("connection_type", Value::from("Corporate"))]);

        assert_eq!(values, expected);
    }

    #[test]
    fn connection_type_lookup_missing() {
        assert!(find(MISSING_IP, CONNECTION_TYPE_DB).is_none());
    }

    #[test]
    fn custom_mmdb_type_error() {
        let config = GeoipConfig {
            path: "tests/data/custom-type.mmdb".into(),
            locale: default_locale(),
        };

        assert!(Geoip::new(config).is_err());
    }

    #[test]
    fn anonymous_ip_lookup() {
        let values = find("101.99.92.179", ANONYMOUS_IP_DB).unwrap();

        let expected = object([
            ("is_anonymous", Value::from(true)),
            ("is_anonymous_vpn", Value::from(true)),
            ("is_hosting_provider", Value::from(true)),
            ("is_tor_exit_node", Value::from(true)),
            ("is_public_proxy", Value::Null),
            ("is_residential_proxy", Value::Null),
        ]);

        assert_eq!(values, expected);
    }

    #[test]
    fn anonymous_ip_lookup_no_results() {
        assert!(find(MISSING_IP, ANONYMOUS_IP_DB).is_none());
    }

    /// Looks `ip` up in `database` without restricting the returned fields.
    fn find(ip: &str, database: &str) -> Option<ObjectMap> {
        find_select(ip, database, None)
    }

    /// Loads `database` with the default locale, queries it for `ip` through
    /// `find_table_rows`, and returns the last row, if any.
    fn find_select(ip: &str, database: &str, select: Option<&[String]>) -> Option<ObjectMap> {
        let config = GeoipConfig {
            path: database.into(),
            locale: default_locale(),
        };
        let table = Geoip::new(config).unwrap();

        let condition = Condition::Equals {
            field: "ip",
            value: ip.into(),
        };
        let mut rows = table
            .find_table_rows(Case::Insensitive, &[condition], select, None, None)
            .unwrap();

        rows.pop()
    }
}