use std::{collections::BTreeMap, fs, net::IpAddr, sync::Arc, time::SystemTime};
use maxminddb::{
geoip2::{AnonymousIp, City, ConnectionType, Isp},
MaxMindDBError, Reader,
};
use ordered_float::NotNan;
use vector_lib::configurable::configurable_component;
use vector_lib::enrichment::{Case, Condition, IndexHandle, Table};
use vrl::value::{ObjectMap, Value};
use crate::config::{EnrichmentTableConfig, GenerateConfig};
/// The kinds of MaxMind database this module can query.
///
/// A kind is obtained from a database type name through the `TryFrom<&str>`
/// implementation for this enum; names it does not recognize are rejected.
#[derive(Copy, Clone, Debug)]
#[allow(missing_docs)]
pub enum DatabaseKind {
/// Selected for the `GeoLite2-ASN` database type.
Asn,
/// Selected for the `GeoIP2-ISP` database type.
Isp,
/// Selected for the `GeoIP2-Connection-Type` database type.
ConnectionType,
/// Selected for the `GeoIP2-City` and `GeoLite2-City` database types.
City,
/// Selected for the `GeoIP2-Anonymous-IP` database type.
AnonymousIp,
}
impl TryFrom<&str> for DatabaseKind {
    type Error = ();

    /// Translates a database type name into the matching [`DatabaseKind`].
    ///
    /// The comparison is exact (case-sensitive). Any name outside the known
    /// set yields `Err(())`.
    fn try_from(value: &str) -> Result<Self, Self::Error> {
        let kind = match value {
            "GeoLite2-ASN" => Self::Asn,
            "GeoIP2-ISP" => Self::Isp,
            "GeoIP2-Connection-Type" => Self::ConnectionType,
            "GeoIP2-City" | "GeoLite2-City" => Self::City,
            "GeoIP2-Anonymous-IP" => Self::AnonymousIp,
            // Everything else is an unsupported database type.
            _ => return Err(()),
        };
        Ok(kind)
    }
}
/// Configuration for the `geoip` enrichment table.
#[derive(Clone, Debug, Eq, PartialEq)]
#[configurable_component(enrichment_table("geoip"))]
pub struct GeoipConfig {
/// Path to the MaxMind database file to load.
///
/// The file's modification time is also consulted to decide whether the
/// table needs to be reloaded.
pub path: String,
/// Locale used to pick translated names (city, country, region) out of the
/// database records.
///
/// Defaults to `en`.
#[serde(default = "default_locale")]
pub locale: String,
}
/// Locale applied when the configuration does not set one explicitly.
fn default_locale() -> String {
    String::from("en")
}
impl GenerateConfig for GeoipConfig {
    /// Produces an example configuration: a placeholder database path and the
    /// default locale, serialized as a TOML value.
    fn generate_config() -> toml::Value {
        let example = Self {
            path: String::from("/path/to/GeoLite2-City.mmdb"),
            locale: default_locale(),
        };
        toml::Value::try_from(example).unwrap()
    }
}
impl EnrichmentTableConfig for GeoipConfig {
    /// Builds the [`Geoip`] table described by this configuration.
    ///
    /// The global options are not used. Any error from [`Geoip::new`] is
    /// returned unchanged.
    async fn build(
        &self,
        _: &crate::config::GlobalOptions,
    ) -> crate::Result<Box<dyn Table + Send + Sync>> {
        let table = Geoip::new(self.clone())?;
        Ok(Box::new(table))
    }
}
/// A `geoip` enrichment table backed by a MaxMind database held in memory.
#[derive(Clone)]
pub struct Geoip {
// Configuration the table was built from (database path and locale).
config: GeoipConfig,
// Reader over the database bytes; kept behind an `Arc` so clones of the
// table share the same reader instead of copying it.
dbreader: Arc<maxminddb::Reader<Vec<u8>>>,
// Kind of database that was loaded; selects the record type used by lookups.
dbkind: DatabaseKind,
// Modification time of the database file recorded when the table was built;
// `needs_reload` compares the file's current modification time against it.
last_modified: SystemTime,
}
impl Geoip {
pub fn new(config: GeoipConfig) -> crate::Result<Self> {
let dbreader = Arc::new(Reader::open_readfile(config.path.clone())?);
let dbkind =
DatabaseKind::try_from(dbreader.metadata.database_type.as_str()).map_err(|_| {
format!(
"Unsupported MMDB database type ({}). Use `mmdb` enrichment table instead.",
dbreader.metadata.database_type
)
})?;
let ip = IpAddr::V4(std::net::Ipv4Addr::UNSPECIFIED);
let result = match dbkind {
DatabaseKind::Asn | DatabaseKind::Isp => dbreader.lookup::<Isp>(ip).map(|_| ()),
DatabaseKind::ConnectionType => dbreader.lookup::<ConnectionType>(ip).map(|_| ()),
DatabaseKind::City => dbreader.lookup::<City>(ip).map(|_| ()),
DatabaseKind::AnonymousIp => dbreader.lookup::<AnonymousIp>(ip).map(|_| ()),
};
match result {
Ok(_) | Err(MaxMindDBError::AddressNotFoundError(_)) => Ok(Geoip {
last_modified: fs::metadata(&config.path)?.modified()?,
dbreader,
dbkind,
config,
}),
Err(error) => Err(error.into()),
}
}
fn lookup(&self, ip: IpAddr, select: Option<&[String]>) -> Option<ObjectMap> {
let mut map = ObjectMap::new();
let mut add_field = |key: &str, value: Option<Value>| {
if select
.map(|fields| fields.iter().any(|field| field == key))
.unwrap_or(true)
{
map.insert(key.into(), value.unwrap_or(Value::Null));
}
};
macro_rules! add_field {
($k:expr, $v:expr) => {
add_field($k, $v.map(Into::into))
};
}
match self.dbkind {
DatabaseKind::Asn | DatabaseKind::Isp => {
let data = self.dbreader.lookup::<Isp>(ip).ok()?;
add_field!("autonomous_system_number", data.autonomous_system_number);
add_field!(
"autonomous_system_organization",
data.autonomous_system_organization
);
add_field!("isp", data.isp);
add_field!("organization", data.organization);
}
DatabaseKind::City => {
let data = self.dbreader.lookup::<City>(ip).ok()?;
add_field!(
"city_name",
self.take_translation(data.city.as_ref().and_then(|c| c.names.as_ref()))
);
add_field!("continent_code", data.continent.and_then(|c| c.code));
let country = data.country.as_ref();
add_field!("country_code", country.and_then(|country| country.iso_code));
add_field!(
"country_name",
self.take_translation(country.and_then(|c| c.names.as_ref()))
);
let location = data.location.as_ref();
add_field!("timezone", location.and_then(|location| location.time_zone));
add_field!(
"latitude",
location
.and_then(|location| location.latitude)
.map(|latitude| Value::Float(
NotNan::new(latitude).expect("latitude cannot be Nan")
))
);
add_field!(
"longitude",
location
.and_then(|location| location.longitude)
.map(|longitude| NotNan::new(longitude).expect("longitude cannot be Nan"))
);
add_field!(
"metro_code",
location.and_then(|location| location.metro_code)
);
let subdivision = data.subdivisions.as_ref().and_then(|s| s.last());
add_field!(
"region_name",
self.take_translation(subdivision.and_then(|s| s.names.as_ref()))
);
add_field!(
"region_code",
subdivision.and_then(|subdivision| subdivision.iso_code)
);
add_field!("postal_code", data.postal.and_then(|p| p.code));
}
DatabaseKind::ConnectionType => {
let data = self.dbreader.lookup::<ConnectionType>(ip).ok()?;
add_field!("connection_type", data.connection_type);
}
DatabaseKind::AnonymousIp => {
let data = self.dbreader.lookup::<AnonymousIp>(ip).ok()?;
add_field!("is_anonymous", data.is_anonymous);
add_field!("is_anonymous_vpn", data.is_anonymous_vpn);
add_field!("is_hosting_provider", data.is_hosting_provider);
add_field!("is_public_proxy", data.is_public_proxy);
add_field!("is_residential_proxy", data.is_residential_proxy);
add_field!("is_tor_exit_node", data.is_tor_exit_node);
}
}
Some(map)
}
fn take_translation<'a>(
&self,
translations: Option<&BTreeMap<&str, &'a str>>,
) -> Option<&'a str> {
translations
.and_then(|translations| translations.get(&*self.config.locale))
.copied()
}
}
impl Table for Geoip {
    /// Finds the single row matching `condition`.
    ///
    /// Fails with `"IP not found"` when there is no matching row and with
    /// `"More than 1 row found"` when there are several. Errors from
    /// [`Self::find_table_rows`] are passed through.
    fn find_table_row<'a>(
        &self,
        case: Case,
        condition: &'a [Condition<'a>],
        select: Option<&[String]>,
        index: Option<IndexHandle>,
    ) -> Result<ObjectMap, String> {
        let mut matches = self
            .find_table_rows(case, condition, select, index)?
            .into_iter();

        match (matches.next(), matches.next()) {
            (Some(row), None) => Ok(row),
            (Some(_), Some(_)) => Err(String::from("More than 1 row found")),
            (None, _) => Err(String::from("IP not found")),
        }
    }

    /// Looks up the IP address given by a single equality condition.
    ///
    /// Exactly one `Condition::Equals` is accepted; its value is parsed as an
    /// IP address. The case and index arguments are ignored. The result holds
    /// one row when the lookup succeeds and is empty otherwise.
    fn find_table_rows<'a>(
        &self,
        _: Case,
        condition: &'a [Condition<'a>],
        select: Option<&[String]>,
        _: Option<IndexHandle>,
    ) -> Result<Vec<ObjectMap>, String> {
        let value = match condition {
            [] => return Err(String::from("IP condition must be specified")),
            [Condition::Equals { value, .. }] => value,
            [_] => return Err(String::from("Only equality condition is allowed")),
            _ => return Err(String::from("Only one condition is allowed")),
        };

        let ip: IpAddr = value
            .to_string_lossy()
            .parse()
            .map_err(|_| String::from("Invalid IP address"))?;

        Ok(self.lookup(ip, select).into_iter().collect())
    }

    /// Accepts an index over exactly one field and always hands back the same
    /// handle; nothing is stored.
    fn add_index(&mut self, _: Case, fields: &[&str]) -> Result<IndexHandle, String> {
        match fields {
            [] => Err(String::from("IP field is required")),
            [_] => Ok(IndexHandle(0)),
            _ => Err(String::from("Only one field is allowed")),
        }
    }

    /// Reports no indexed fields.
    fn index_fields(&self) -> Vec<(Case, Vec<String>)> {
        vec![]
    }

    /// Returns `true` when the database file's modification time is newer than
    /// the one recorded when the table was built. If the metadata cannot be
    /// read, no reload is requested.
    fn needs_reload(&self) -> bool {
        fs::metadata(&self.config.path)
            .and_then(|metadata| metadata.modified())
            .is_ok_and(|modified| modified > self.last_modified)
    }
}
impl std::fmt::Debug for Geoip {
    /// Formats the table as `Geoip(<locale> database <path>)`.
    ///
    /// Only the configuration is shown; the database reader is left out.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The opening parenthesis was missing, leaving a stray `)` at the end
        // of the output.
        write!(
            f,
            "Geoip({} database {})",
            self.config.locale, self.config.path
        )
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Test fixture databases, relative to the crate root.
    const CITY_DB: &str = "tests/data/GeoIP2-City-Test.mmdb";
    const ISP_DB: &str = "tests/data/GeoIP2-ISP-Test.mmdb";
    const ASN_DB: &str = "tests/data/GeoLite2-ASN-Test.mmdb";
    const CONNECTION_TYPE_DB: &str = "tests/data/GeoIP2-Connection-Type-Test.mmdb";
    const ANONYMOUS_IP_DB: &str = "tests/data/GeoIP2-Anonymous-IP-Test.mmdb";
    const CUSTOM_TYPE_DB: &str = "tests/data/custom-type.mmdb";

    // An address that none of the fixture lookups below expect to find.
    const UNKNOWN_IP: &str = "10.1.12.1";

    #[test]
    fn city_lookup() {
        let actual = find("2.125.160.216", CITY_DB).unwrap();

        let expected = row([
            ("city_name", "Boxford".into()),
            ("country_code", "GB".into()),
            ("continent_code", "EU".into()),
            ("country_name", "United Kingdom".into()),
            ("region_code", "WBK".into()),
            ("region_name", "West Berkshire".into()),
            ("timezone", "Europe/London".into()),
            ("latitude", Value::from(51.75)),
            ("longitude", Value::from(-1.25)),
            ("postal_code", "OX1".into()),
            ("metro_code", Value::Null),
        ]);

        assert_eq!(actual, expected);
    }

    #[test]
    fn city_partial_lookup() {
        let selected = ["latitude".to_string(), "longitude".to_string()];
        let actual = find_select("2.125.160.216", CITY_DB, Some(selected.as_slice())).unwrap();

        let expected = row([
            ("latitude", Value::from(51.75)),
            ("longitude", Value::from(-1.25)),
        ]);

        assert_eq!(actual, expected);
    }

    #[test]
    fn city_lookup_partial_results() {
        let actual = find("67.43.156.9", CITY_DB).unwrap();

        let expected = row([
            ("city_name", Value::Null),
            ("country_code", "BT".into()),
            ("country_name", "Bhutan".into()),
            ("continent_code", "AS".into()),
            ("region_code", Value::Null),
            ("region_name", Value::Null),
            ("timezone", "Asia/Thimphu".into()),
            ("latitude", Value::from(27.5)),
            ("longitude", Value::from(90.5)),
            ("postal_code", Value::Null),
            ("metro_code", Value::Null),
        ]);

        assert_eq!(actual, expected);
    }

    #[test]
    fn city_lookup_no_results() {
        assert!(find(UNKNOWN_IP, CITY_DB).is_none());
    }

    #[test]
    fn isp_lookup() {
        let actual = find("208.192.1.2", ISP_DB).unwrap();

        let expected = row([
            ("autonomous_system_number", 701i64.into()),
            (
                "autonomous_system_organization",
                "MCI Communications Services, Inc. d/b/a Verizon Business".into(),
            ),
            ("isp", "Verizon Business".into()),
            ("organization", "Verizon Business".into()),
        ]);

        assert_eq!(actual, expected);
    }

    #[test]
    fn isp_lookup_partial_results() {
        let actual = find("2600:7000::1", ASN_DB).unwrap();

        let expected = row([
            ("autonomous_system_number", 6939i64.into()),
            (
                "autonomous_system_organization",
                "Hurricane Electric, Inc.".into(),
            ),
            ("isp", Value::Null),
            ("organization", Value::Null),
        ]);

        assert_eq!(actual, expected);
    }

    #[test]
    fn isp_lookup_no_results() {
        assert!(find(UNKNOWN_IP, ASN_DB).is_none());
    }

    #[test]
    fn connection_type_lookup_success() {
        let actual = find("201.243.200.1", CONNECTION_TYPE_DB).unwrap();

        let expected = row([("connection_type", "Corporate".into())]);

        assert_eq!(actual, expected);
    }

    #[test]
    fn connection_type_lookup_missing() {
        assert!(find(UNKNOWN_IP, CONNECTION_TYPE_DB).is_none());
    }

    #[test]
    fn custom_mmdb_type_error() {
        let config = GeoipConfig {
            path: CUSTOM_TYPE_DB.to_string(),
            locale: default_locale(),
        };

        assert!(Geoip::new(config).is_err());
    }

    #[test]
    fn anonymous_ip_lookup() {
        let actual = find("101.99.92.179", ANONYMOUS_IP_DB).unwrap();

        let expected = row([
            ("is_anonymous", true.into()),
            ("is_anonymous_vpn", true.into()),
            ("is_hosting_provider", true.into()),
            ("is_tor_exit_node", true.into()),
            ("is_public_proxy", Value::Null),
            ("is_residential_proxy", Value::Null),
        ]);

        assert_eq!(actual, expected);
    }

    #[test]
    fn anonymous_ip_lookup_no_results() {
        assert!(find(UNKNOWN_IP, ANONYMOUS_IP_DB).is_none());
    }

    /// Builds an expected row from `(field name, value)` pairs.
    fn row<const N: usize>(fields: [(&str, Value); N]) -> ObjectMap {
        let mut object = ObjectMap::new();
        for (key, value) in fields {
            object.insert(key.into(), value);
        }
        object
    }

    /// Looks up `ip` in `database`, requesting every field.
    fn find(ip: &str, database: &str) -> Option<ObjectMap> {
        find_select(ip, database, None)
    }

    /// Looks up `ip` in `database`, optionally restricted to the `select`
    /// fields, and returns the matching row if there is one.
    fn find_select(ip: &str, database: &str, select: Option<&[String]>) -> Option<ObjectMap> {
        let table = Geoip::new(GeoipConfig {
            path: database.to_string(),
            locale: default_locale(),
        })
        .unwrap();

        let condition = [Condition::Equals {
            field: "ip",
            value: ip.into(),
        }];

        let mut rows = table
            .find_table_rows(Case::Insensitive, &condition, select, None)
            .unwrap();
        rows.pop()
    }
}