vector_config_common/
human_friendly.rs

1use std::collections::{HashMap, HashSet};
2use std::sync::LazyLock;
3
4use convert_case::{Boundary, Case, Converter};
5
/// Lookup table of well-known replacements, keyed by the lowercased form of a word.
///
/// A replacement covers spellings that simple case conversion cannot produce: mixed
/// capitalization inside a single word (such as "ClickHouse"), or extra characters that are not
/// present in the original string at all (such as turning "pubsub" into "Pub/Sub").
static WELL_KNOWN_REPLACEMENTS: LazyLock<HashMap<String, &'static str>> = LazyLock::new(|| {
    // (lowercase word, human-friendly spelling)
    const REPLACEMENTS: &[(&str, &str)] = &[
        ("eventstoredb", "EventStoreDB"),
        ("mongodb", "MongoDB"),
        ("opentelemetry", "OpenTelemetry"),
        ("otel", "OTEL"),
        ("postgresql", "PostgreSQL"),
        ("pubsub", "Pub/Sub"),
        ("statsd", "StatsD"),
        ("journald", "JournalD"),
        ("appsignal", "AppSignal"),
        ("clickhouse", "ClickHouse"),
        ("influxdb", "InfluxDB"),
        ("webhdfs", "WebHDFS"),
        ("cloudwatch", "CloudWatch"),
        ("logdna", "LogDNA"),
        ("geoip", "GeoIP"),
        ("ssekms", "SSE-KMS"),
        ("aes256", "AES-256"),
        ("apiserver", "API Server"),
        ("dir", "Directory"),
        ("ids", "IDs"),
        ("ips", "IPs"),
        ("grpc", "gRPC"),
        ("oauth2", "OAuth2"),
    ];

    let mut table = HashMap::with_capacity(REPLACEMENTS.len());
    for &(word, friendly) in REPLACEMENTS {
        // Keys are normalized to lowercase so that lookups can be case-insensitive.
        table.insert(word.to_lowercase(), friendly);
    }
    table
});
40
/// Set of well-known acronyms, stored in lowercase.
///
/// Acronyms differ from replacements in that the whole word is simply uppercased (i.e. "aws",
/// "aWs" and "Aws" all become "AWS"), whereas a replacement may insert additional characters or
/// capitalize only specific characters of the original string.
static WELL_KNOWN_ACRONYMS: LazyLock<HashSet<String>> = LazyLock::new(|| {
    // Each acronym is listed exactly once; the set would silently swallow duplicates, which
    // makes accidental repeats easy to miss when the list is edited.
    const ACRONYMS: &[&str] = &[
        "api", "amqp", "aws", "ec2", "ecs", "gcp", "hec", "http", "https", "nats", "nginx", "s3",
        "sqs", "tls", "ssl", "otel", "gelf", "csv", "json", "rfc3339", "lz4", "us", "eu", "bsd",
        "vrl", "tcp", "udp", "id", "uuid", "kms", "uri", "url", "acp", "uid", "ip", "pid",
        "ndjson", "ewma", "rtt", "cpu", "acl", "imds", "alpn", "sasl",
    ];

    // Entries are normalized to lowercase so that membership checks can be case-insensitive.
    ACRONYMS
        .iter()
        .map(|acronym| acronym.to_lowercase())
        .collect()
});
56
57/// Generates a human-friendly version of the given string.
58///
59/// Many instances exist where type names, or string constants, represent a condensed form of an
60/// otherwise human-friendly/recognize string, such as "aws_s3" (for AWS S3) or "InfluxdbMetrics"
61/// (for InfluxDB Metrics) and so on.
62///
63/// This function takes a given input and restores it back to the human-friendly version by
64/// splitting it on the relevant word boundaries, adjusting the input to title case, and applying
65/// well-known replacements to ensure that brand-specific casing (such as "CloudWatch" instead of
66/// "Cloudwatch", or handling acronyms like AWS, GCP, and so on) makes it into the final version.
67pub fn generate_human_friendly_string(input: &str) -> String {
68    // Create our case converter, which specifically ignores letter/digit boundaries, which is
69    // important for not turning substrings like "Ec2" or "S3" into "Ec"/"2" and "S"/"3",
70    // respectively.
71    let converter = Converter::new()
72        .to_case(Case::Title)
73        .remove_boundaries(&[Boundary::LOWER_DIGIT, Boundary::UPPER_DIGIT]);
74    let normalized = converter.convert(input);
75
76    let replaced_segments = normalized
77        .split(' ')
78        .map(replace_well_known_segments)
79        .collect::<Vec<_>>();
80    replaced_segments.join(" ")
81}
82
83fn replace_well_known_segments(input: &str) -> String {
84    let as_lower = input.to_lowercase();
85    if let Some(replacement) = WELL_KNOWN_REPLACEMENTS.get(&as_lower) {
86        replacement.to_string()
87    } else if WELL_KNOWN_ACRONYMS.contains(&as_lower) {
88        input.to_uppercase()
89    } else {
90        input.to_string()
91    }
92}
93
#[cfg(test)]
mod tests {
    use super::generate_human_friendly_string;

    /// Pascal-case and snake-case spellings of the same name must produce the same output.
    #[test]
    fn autodetect_input_case() {
        let expected = "Log To Metric";

        for input in ["LogToMetric", "log_to_metric"] {
            assert_eq!(expected, generate_human_friendly_string(input));
        }
    }

    /// A digit that follows a letter must stay attached to that letter's word.
    #[test]
    fn digit_letter_boundaries() {
        let cases = [("Ec2Metadata", "EC2 Metadata"), ("AwsS3", "AWS S3")];

        for (input, expected) in cases {
            assert_eq!(expected, generate_human_friendly_string(input));
        }
    }
}
123}