vector_config_common/
human_friendly.rs

1use std::{
2    collections::{HashMap, HashSet},
3    sync::LazyLock,
4};
5
6use convert_case::{Boundary, Case, Converter};
7
/// Well-known replacements, keyed by the lowercased form of the segment they replace.
///
/// Replacements are strings whose canonical spelling cannot be produced programmatically: they
/// carry unique capitalization (such as "ClickHouse") and may insert additional characters, such
/// as the replacement of "pubsub" with "Pub/Sub".
static WELL_KNOWN_REPLACEMENTS: LazyLock<HashMap<String, &'static str>> = LazyLock::new(|| {
    // A fixed-size array is sufficient: the table is iterated exactly once to build the map, so
    // heap-allocating it as a `Vec` first would be wasted work.
    let pairs = [
        ("eventstoredb", "EventStoreDB"),
        ("mongodb", "MongoDB"),
        ("opentelemetry", "OpenTelemetry"),
        ("otel", "OTEL"),
        ("postgresql", "PostgreSQL"),
        ("pubsub", "Pub/Sub"),
        ("statsd", "StatsD"),
        ("journald", "JournalD"),
        ("appsignal", "AppSignal"),
        ("clickhouse", "ClickHouse"),
        ("influxdb", "InfluxDB"),
        ("webhdfs", "WebHDFS"),
        ("cloudwatch", "CloudWatch"),
        ("logdna", "LogDNA"),
        ("geoip", "GeoIP"),
        ("ssekms", "SSE-KMS"),
        ("aes256", "AES-256"),
        ("apiserver", "API Server"),
        ("dir", "Directory"),
        ("ids", "IDs"),
        ("ips", "IPs"),
        ("grpc", "gRPC"),
        ("oauth2", "OAuth2"),
    ];

    // Keys are lowercased on insertion so that a lookup with a lowercased segment always matches,
    // even if an entry above is ever written with mixed case.
    pairs
        .iter()
        .map(|&(key, replacement)| (key.to_lowercase(), replacement))
        .collect()
});
42
/// Well-known acronyms, stored in lowercase.
///
/// Acronyms are distinct from replacements because they should be entirely capitalized (i.e. "aws"
/// or "aWs" or "Aws" should always be replaced with "AWS") whereas replacements may insert
/// additional characters or capitalize specific characters within the original string.
///
/// Callers are expected to lowercase a candidate segment before checking membership.
static WELL_KNOWN_ACRONYMS: LazyLock<HashSet<String>> = LazyLock::new(|| {
    // Each acronym is listed exactly once; the set would silently swallow duplicates, which makes
    // an accidental repeat easy to miss when reviewing additions.
    let acronyms = [
        "api", "amqp", "aws", "ec2", "ecs", "gcp", "hec", "http", "https", "nats", "nginx", "s3",
        "sqs", "tls", "ssl", "otel", "gelf", "csv", "json", "rfc3339", "lz4", "us", "eu", "bsd",
        "vrl", "tcp", "udp", "id", "uuid", "kms", "uri", "url", "acp", "uid", "ip", "pid",
        "ndjson", "ewma", "rtt", "cpu", "acl", "imds", "alpn", "sasl",
    ];

    // Lowercase on insertion so membership checks against lowercased segments always match.
    acronyms.iter().map(|acronym| acronym.to_lowercase()).collect()
});
58
59/// Generates a human-friendly version of the given string.
60///
61/// Many instances exist where type names, or string constants, represent a condensed form of an
62/// otherwise human-friendly/recognize string, such as "aws_s3" (for AWS S3) or "InfluxdbMetrics"
63/// (for InfluxDB Metrics) and so on.
64///
65/// This function takes a given input and restores it back to the human-friendly version by
66/// splitting it on the relevant word boundaries, adjusting the input to title case, and applying
67/// well-known replacements to ensure that brand-specific casing (such as "CloudWatch" instead of
68/// "Cloudwatch", or handling acronyms like AWS, GCP, and so on) makes it into the final version.
69pub fn generate_human_friendly_string(input: &str) -> String {
70    // Create our case converter, which specifically ignores letter/digit boundaries, which is
71    // important for not turning substrings like "Ec2" or "S3" into "Ec"/"2" and "S"/"3",
72    // respectively.
73    let converter = Converter::new()
74        .to_case(Case::Title)
75        .remove_boundaries(&[Boundary::LOWER_DIGIT, Boundary::UPPER_DIGIT]);
76    let normalized = converter.convert(input);
77
78    let replaced_segments = normalized
79        .split(' ')
80        .map(replace_well_known_segments)
81        .collect::<Vec<_>>();
82    replaced_segments.join(" ")
83}
84
85fn replace_well_known_segments(input: &str) -> String {
86    let as_lower = input.to_lowercase();
87    if let Some(replacement) = WELL_KNOWN_REPLACEMENTS.get(&as_lower) {
88        replacement.to_string()
89    } else if WELL_KNOWN_ACRONYMS.contains(&as_lower) {
90        input.to_uppercase()
91    } else {
92        input.to_string()
93    }
94}
95
#[cfg(test)]
mod tests {
    use super::generate_human_friendly_string;

    /// PascalCase and snake_case spellings of the same words must render identically.
    #[test]
    fn autodetect_input_case() {
        for input in ["LogToMetric", "log_to_metric"] {
            assert_eq!("Log To Metric", generate_human_friendly_string(input));
        }
    }

    /// Digits must stay attached to the letters preceding them so acronyms like EC2 and S3 are
    /// recognized as whole words.
    #[test]
    fn digit_letter_boundaries() {
        let cases = [("Ec2Metadata", "EC2 Metadata"), ("AwsS3", "AWS S3")];
        for (input, expected) in cases {
            assert_eq!(expected, generate_human_friendly_string(input));
        }
    }
}