vrl/stdlib/
xxhash.rs

1use crate::compiler::prelude::*;
2use std::sync::LazyLock;
3use xxhash_rust::{xxh3, xxh32, xxh64};
4
5static DEFAULT_VARIANT: LazyLock<Value> = LazyLock::new(|| Value::Bytes(Bytes::from("XXH32")));
6
7const VALID_VARIANTS: &[&str] = &["XXH32", "XXH64", "XXH3-64", "XXH3-128"];
8
9static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
10    vec![
11        Parameter::required(
12            "value",
13            kind::BYTES,
14            "The string to calculate the hash for.",
15        ),
16        Parameter::optional(
17            "variant",
18            kind::BYTES,
19            "The xxHash hashing algorithm to use.",
20        )
21        .default(&DEFAULT_VARIANT),
22    ]
23});
24
25#[allow(clippy::cast_possible_wrap)]
26fn xxhash(value: Value, variant: &Value) -> Resolved {
27    let bytes = value.try_bytes()?;
28    let variant = variant.try_bytes_utf8_lossy()?.as_ref().to_uppercase();
29
30    match variant.as_str() {
31        "XXH32" => {
32            let result = xxh32::xxh32(&bytes, 0);
33            Ok(Value::from(i64::from(result)))
34        }
35        "XXH64" => {
36            let result = xxh64::xxh64(&bytes, 0);
37            Ok(Value::from(result as i64))
38        }
39        "XXH3-64" => {
40            let result = xxh3::xxh3_64(&bytes);
41            Ok(Value::from(result as i64))
42        }
43        "XXH3-128" => {
44            let result = xxh3::xxh3_128(&bytes);
45            // Convert u128 to string representation since VRL doesn't have native u128 support
46            Ok(Value::from(result.to_string()))
47        }
48        _ => Err("Variant must be either 'XXH32', 'XXH64', 'XXH3-64', or 'XXH3-128'".into()),
49    }
50}
51
/// Marker type implementing the VRL `xxhash` function.
#[derive(Debug, Clone, Copy)]
pub struct Xxhash;
54
55impl Function for Xxhash {
56    fn identifier(&self) -> &'static str {
57        "xxhash"
58    }
59
60    fn summary(&self) -> &'static str {
61        "calculate xxhash hash"
62    }
63
64    fn usage(&self) -> &'static str {
65        "Calculates a [xxHash](https://github.com/DoumanAsh/xxhash-rust) hash of the `value`."
66    }
67
68    fn category(&self) -> &'static str {
69        Category::Checksum.as_ref()
70    }
71
72    fn return_kind(&self) -> u16 {
73        kind::INTEGER | kind::BYTES
74    }
75
76    fn parameters(&self) -> &'static [Parameter] {
77        PARAMETERS.as_slice()
78    }
79
80    fn notices(&self) -> &'static [&'static str] {
81        &[indoc! {"
82            Due to limitations in the underlying VRL data types, this function converts the unsigned
83            64-bit integer hash result to a signed 64-bit integer. Results higher than the signed
84            64-bit integer maximum value wrap around to negative values. For the XXH3-128 hash
85            algorithm, values are returned as a string.
86        "}]
87    }
88
89    fn examples(&self) -> &'static [Example] {
90        &[
91            example! {
92                title: "Calculate a hash using the default (XXH32) algorithm",
93                source: r#"xxhash("foo")"#,
94                result: Ok("3792637401"),
95            },
96            example! {
97                title: "Calculate a hash using the XXH32 algorithm",
98                source: r#"xxhash("foo", "XXH32")"#,
99                result: Ok("3792637401"),
100            },
101            example! {
102                title: "Calculate a hash using the XXH64 algorithm",
103                source: r#"xxhash("foo", "XXH64")"#,
104                result: Ok("3728699739546630719"),
105            },
106            example! {
107                title: "Calculate a hash using the XXH3-64 algorithm",
108                source: r#"xxhash("foo", "XXH3-64")"#,
109                result: Ok("-6093828362558603894"),
110            },
111            example! {
112                title: "Calculate a hash using the XXH3-128 algorithm",
113                source: r#"xxhash("foo", "XXH3-128")"#,
114                result: Ok(r#""161745101148472925293886522910304009610""#),
115            },
116        ]
117    }
118
119    fn compile(
120        &self,
121        _: &state::TypeState,
122        _ctx: &mut FunctionCompileContext,
123        arguments: ArgumentList,
124    ) -> Compiled {
125        let value = arguments.required("value");
126        let variant = arguments.optional("variant");
127
128        Ok(XxhashFn { value, variant }.as_expr())
129    }
130}
131
132#[derive(Debug, Clone)]
133struct XxhashFn {
134    value: Box<dyn Expression>,
135    variant: Option<Box<dyn Expression>>,
136}
137
138impl FunctionExpression for XxhashFn {
139    fn resolve(&self, ctx: &mut Context) -> Resolved {
140        let value = self.value.resolve(ctx)?;
141        let variant = self
142            .variant
143            .map_resolve_with_default(ctx, || DEFAULT_VARIANT.clone())?;
144
145        xxhash(value, &variant)
146    }
147
148    fn type_def(&self, state: &state::TypeState) -> TypeDef {
149        let variant = self.variant.as_ref();
150        let valid_static_variant = variant.is_none()
151            || variant
152                .and_then(|variant| variant.resolve_constant(state))
153                .and_then(|variant| variant.try_bytes_utf8_lossy().map(|s| s.to_string()).ok())
154                .is_some_and(|variant| VALID_VARIANTS.contains(&variant.to_uppercase().as_str()));
155
156        if valid_static_variant {
157            TypeDef::bytes().infallible()
158        } else {
159            TypeDef::bytes().fallible()
160        }
161    }
162}
163
164#[cfg(test)]
165mod tests {
166    use super::*;
167    use crate::value;
168
169    test_function![
170        xxhash => Xxhash;
171
172    hash_xxh32_default {
173        args: func_args![value: "foo"],
174        want: Ok(value!(3_792_637_401_i64)),
175        tdef: TypeDef::bytes().infallible(),
176    }
177
178    hash_xxh32 {
179        args: func_args![value: "foo", variant: "XXH32"],
180        want: Ok(value!(3_792_637_401_i64)),
181        tdef: TypeDef::bytes().infallible(),
182    }
183
184    hash_xxh64 {
185        args: func_args![value: "foo", variant: "XXH64"],
186        want: Ok(value!(3_728_699_739_546_630_719_i64)),
187        tdef: TypeDef::bytes().infallible(),
188    }
189
190    hash_xxh3_64 {
191        args: func_args![value: "foo", variant: "XXH3-64"],
192        want: Ok(value!(-6_093_828_362_558_603_894_i64)),
193        tdef: TypeDef::bytes().infallible(),
194    }
195
196    hash_xxh3_128 {
197        args: func_args![value: "foo", variant: "XXH3-128"],
198        want: Ok(value!("161745101148472925293886522910304009610")),
199        tdef: TypeDef::bytes().infallible(),
200    }
201
202    long_string_xxh32 {
203        args: func_args![value: "vrl xxhash hash function"],
204        want: Ok(value!(919_261_294_i64)),
205        tdef: TypeDef::bytes().infallible(),
206    }
207
208    long_string_xxh64 {
209        args: func_args![value: "vrl xxhash hash function", variant: "XXH64"],
210        want: Ok(value!(7_826_295_616_420_964_813_i64)),
211        tdef: TypeDef::bytes().infallible(),
212    }
213
214    long_string_xxh3_64 {
215        args: func_args![value: "vrl xxhash hash function", variant: "XXH3-64"],
216        want: Ok(value!(-7_714_906_473_624_552_998_i64)),
217        tdef: TypeDef::bytes().infallible(),
218    }
219
220    long_string_xxh3_128 {
221        args: func_args![value: "vrl xxhash hash function", variant: "XXH3-128"],
222        want: Ok(value!("89621485359950851650871997518391357172")),
223        tdef: TypeDef::bytes().infallible(),
224    }
225
226    hash_invalid_variant {
227        args: func_args![value: "foo", variant: "XXH16"],
228        want: Err("Variant must be either 'XXH32', 'XXH64', 'XXH3-64', or 'XXH3-128'"),
229        tdef: TypeDef::bytes().fallible(),
230    }
231    ];
232}