1use crate::compiler::prelude::*;
2use std::sync::LazyLock;
3use xxhash_rust::{xxh3, xxh32, xxh64};
4
5static DEFAULT_VARIANT: LazyLock<Value> = LazyLock::new(|| Value::Bytes(Bytes::from("XXH32")));
6
7const VALID_VARIANTS: &[&str] = &["XXH32", "XXH64", "XXH3-64", "XXH3-128"];
8
9static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
10 vec![
11 Parameter::required(
12 "value",
13 kind::BYTES,
14 "The string to calculate the hash for.",
15 ),
16 Parameter::optional(
17 "variant",
18 kind::BYTES,
19 "The xxHash hashing algorithm to use.",
20 )
21 .default(&DEFAULT_VARIANT),
22 ]
23});
24
25#[allow(clippy::cast_possible_wrap)]
26fn xxhash(value: Value, variant: &Value) -> Resolved {
27 let bytes = value.try_bytes()?;
28 let variant = variant.try_bytes_utf8_lossy()?.as_ref().to_uppercase();
29
30 match variant.as_str() {
31 "XXH32" => {
32 let result = xxh32::xxh32(&bytes, 0);
33 Ok(Value::from(i64::from(result)))
34 }
35 "XXH64" => {
36 let result = xxh64::xxh64(&bytes, 0);
37 Ok(Value::from(result as i64))
38 }
39 "XXH3-64" => {
40 let result = xxh3::xxh3_64(&bytes);
41 Ok(Value::from(result as i64))
42 }
43 "XXH3-128" => {
44 let result = xxh3::xxh3_128(&bytes);
45 Ok(Value::from(result.to_string()))
47 }
48 _ => Err("Variant must be either 'XXH32', 'XXH64', 'XXH3-64', or 'XXH3-128'".into()),
49 }
50}
51
/// Marker type for the `xxhash` function; it carries no state.
#[derive(Debug, Clone, Copy)]
pub struct Xxhash;
54
55impl Function for Xxhash {
56 fn identifier(&self) -> &'static str {
57 "xxhash"
58 }
59
60 fn summary(&self) -> &'static str {
61 "calculate xxhash hash"
62 }
63
64 fn usage(&self) -> &'static str {
65 "Calculates a [xxHash](https://github.com/DoumanAsh/xxhash-rust) hash of the `value`."
66 }
67
68 fn category(&self) -> &'static str {
69 Category::Checksum.as_ref()
70 }
71
72 fn return_kind(&self) -> u16 {
73 kind::INTEGER | kind::BYTES
74 }
75
76 fn parameters(&self) -> &'static [Parameter] {
77 PARAMETERS.as_slice()
78 }
79
80 fn notices(&self) -> &'static [&'static str] {
81 &[indoc! {"
82 Due to limitations in the underlying VRL data types, this function converts the unsigned
83 64-bit integer hash result to a signed 64-bit integer. Results higher than the signed
84 64-bit integer maximum value wrap around to negative values. For the XXH3-128 hash
85 algorithm, values are returned as a string.
86 "}]
87 }
88
89 fn examples(&self) -> &'static [Example] {
90 &[
91 example! {
92 title: "Calculate a hash using the default (XXH32) algorithm",
93 source: r#"xxhash("foo")"#,
94 result: Ok("3792637401"),
95 },
96 example! {
97 title: "Calculate a hash using the XXH32 algorithm",
98 source: r#"xxhash("foo", "XXH32")"#,
99 result: Ok("3792637401"),
100 },
101 example! {
102 title: "Calculate a hash using the XXH64 algorithm",
103 source: r#"xxhash("foo", "XXH64")"#,
104 result: Ok("3728699739546630719"),
105 },
106 example! {
107 title: "Calculate a hash using the XXH3-64 algorithm",
108 source: r#"xxhash("foo", "XXH3-64")"#,
109 result: Ok("-6093828362558603894"),
110 },
111 example! {
112 title: "Calculate a hash using the XXH3-128 algorithm",
113 source: r#"xxhash("foo", "XXH3-128")"#,
114 result: Ok(r#""161745101148472925293886522910304009610""#),
115 },
116 ]
117 }
118
119 fn compile(
120 &self,
121 _: &state::TypeState,
122 _ctx: &mut FunctionCompileContext,
123 arguments: ArgumentList,
124 ) -> Compiled {
125 let value = arguments.required("value");
126 let variant = arguments.optional("variant");
127
128 Ok(XxhashFn { value, variant }.as_expr())
129 }
130}
131
132#[derive(Debug, Clone)]
133struct XxhashFn {
134 value: Box<dyn Expression>,
135 variant: Option<Box<dyn Expression>>,
136}
137
138impl FunctionExpression for XxhashFn {
139 fn resolve(&self, ctx: &mut Context) -> Resolved {
140 let value = self.value.resolve(ctx)?;
141 let variant = self
142 .variant
143 .map_resolve_with_default(ctx, || DEFAULT_VARIANT.clone())?;
144
145 xxhash(value, &variant)
146 }
147
148 fn type_def(&self, state: &state::TypeState) -> TypeDef {
149 let variant = self.variant.as_ref();
150 let valid_static_variant = variant.is_none()
151 || variant
152 .and_then(|variant| variant.resolve_constant(state))
153 .and_then(|variant| variant.try_bytes_utf8_lossy().map(|s| s.to_string()).ok())
154 .is_some_and(|variant| VALID_VARIANTS.contains(&variant.to_uppercase().as_str()));
155
156 if valid_static_variant {
157 TypeDef::bytes().infallible()
158 } else {
159 TypeDef::bytes().fallible()
160 }
161 }
162}
163
164#[cfg(test)]
165mod tests {
166 use super::*;
167 use crate::value;
168
169 test_function![
170 xxhash => Xxhash;
171
172 hash_xxh32_default {
173 args: func_args![value: "foo"],
174 want: Ok(value!(3_792_637_401_i64)),
175 tdef: TypeDef::bytes().infallible(),
176 }
177
178 hash_xxh32 {
179 args: func_args![value: "foo", variant: "XXH32"],
180 want: Ok(value!(3_792_637_401_i64)),
181 tdef: TypeDef::bytes().infallible(),
182 }
183
184 hash_xxh64 {
185 args: func_args![value: "foo", variant: "XXH64"],
186 want: Ok(value!(3_728_699_739_546_630_719_i64)),
187 tdef: TypeDef::bytes().infallible(),
188 }
189
190 hash_xxh3_64 {
191 args: func_args![value: "foo", variant: "XXH3-64"],
192 want: Ok(value!(-6_093_828_362_558_603_894_i64)),
193 tdef: TypeDef::bytes().infallible(),
194 }
195
196 hash_xxh3_128 {
197 args: func_args![value: "foo", variant: "XXH3-128"],
198 want: Ok(value!("161745101148472925293886522910304009610")),
199 tdef: TypeDef::bytes().infallible(),
200 }
201
202 long_string_xxh32 {
203 args: func_args![value: "vrl xxhash hash function"],
204 want: Ok(value!(919_261_294_i64)),
205 tdef: TypeDef::bytes().infallible(),
206 }
207
208 long_string_xxh64 {
209 args: func_args![value: "vrl xxhash hash function", variant: "XXH64"],
210 want: Ok(value!(7_826_295_616_420_964_813_i64)),
211 tdef: TypeDef::bytes().infallible(),
212 }
213
214 long_string_xxh3_64 {
215 args: func_args![value: "vrl xxhash hash function", variant: "XXH3-64"],
216 want: Ok(value!(-7_714_906_473_624_552_998_i64)),
217 tdef: TypeDef::bytes().infallible(),
218 }
219
220 long_string_xxh3_128 {
221 args: func_args![value: "vrl xxhash hash function", variant: "XXH3-128"],
222 want: Ok(value!("89621485359950851650871997518391357172")),
223 tdef: TypeDef::bytes().infallible(),
224 }
225
226 hash_invalid_variant {
227 args: func_args![value: "foo", variant: "XXH16"],
228 want: Err("Variant must be either 'XXH32', 'XXH64', 'XXH3-64', or 'XXH3-128'"),
229 tdef: TypeDef::bytes().fallible(),
230 }
231 ];
232}