vrl/stdlib/
parse_bytes.rs

1use crate::compiler::function::EnumVariant;
2use crate::compiler::prelude::*;
3use crate::value;
4use core::convert::AsRef;
5use parse_size::Config;
6use rust_decimal::{Decimal, prelude::FromPrimitive, prelude::ToPrimitive};
7use std::collections::HashMap;
8use std::sync::LazyLock;
9
10static DEFAULT_BASE: LazyLock<Value> = LazyLock::new(|| Value::Bytes(Bytes::from("2")));
11
12static UNIT_ENUM: &[EnumVariant] = &[
13    EnumVariant {
14        value: "B",
15        description: "Bytes",
16    },
17    EnumVariant {
18        value: "kiB",
19        description: "Kilobytes (1024 bytes)",
20    },
21    EnumVariant {
22        value: "MiB",
23        description: "Megabytes (1024 ** 2 bytes)",
24    },
25    EnumVariant {
26        value: "GiB",
27        description: "Gigabytes (1024 ** 3 bytes)",
28    },
29    EnumVariant {
30        value: "TiB",
31        description: "Terabytes (1024 gigabytes)",
32    },
33    EnumVariant {
34        value: "PiB",
35        description: "Petabytes (1024 ** 2 gigabytes)",
36    },
37    EnumVariant {
38        value: "EiB",
39        description: "Exabytes (1024 ** 3 gigabytes)",
40    },
41    EnumVariant {
42        value: "kB",
43        description: "Kilobytes (1 thousand bytes in SI)",
44    },
45    EnumVariant {
46        value: "MB",
47        description: "Megabytes (1 million bytes in SI)",
48    },
49    EnumVariant {
50        value: "GB",
51        description: "Gigabytes (1 billion bytes in SI)",
52    },
53    EnumVariant {
54        value: "TB",
55        description: "Terabytes (1 thousand gigabytes in SI)",
56    },
57    EnumVariant {
58        value: "PB",
59        description: "Petabytes (1 million gigabytes in SI)",
60    },
61    EnumVariant {
62        value: "EB",
63        description: "Exabytes (1 billion gigabytes in SI)",
64    },
65];
66
67static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
68    vec![
69        Parameter::required(
70            "value",
71            kind::BYTES,
72            "The string of the duration with either binary or SI unit.",
73        ),
74        Parameter::required("unit", kind::BYTES, "The output units for the byte.")
75            .enum_variants(UNIT_ENUM),
76        Parameter::optional(
77            "base",
78            kind::BYTES,
79            "The base for the byte, either 2 or 10.",
80        )
81        .default(&DEFAULT_BASE),
82    ]
83});
84
85fn parse_bytes(bytes: &Value, unit: Value, base: &Bytes) -> Resolved {
86    let (units, parse_config) = match base.as_ref() {
87        b"2" => (&*BIN_UNITS, Config::new().with_binary()),
88        b"10" => (&*DEC_UNITS, Config::new().with_decimal()),
89        _ => unreachable!("enum invariant"),
90    };
91    let value = bytes.try_bytes_utf8_lossy()?;
92    let value: &str = value.as_ref();
93    let conversion_factor = {
94        let bytes = unit.try_bytes()?;
95        let string = String::from_utf8_lossy(&bytes);
96
97        units
98            .get(string.as_ref())
99            .ok_or(format!("unknown unit format: '{string}'"))?
100    };
101    let value = parse_config
102        .parse_size(value)
103        .map_err(|e| format!("unable to parse bytes: '{e}'"))?;
104    let value = Decimal::from_u64(value).ok_or(format!("unable to parse number: {value}"))?;
105    let number = value
106        .checked_div(*conversion_factor)
107        .ok_or("division by >1 divisor overflowed")?; // This should never ever happen
108    let number = number
109        .to_f64()
110        .ok_or(format!("unable to parse number: '{number}'"))?;
111    Ok(Value::from_f64_or_zero(number))
112}
113
114// The largest unit is EB, which is smaller than i64::MAX, so we can safely use Decimal
115// power of 2 units
116static BIN_UNITS: LazyLock<HashMap<String, Decimal>> = LazyLock::new(|| {
117    vec![
118        ("B", Decimal::new(1, 0)),
119        ("KiB", Decimal::new(1_024, 0)),
120        ("MiB", Decimal::new(1_048_576, 0)),
121        ("GiB", Decimal::new(1_073_741_824, 0)),
122        ("TiB", Decimal::new(1_099_511_627_776, 0)),
123        ("PiB", Decimal::new(1_125_899_906_842_624, 0)),
124        ("EiB", Decimal::new(1_152_921_504_606_846_976, 0)),
125        // binary units also support ambiguous units
126        ("KB", Decimal::new(1_024, 0)),
127        ("MB", Decimal::new(1_048_576, 0)),
128        ("GB", Decimal::new(1_073_741_824, 0)),
129        ("TB", Decimal::new(1_099_511_627_776, 0)),
130        ("PB", Decimal::new(1_125_899_906_842_624, 0)),
131        ("EB", Decimal::new(1_152_921_504_606_846_976, 0)),
132    ]
133    .into_iter()
134    .map(|(k, v)| (k.to_owned(), v))
135    .collect()
136});
137// power of 10 units
138static DEC_UNITS: LazyLock<HashMap<String, Decimal>> = LazyLock::new(|| {
139    vec![
140        ("B", Decimal::new(1, 0)),
141        ("kB", Decimal::new(1_000, 0)),
142        ("MB", Decimal::new(1_000_000, 0)),
143        ("GB", Decimal::new(1_000_000_000, 0)),
144        ("TB", Decimal::new(1_000_000_000_000, 0)),
145        ("PB", Decimal::new(1_000_000_000_000_000, 0)),
146        ("EB", Decimal::new(1_000_000_000_000_000_000, 0)),
147    ]
148    .into_iter()
149    .map(|(k, v)| (k.to_owned(), v))
150    .collect()
151});
152
/// Marker type that carries the `Function` implementation for `parse_bytes`.
#[derive(Debug, Clone, Copy)]
pub struct ParseBytes;
155
156fn base_sets() -> Vec<Value> {
157    vec![value!("2"), value!("10")]
158}
159
160impl Function for ParseBytes {
161    fn identifier(&self) -> &'static str {
162        "parse_bytes"
163    }
164
165    fn usage(&self) -> &'static str {
166        "Parses the `value` into a human-readable bytes format specified by `unit` and `base`."
167    }
168
169    fn category(&self) -> &'static str {
170        Category::Parse.as_ref()
171    }
172
173    fn internal_failure_reasons(&self) -> &'static [&'static str] {
174        &["`value` is not a properly formatted bytes."]
175    }
176
177    fn return_kind(&self) -> u16 {
178        kind::FLOAT
179    }
180
181    fn examples(&self) -> &'static [Example] {
182        &[
183            example! {
184                title: "Parse bytes (kilobytes)",
185                source: r#"parse_bytes!("1024KiB", unit: "MiB")"#,
186                result: Ok("1.0"),
187            },
188            example! {
189                title: "Parse kilobytes in default binary units",
190                source: r#"parse_bytes!("1KiB", unit: "B")"#,
191                result: Ok("1024.0"),
192            },
193            example! {
194                title: "Parse bytes in SI unit (terabytes)",
195                source: r#"parse_bytes!("4TB", unit: "MB", base: "10")"#,
196                result: Ok("4000000.0"),
197            },
198            example! {
199                title: "Parse gigabytes in decimal units",
200                source: r#"parse_bytes!("1GB", unit: "B", base: "10")"#,
201                result: Ok("1000000000.0"),
202            },
203            example! {
204                title: "Parse bytes in ambiguous unit (gigabytes)",
205                source: r#"parse_bytes!("1GB", unit: "B", base: "2")"#,
206                result: Ok("1073741824.0"),
207            },
208            example! {
209                title: "Parse gigabytes in ambiguous decimal units",
210                source: r#"parse_bytes!("1GB", unit: "MB", base: "2")"#,
211                result: Ok("1024.0"),
212            },
213        ]
214    }
215
216    fn compile(
217        &self,
218        state: &state::TypeState,
219        _ctx: &mut FunctionCompileContext,
220        arguments: ArgumentList,
221    ) -> Compiled {
222        let value = arguments.required("value");
223        let unit = arguments.required("unit");
224        let base = arguments
225            .optional_enum("base", &base_sets(), state)?
226            .unwrap_or_else(|| DEFAULT_BASE.clone())
227            .try_bytes()
228            .expect("base not bytes");
229
230        Ok(ParseBytesFn { value, unit, base }.as_expr())
231    }
232
233    fn parameters(&self) -> &'static [Parameter] {
234        PARAMETERS.as_slice()
235    }
236}
237
238#[derive(Debug, Clone)]
239struct ParseBytesFn {
240    value: Box<dyn Expression>,
241    unit: Box<dyn Expression>,
242    base: Bytes,
243}
244
245impl FunctionExpression for ParseBytesFn {
246    fn resolve(&self, ctx: &mut Context) -> Resolved {
247        let bytes = self.value.resolve(ctx)?;
248        let unit = self.unit.resolve(ctx)?;
249
250        parse_bytes(&bytes, unit, &self.base)
251    }
252
253    fn type_def(&self, _: &state::TypeState) -> TypeDef {
254        TypeDef::float().fallible()
255    }
256}
257
#[cfg(test)]
mod tests {
    use super::*;
    use crate::value;

    // Case names follow `<input unit>_<output unit>`.
    test_function![
        parse_bytes => ParseBytes;

        // --- binary (default base "2") ---

        mib_b {
            args: func_args![value: "1MiB",
                             unit: "B"],
            want: Ok(value!(1_048_576.0)),
            tdef: TypeDef::float().fallible(),
        }

        b_kib {
            args: func_args![value: "512B",
                             unit: "KiB"],
            want: Ok(0.5),
            tdef: TypeDef::float().fallible(),
        }

        gib_kib {
            args: func_args![value: "3.5GiB",
                             unit: "KiB"],
            want: Ok(3_670_016.0),
            tdef: TypeDef::float().fallible(),
        }

        tib_gib {
            args: func_args![value: "12 TiB",
                             unit: "GiB"],
            want: Ok(12_288.0),
            tdef: TypeDef::float().fallible(),
        }

        tib_pib {
            args: func_args![value: "256TiB",
                             unit: "PiB"],
            want: Ok(0.25),
            tdef: TypeDef::float().fallible(),
        }

        eib_tib {
            args: func_args![value: "1EiB",
                             unit: "TiB"],
            want: Ok(value!(1_048_576.0)),
            tdef: TypeDef::float().fallible(),
        }

        // An SI-looking unit with the default base is interpreted as a power of 2.
        mib_b_ambiguous {
            args: func_args![value: "1MB",
                             unit: "B",],
            want: Ok(value!(1_048_576.0)),
            tdef: TypeDef::float().fallible(),
        }

        // --- decimal (base "10") ---

        mb_b {
            args: func_args![value: "1MB",
                             unit: "B",
                             base: "10"],
            want: Ok(value!(1_000_000.0)),
            tdef: TypeDef::float().fallible(),
        }

        b_kb {
            args: func_args![value: "3B",
                             unit: "kB",
                             base: "10"],
            want: Ok(0.003),
            tdef: TypeDef::float().fallible(),
        }

        gb_kb {
            args: func_args![value: "3.007GB",
                             unit: "kB",
                             base: "10"],
            want: Ok(3_007_000.0),
            tdef: TypeDef::float().fallible(),
        }

        tb_gb {
            args: func_args![value: "12 TB",
                             unit: "GB",
                             base: "10"],
            want: Ok(12_000.0),
            tdef: TypeDef::float().fallible(),
        }

        mb_pb {
            args: func_args![value: "768MB",
                             unit: "PB",
                             base: "10"],
            want: Ok(0.000_000_768),
            tdef: TypeDef::float().fallible(),
        }

        eb_tb {
            args: func_args![value: "1EB",
                             unit: "TB",
                             base: "10"],
            want: Ok(value!(1_000_000.0)),
            tdef: TypeDef::float().fallible(),
        }

        // --- error paths ---

        // Input is not a size at all.
        error_invalid {
            args: func_args![value: "foo",
                             unit: "KiB"],
            want: Err("unable to parse bytes: 'invalid digit found in string'"),
            tdef: TypeDef::float().fallible(),
        }

        // Input carries a unit suffix the parser rejects.
        error_unit {
            args: func_args![value: "1YiB",
                             unit: "MiB"],
            want: Err("unable to parse bytes: 'invalid digit found in string'"),
            tdef: TypeDef::float().fallible(),
        }

        // Output unit is not in the lookup table.
        error_format {
            args: func_args![value: "100KB",
                             unit: "ZB",
                             base: "10"],
            want: Err("unknown unit format: 'ZB'"),
            tdef: TypeDef::float().fallible(),
        }
    ];
}