1use crate::compiler::function::EnumVariant;
2use crate::compiler::prelude::*;
3use crate::value;
4use core::convert::AsRef;
5use parse_size::Config;
6use rust_decimal::{Decimal, prelude::FromPrimitive, prelude::ToPrimitive};
7use std::collections::HashMap;
8use std::sync::LazyLock;
9
10static DEFAULT_BASE: LazyLock<Value> = LazyLock::new(|| Value::Bytes(Bytes::from("2")));
11
12static UNIT_ENUM: &[EnumVariant] = &[
13 EnumVariant {
14 value: "B",
15 description: "Bytes",
16 },
17 EnumVariant {
18 value: "kiB",
19 description: "Kilobytes (1024 bytes)",
20 },
21 EnumVariant {
22 value: "MiB",
23 description: "Megabytes (1024 ** 2 bytes)",
24 },
25 EnumVariant {
26 value: "GiB",
27 description: "Gigabytes (1024 ** 3 bytes)",
28 },
29 EnumVariant {
30 value: "TiB",
31 description: "Terabytes (1024 gigabytes)",
32 },
33 EnumVariant {
34 value: "PiB",
35 description: "Petabytes (1024 ** 2 gigabytes)",
36 },
37 EnumVariant {
38 value: "EiB",
39 description: "Exabytes (1024 ** 3 gigabytes)",
40 },
41 EnumVariant {
42 value: "kB",
43 description: "Kilobytes (1 thousand bytes in SI)",
44 },
45 EnumVariant {
46 value: "MB",
47 description: "Megabytes (1 million bytes in SI)",
48 },
49 EnumVariant {
50 value: "GB",
51 description: "Gigabytes (1 billion bytes in SI)",
52 },
53 EnumVariant {
54 value: "TB",
55 description: "Terabytes (1 thousand gigabytes in SI)",
56 },
57 EnumVariant {
58 value: "PB",
59 description: "Petabytes (1 million gigabytes in SI)",
60 },
61 EnumVariant {
62 value: "EB",
63 description: "Exabytes (1 billion gigabytes in SI)",
64 },
65];
66
67static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
68 vec![
69 Parameter::required(
70 "value",
71 kind::BYTES,
72 "The string of the duration with either binary or SI unit.",
73 ),
74 Parameter::required("unit", kind::BYTES, "The output units for the byte.")
75 .enum_variants(UNIT_ENUM),
76 Parameter::optional(
77 "base",
78 kind::BYTES,
79 "The base for the byte, either 2 or 10.",
80 )
81 .default(&DEFAULT_BASE),
82 ]
83});
84
85fn parse_bytes(bytes: &Value, unit: Value, base: &Bytes) -> Resolved {
86 let (units, parse_config) = match base.as_ref() {
87 b"2" => (&*BIN_UNITS, Config::new().with_binary()),
88 b"10" => (&*DEC_UNITS, Config::new().with_decimal()),
89 _ => unreachable!("enum invariant"),
90 };
91 let value = bytes.try_bytes_utf8_lossy()?;
92 let value: &str = value.as_ref();
93 let conversion_factor = {
94 let bytes = unit.try_bytes()?;
95 let string = String::from_utf8_lossy(&bytes);
96
97 units
98 .get(string.as_ref())
99 .ok_or(format!("unknown unit format: '{string}'"))?
100 };
101 let value = parse_config
102 .parse_size(value)
103 .map_err(|e| format!("unable to parse bytes: '{e}'"))?;
104 let value = Decimal::from_u64(value).ok_or(format!("unable to parse number: {value}"))?;
105 let number = value
106 .checked_div(*conversion_factor)
107 .ok_or("division by >1 divisor overflowed")?; let number = number
109 .to_f64()
110 .ok_or(format!("unable to parse number: '{number}'"))?;
111 Ok(Value::from_f64_or_zero(number))
112}
113
114static BIN_UNITS: LazyLock<HashMap<String, Decimal>> = LazyLock::new(|| {
117 vec![
118 ("B", Decimal::new(1, 0)),
119 ("KiB", Decimal::new(1_024, 0)),
120 ("MiB", Decimal::new(1_048_576, 0)),
121 ("GiB", Decimal::new(1_073_741_824, 0)),
122 ("TiB", Decimal::new(1_099_511_627_776, 0)),
123 ("PiB", Decimal::new(1_125_899_906_842_624, 0)),
124 ("EiB", Decimal::new(1_152_921_504_606_846_976, 0)),
125 ("KB", Decimal::new(1_024, 0)),
127 ("MB", Decimal::new(1_048_576, 0)),
128 ("GB", Decimal::new(1_073_741_824, 0)),
129 ("TB", Decimal::new(1_099_511_627_776, 0)),
130 ("PB", Decimal::new(1_125_899_906_842_624, 0)),
131 ("EB", Decimal::new(1_152_921_504_606_846_976, 0)),
132 ]
133 .into_iter()
134 .map(|(k, v)| (k.to_owned(), v))
135 .collect()
136});
137static DEC_UNITS: LazyLock<HashMap<String, Decimal>> = LazyLock::new(|| {
139 vec![
140 ("B", Decimal::new(1, 0)),
141 ("kB", Decimal::new(1_000, 0)),
142 ("MB", Decimal::new(1_000_000, 0)),
143 ("GB", Decimal::new(1_000_000_000, 0)),
144 ("TB", Decimal::new(1_000_000_000_000, 0)),
145 ("PB", Decimal::new(1_000_000_000_000_000, 0)),
146 ("EB", Decimal::new(1_000_000_000_000_000_000, 0)),
147 ]
148 .into_iter()
149 .map(|(k, v)| (k.to_owned(), v))
150 .collect()
151});
152
/// Marker type that implements the `parse_bytes` function.
#[derive(Debug, Clone, Copy)]
pub struct ParseBytes;
155
156fn base_sets() -> Vec<Value> {
157 vec![value!("2"), value!("10")]
158}
159
160impl Function for ParseBytes {
161 fn identifier(&self) -> &'static str {
162 "parse_bytes"
163 }
164
165 fn usage(&self) -> &'static str {
166 "Parses the `value` into a human-readable bytes format specified by `unit` and `base`."
167 }
168
169 fn category(&self) -> &'static str {
170 Category::Parse.as_ref()
171 }
172
173 fn internal_failure_reasons(&self) -> &'static [&'static str] {
174 &["`value` is not a properly formatted bytes."]
175 }
176
177 fn return_kind(&self) -> u16 {
178 kind::FLOAT
179 }
180
181 fn examples(&self) -> &'static [Example] {
182 &[
183 example! {
184 title: "Parse bytes (kilobytes)",
185 source: r#"parse_bytes!("1024KiB", unit: "MiB")"#,
186 result: Ok("1.0"),
187 },
188 example! {
189 title: "Parse kilobytes in default binary units",
190 source: r#"parse_bytes!("1KiB", unit: "B")"#,
191 result: Ok("1024.0"),
192 },
193 example! {
194 title: "Parse bytes in SI unit (terabytes)",
195 source: r#"parse_bytes!("4TB", unit: "MB", base: "10")"#,
196 result: Ok("4000000.0"),
197 },
198 example! {
199 title: "Parse gigabytes in decimal units",
200 source: r#"parse_bytes!("1GB", unit: "B", base: "10")"#,
201 result: Ok("1000000000.0"),
202 },
203 example! {
204 title: "Parse bytes in ambiguous unit (gigabytes)",
205 source: r#"parse_bytes!("1GB", unit: "B", base: "2")"#,
206 result: Ok("1073741824.0"),
207 },
208 example! {
209 title: "Parse gigabytes in ambiguous decimal units",
210 source: r#"parse_bytes!("1GB", unit: "MB", base: "2")"#,
211 result: Ok("1024.0"),
212 },
213 ]
214 }
215
216 fn compile(
217 &self,
218 state: &state::TypeState,
219 _ctx: &mut FunctionCompileContext,
220 arguments: ArgumentList,
221 ) -> Compiled {
222 let value = arguments.required("value");
223 let unit = arguments.required("unit");
224 let base = arguments
225 .optional_enum("base", &base_sets(), state)?
226 .unwrap_or_else(|| DEFAULT_BASE.clone())
227 .try_bytes()
228 .expect("base not bytes");
229
230 Ok(ParseBytesFn { value, unit, base }.as_expr())
231 }
232
233 fn parameters(&self) -> &'static [Parameter] {
234 PARAMETERS.as_slice()
235 }
236}
237
238#[derive(Debug, Clone)]
239struct ParseBytesFn {
240 value: Box<dyn Expression>,
241 unit: Box<dyn Expression>,
242 base: Bytes,
243}
244
245impl FunctionExpression for ParseBytesFn {
246 fn resolve(&self, ctx: &mut Context) -> Resolved {
247 let bytes = self.value.resolve(ctx)?;
248 let unit = self.unit.resolve(ctx)?;
249
250 parse_bytes(&bytes, unit, &self.base)
251 }
252
253 fn type_def(&self, _: &state::TypeState) -> TypeDef {
254 TypeDef::float().fallible()
255 }
256}
257
#[cfg(test)]
mod tests {
    use super::*;
    use crate::value;

    test_function![
        parse_bytes => ParseBytes;

        // ---- `base` omitted: power-of-two units ----

        mib_b {
            args: func_args![value: "1MiB", unit: "B"],
            want: Ok(value!(1_048_576.0)),
            tdef: TypeDef::float().fallible(),
        }

        b_kib {
            args: func_args![value: "512B", unit: "KiB"],
            want: Ok(0.5),
            tdef: TypeDef::float().fallible(),
        }

        gib_mib {
            args: func_args![value: "3.5GiB", unit: "KiB"],
            want: Ok(3_670_016.0),
            tdef: TypeDef::float().fallible(),
        }

        tib_gib {
            args: func_args![value: "12 TiB", unit: "GiB"],
            want: Ok(12_288.0),
            tdef: TypeDef::float().fallible(),
        }

        mib_pib {
            args: func_args![value: "256TiB", unit: "PiB"],
            want: Ok(0.25),
            tdef: TypeDef::float().fallible(),
        }

        eib_tib {
            args: func_args![value: "1EiB", unit: "TiB"],
            want: Ok(value!(1_048_576.0)),
            tdef: TypeDef::float().fallible(),
        }

        // An SI-looking suffix with `base` omitted is read as a power of two.
        mib_b_ambiguous {
            args: func_args![value: "1MB", unit: "B",],
            want: Ok(value!(1_048_576.0)),
            tdef: TypeDef::float().fallible(),
        }

        // ---- base "10": powers of one thousand ----

        mb_b {
            args: func_args![value: "1MB", unit: "B", base: "10"],
            want: Ok(value!(1_000_000.0)),
            tdef: TypeDef::float().fallible(),
        }

        b_kb {
            args: func_args![value: "3B", unit: "kB", base: "10"],
            want: Ok(0.003),
            tdef: TypeDef::float().fallible(),
        }

        gb_mb {
            args: func_args![value: "3.007GB", unit: "kB", base: "10"],
            want: Ok(3_007_000.0),
            tdef: TypeDef::float().fallible(),
        }

        tb_gb {
            args: func_args![value: "12 TB", unit: "GB", base: "10"],
            want: Ok(12_000.0),
            tdef: TypeDef::float().fallible(),
        }

        mb_pb {
            args: func_args![value: "768MB", unit: "PB", base: "10"],
            want: Ok(0.000_000_768),
            tdef: TypeDef::float().fallible(),
        }

        eb_tb {
            args: func_args![value: "1EB", unit: "TB", base: "10"],
            want: Ok(value!(1_000_000.0)),
            tdef: TypeDef::float().fallible(),
        }

        // ---- failure cases ----

        // The size text is not a number at all.
        error_invalid {
            args: func_args![value: "foo", unit: "KiB"],
            want: Err("unable to parse bytes: 'invalid digit found in string'"),
            tdef: TypeDef::float().fallible(),
        }

        // The size text carries a suffix the size parser rejects.
        error_unit {
            args: func_args![value: "1YiB", unit: "MiB"],
            want: Err("unable to parse bytes: 'invalid digit found in string'"),
            tdef: TypeDef::float().fallible(),
        }

        // The requested output unit is missing from the lookup table.
        error_format {
            args: func_args![value: "100KB", unit: "ZB", base: "10"],
            want: Err("unknown unit format: 'ZB'"),
            tdef: TypeDef::float().fallible(),
        }
    ];
}