1use std::cmp::Ordering;
2
3use snafu::Snafu;
4
5use crate::event::metric::Sample;
6
7#[derive(Debug, Snafu)]
8pub enum ValidationError {
9 #[snafu(display("Quantiles must be in range [0.0,1.0]"))]
10 QuantileOutOfRange,
11}
12
/// Summary statistics describing a set of weighted samples.
///
/// Values of this type are built by [`DistributionStatistic::from_samples`].
#[derive(Debug)]
pub struct DistributionStatistic {
    /// Smallest sample value.
    pub min: f64,
    /// Largest sample value.
    pub max: f64,
    /// Sample value found at the 0.5 quantile.
    pub median: f64,
    /// Rate-weighted mean of the sample values.
    pub avg: f64,
    /// Sum of every sample value multiplied by its rate.
    pub sum: f64,
    /// Total of the sample rates.
    pub count: u64,
    /// `(probability, value)` pairs, one per requested quantile, in request order.
    pub quantiles: Vec<(f64, f64)>,
}
24
25impl DistributionStatistic {
26 pub fn from_samples(source: &[Sample], quantiles: &[f64]) -> Option<Self> {
27 let mut bins = source
28 .iter()
29 .filter(|sample| sample.rate > 0)
30 .copied()
31 .collect::<Vec<_>>();
32
33 match bins.len() {
34 0 => None,
35 1 => Some({
36 let val = bins[0].value;
37 let count = bins[0].rate;
38 Self {
39 min: val,
40 max: val,
41 median: val,
42 avg: val,
43 sum: val * count as f64,
44 count: count as u64,
45 quantiles: quantiles.iter().map(|&p| (p, val)).collect(),
46 }
47 }),
48 _ => Some({
49 bins.sort_unstable_by(|a, b| {
50 a.value.partial_cmp(&b.value).unwrap_or(Ordering::Equal)
51 });
52
53 let min = bins.first().unwrap().value;
54 let max = bins.last().unwrap().value;
55 let sum = bins
56 .iter()
57 .map(|sample| sample.value * sample.rate as f64)
58 .sum::<f64>();
59
60 for i in 1..bins.len() {
61 bins[i].rate += bins[i - 1].rate;
62 }
63
64 let count = bins.last().unwrap().rate;
65 let avg = sum / count as f64;
66
67 let median = find_quantile(&bins, 0.5);
68 let quantiles = quantiles
69 .iter()
70 .map(|&p| (p, find_quantile(&bins, p)))
71 .collect();
72
73 Self {
74 min,
75 max,
76 median,
77 avg,
78 sum,
79 count: count as u64,
80 quantiles,
81 }
82 }),
83 }
84 }
85}
86
87fn find_quantile(bins: &[Sample], p: f64) -> f64 {
94 let count = bins.last().expect("bins is empty").rate;
95 find_sample(bins, (p * count as f64).round() as u32)
96}
97
98fn find_sample(bins: &[Sample], i: u32) -> f64 {
103 let index = match bins.binary_search_by_key(&i, |sample| sample.rate) {
104 Ok(index) => index,
105 Err(index) => index,
106 };
107 bins[index].value
108}
109
110pub fn validate_quantiles(quantiles: &[f64]) -> Result<(), ValidationError> {
111 if quantiles
112 .iter()
113 .all(|&quantile| (0.0..=1.0).contains(&quantile))
114 {
115 Ok(())
116 } else {
117 Err(ValidationError::QuantileOutOfRange)
118 }
119}
120
#[cfg(test)]
mod test {
    use super::*;

    // Test-only equality so `assert_eq!` can compare whole statistics.
    impl PartialEq<Self> for DistributionStatistic {
        fn eq(&self, other: &Self) -> bool {
            self.min == other.min
                && self.max == other.max
                && self.median == other.median
                && self.avg == other.avg
                && self.sum == other.sum
                && self.count == other.count
                // Compare the vectors as a whole: zipping them would stop at the
                // shorter one and hide a missing or extra quantile.
                && self.quantiles == other.quantiles
        }
    }

    impl Eq for DistributionStatistic {}

    /// Builds `Sample`s from `(value, rate)` pairs.
    fn samples(v: &[(f64, u32)]) -> Vec<Sample> {
        v.iter()
            .map(|&(value, rate)| Sample { value, rate })
            .collect()
    }

    #[test]
    fn test_distribution() {
        // No samples at all, or only zero-rate samples, give no statistic.
        assert_eq!(DistributionStatistic::from_samples(&[], &[0.5]), None);
        assert_eq!(
            DistributionStatistic::from_samples(&samples(&[(0.0, 0)]), &[0.5]),
            None
        );

        // A single bin.
        assert_eq!(
            DistributionStatistic::from_samples(&samples(&[(0.9, 100)]), &[0.5]).unwrap(),
            DistributionStatistic {
                min: 0.9,
                max: 0.9,
                median: 0.9,
                avg: 0.9,
                sum: 90.0,
                count: 100,
                quantiles: vec![(0.5, 0.9)],
            }
        );

        // Several unit-rate bins, no quantiles requested.
        assert_eq!(
            DistributionStatistic::from_samples(
                &samples(&[(1.0, 1), (2.0, 1), (3.0, 1), (4.0, 1), (5.0, 1)]),
                &[]
            )
            .unwrap(),
            DistributionStatistic {
                min: 1.0,
                max: 5.0,
                median: 3.0,
                avg: 3.0,
                sum: 15.0,
                count: 5,
                quantiles: Vec::new(),
            }
        );

        // Unsorted input with boundary quantiles.
        assert_eq!(
            DistributionStatistic::from_samples(
                &samples(&[(1.0, 1), (2.0, 1), (4.0, 1), (3.0, 1)]),
                &[0.0, 1.0, 0.9]
            )
            .unwrap(),
            DistributionStatistic {
                min: 1.0,
                max: 4.0,
                median: 2.0,
                avg: 2.5,
                sum: 10.0,
                count: 4,
                quantiles: vec![(0.0, 1.0), (1.0, 4.0), (0.9, 4.0)],
            }
        );

        // Bins with different rates.
        assert_eq!(
            DistributionStatistic::from_samples(
                &samples(&[(1.0, 2), (2.0, 1), (3.0, 4), (4.0, 3)]),
                &[0.75, 0.3, 0.31, 0.29, 0.24],
            )
            .unwrap(),
            DistributionStatistic {
                min: 1.0,
                max: 4.0,
                median: 3.0,
                avg: 2.8,
                sum: 28.0,
                count: 10,
                quantiles: vec![
                    (0.75, 4.0),
                    (0.3, 2.0),
                    (0.31, 2.0),
                    (0.29, 2.0),
                    (0.24, 1.0)
                ],
            }
        );
    }

    #[test]
    fn test_eq_detects_quantile_length_mismatch() {
        let with_quantile =
            DistributionStatistic::from_samples(&samples(&[(0.9, 100)]), &[0.5]).unwrap();
        let without_quantile =
            DistributionStatistic::from_samples(&samples(&[(0.9, 100)]), &[]).unwrap();
        assert_ne!(with_quantile, without_quantile);
    }
}