vector_buffers/topology/
builder.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
use std::{error::Error, num::NonZeroUsize};

use async_trait::async_trait;
use snafu::{ResultExt, Snafu};
use tracing::Span;

use super::channel::{ReceiverAdapter, SenderAdapter};
use crate::{
    buffer_usage_data::{BufferUsage, BufferUsageHandle},
    topology::channel::{BufferReceiver, BufferSender},
    variants::MemoryBuffer,
    Bufferable, WhenFull,
};

/// Value that can be used as a stage in a buffer topology.
#[async_trait]
pub trait IntoBuffer<T: Bufferable>: Send {
    /// Gets whether or not this buffer stage provides its own instrumentation, or if it should be
    /// instrumented from the outside.
    ///
    /// As some buffer stages, like the in-memory channel, never have a chance to catch the values
    /// in the middle of the channel without introducing an unnecessary hop, [`BufferSender`] and
    /// [`BufferReceiver`] can be configured to instrument all events flowing through directly.
    ///
    /// When instrumentation is provided in this way, [`vector_common::byte_size_of::ByteSizeOf`]
    ///  is used to calculate the size of the event going both into and out of the buffer.
    fn provides_instrumentation(&self) -> bool {
        false
    }

    /// Converts this value into a sender and receiver pair suitable for use in a buffer topology.
    async fn into_buffer_parts(
        self: Box<Self>,
        usage_handle: BufferUsageHandle,
    ) -> Result<(SenderAdapter<T>, ReceiverAdapter<T>), Box<dyn Error + Send + Sync>>;
}

#[derive(Debug, Snafu)]
pub enum TopologyError {
    #[snafu(display("buffer topology cannot be empty"))]
    EmptyTopology,
    #[snafu(display(
        "stage {} configured with block/drop newest behavior in front of subsequent stage",
        stage_idx
    ))]
    NextStageNotUsed { stage_idx: usize },
    #[snafu(display("last stage in buffer topology cannot be set to overflow mode"))]
    OverflowWhenLast,
    #[snafu(display("failed to build individual stage {}: {}", stage_idx, source))]
    FailedToBuildStage {
        stage_idx: usize,
        source: Box<dyn Error + Send + Sync>,
    },
    #[snafu(display(
        "multiple components with segmented acknowledgements cannot be used in the same buffer"
    ))]
    StackedAcks,
}

struct TopologyStage<T: Bufferable> {
    untransformed: Box<dyn IntoBuffer<T>>,
    when_full: WhenFull,
}

/// Builder for constructing buffer topologies.
pub struct TopologyBuilder<T: Bufferable> {
    stages: Vec<TopologyStage<T>>,
}

impl<T: Bufferable> TopologyBuilder<T> {
    /// Adds a new stage to the buffer topology.
    ///
    /// The "when full" behavior can be optionally configured here.  If no behavior is specified,
    /// and an overflow buffer is _not_ added to the topology after this, then the "when full"
    /// behavior will use a default value of "block".  If a "when full" behavior is specified, and
    /// an overflow buffer is added to the topology after this, then the specified "when full"
    /// behavior will be ignored and will be set to "overflow" mode.
    ///
    /// Callers can configure what to do when a buffer is full by setting `when_full`.  Three modes
    /// are available -- block, drop newest, and overflow -- which are documented in more detail by
    /// [`BufferSender`].
    ///
    /// Two notes about what modes are not valid in certain scenarios:
    /// - the innermost stage (the last stage given to the builder) cannot be set to "overflow" mode,
    ///   as there is no other stage to overflow to
    /// - a stage cannot use the "block" or "drop newest" mode when there is a subsequent stage, and
    ///   must user the "overflow" mode
    ///
    /// Any occurrence of either of these scenarios will result in an error during build.
    pub fn stage<S>(&mut self, stage: S, when_full: WhenFull) -> &mut Self
    where
        S: IntoBuffer<T> + 'static,
    {
        self.stages.push(TopologyStage {
            untransformed: Box::new(stage),
            when_full,
        });
        self
    }

    /// Consumes this builder, returning the sender and receiver that can be used by components.
    ///
    /// # Errors
    ///
    /// If there was a configuration error with one of the stages, an error variant will be returned
    /// explaining the issue.
    pub async fn build(
        self,
        buffer_id: String,
        span: Span,
    ) -> Result<(BufferSender<T>, BufferReceiver<T>), TopologyError> {
        // We pop stages off in reverse order to build from the inside out.
        let mut buffer_usage = BufferUsage::from_span(span.clone());
        let mut current_stage = None;

        for (stage_idx, stage) in self.stages.into_iter().enumerate().rev() {
            // Make sure the stage is valid for our current builder state.
            match stage.when_full {
                // The innermost stage can't be set to overflow, there's nothing else to overflow _to_.
                WhenFull::Overflow => {
                    if current_stage.is_none() {
                        return Err(TopologyError::OverflowWhenLast);
                    }
                }
                // If there's already an inner stage, then blocking or dropping the newest events
                // doesn't no sense.  Overflowing is the only valid transition to another stage.
                WhenFull::Block | WhenFull::DropNewest => {
                    if current_stage.is_some() {
                        return Err(TopologyError::NextStageNotUsed { stage_idx });
                    }
                }
            };

            // Create the buffer usage handle for this stage and initialize it as we create the
            // sender/receiver.  This is slightly awkward since we just end up actually giving
            // the handle to the `BufferSender`/`BufferReceiver` wrappers, but that's the price we
            // have to pay for letting each stage function in an opaque way when wrapped.
            let usage_handle = buffer_usage.add_stage(stage_idx);
            let provides_instrumentation = stage.untransformed.provides_instrumentation();
            let (sender, receiver) = stage
                .untransformed
                .into_buffer_parts(usage_handle.clone())
                .await
                .context(FailedToBuildStageSnafu { stage_idx })?;

            let (mut sender, mut receiver) = match current_stage.take() {
                None => (
                    BufferSender::new(sender, stage.when_full),
                    BufferReceiver::new(receiver),
                ),
                Some((current_sender, current_receiver)) => (
                    BufferSender::with_overflow(sender, current_sender),
                    BufferReceiver::with_overflow(receiver, current_receiver),
                ),
            };

            sender.with_send_duration_instrumentation(stage_idx, &span);
            if !provides_instrumentation {
                sender.with_usage_instrumentation(usage_handle.clone());
                receiver.with_usage_instrumentation(usage_handle);
            }

            current_stage = Some((sender, receiver));
        }

        let (sender, receiver) = current_stage.ok_or(TopologyError::EmptyTopology)?;

        // Install the buffer usage handler since we successfully created the buffer topology.  This
        // spawns it in the background and periodically emits aggregated metrics about each of the
        // buffer stages.
        buffer_usage.install(buffer_id.as_str());

        Ok((sender, receiver))
    }
}

impl<T: Bufferable> TopologyBuilder<T> {
    /// Creates a memory-only buffer topology.
    ///
    /// The overflow mode (i.e. `WhenFull`) can be configured to either block or drop the newest
    /// values, but cannot be configured to use overflow mode.  If overflow mode is selected, it
    /// will be changed to blocking mode.
    ///
    /// This is a convenience method for `vector` as it is used for inter-transform channels, and we
    /// can simplifying needing to require callers to do all the boilerplate to create the builder,
    /// create the stage, installing buffer usage metrics that aren't required, and so on.
    ///
    #[allow(clippy::print_stderr)]
    pub async fn standalone_memory(
        max_events: NonZeroUsize,
        when_full: WhenFull,
        receiver_span: &Span,
    ) -> (BufferSender<T>, BufferReceiver<T>) {
        let usage_handle = BufferUsageHandle::noop();

        let memory_buffer = Box::new(MemoryBuffer::new(max_events));
        let (sender, receiver) = memory_buffer
            .into_buffer_parts(usage_handle.clone())
            .await
            .unwrap_or_else(|_| unreachable!("should not fail to directly create a memory buffer"));

        let mode = match when_full {
            WhenFull::Overflow => WhenFull::Block,
            m => m,
        };
        let mut sender = BufferSender::new(sender, mode);
        sender.with_send_duration_instrumentation(0, receiver_span);
        let receiver = BufferReceiver::new(receiver);

        (sender, receiver)
    }

    /// Creates a memory-only buffer topology with the given buffer usage handle.
    ///
    /// This is specifically required for the tests that occur under `buffers`, as we assert things
    /// like channel capacity left, which cannot be done on in-memory v1 buffers as they use the
    /// more abstract `Sink`-based adapters.
    ///
    /// The overflow mode (i.e. `WhenFull`) can be configured to either block or drop the newest
    /// values, but cannot be configured to use overflow mode.  If overflow mode is selected, it
    /// will be changed to blocking mode.
    ///
    /// This is a convenience method for `vector` as it is used for inter-transform channels, and we
    /// can simplifying needing to require callers to do all the boilerplate to create the builder,
    /// create the stage, installing buffer usage metrics that aren't required, and so on.
    #[cfg(test)]
    pub async fn standalone_memory_test(
        max_events: NonZeroUsize,
        when_full: WhenFull,
        usage_handle: BufferUsageHandle,
    ) -> (BufferSender<T>, BufferReceiver<T>) {
        let memory_buffer = Box::new(MemoryBuffer::new(max_events));
        let (sender, receiver) = memory_buffer
            .into_buffer_parts(usage_handle.clone())
            .await
            .unwrap_or_else(|_| unreachable!("should not fail to directly create a memory buffer"));

        let mode = match when_full {
            WhenFull::Overflow => WhenFull::Block,
            m => m,
        };
        let mut sender = BufferSender::new(sender, mode);
        let mut receiver = BufferReceiver::new(receiver);

        sender.with_usage_instrumentation(usage_handle.clone());
        receiver.with_usage_instrumentation(usage_handle);

        (sender, receiver)
    }
}

impl<T: Bufferable> Default for TopologyBuilder<T> {
    fn default() -> Self {
        Self { stages: Vec::new() }
    }
}

#[cfg(test)]
mod tests {
    use std::num::NonZeroUsize;

    use tracing::Span;

    use super::TopologyBuilder;
    use crate::{
        topology::builder::TopologyError,
        topology::test_util::{assert_current_send_capacity, Sample},
        variants::MemoryBuffer,
        WhenFull,
    };

    #[tokio::test]
    async fn single_stage_topology_block() {
        let mut builder = TopologyBuilder::<Sample>::default();
        builder.stage(
            MemoryBuffer::new(NonZeroUsize::new(1).unwrap()),
            WhenFull::Block,
        );
        let result = builder.build(String::from("test"), Span::none()).await;
        assert!(result.is_ok());

        let (mut sender, _) = result.unwrap();
        assert_current_send_capacity(&mut sender, Some(1), None);
    }

    #[tokio::test]
    async fn single_stage_topology_drop_newest() {
        let mut builder = TopologyBuilder::<Sample>::default();
        builder.stage(
            MemoryBuffer::new(NonZeroUsize::new(1).unwrap()),
            WhenFull::DropNewest,
        );
        let result = builder.build(String::from("test"), Span::none()).await;
        assert!(result.is_ok());

        let (mut sender, _) = result.unwrap();
        assert_current_send_capacity(&mut sender, Some(1), None);
    }

    #[tokio::test]
    async fn single_stage_topology_overflow() {
        let mut builder = TopologyBuilder::<Sample>::default();
        builder.stage(
            MemoryBuffer::new(NonZeroUsize::new(1).unwrap()),
            WhenFull::Overflow,
        );
        let result = builder.build(String::from("test"), Span::none()).await;
        match result {
            Err(TopologyError::OverflowWhenLast) => {}
            r => panic!("unexpected build result: {r:?}"),
        }
    }

    #[tokio::test]
    async fn two_stage_topology_block() {
        let mut builder = TopologyBuilder::<Sample>::default();
        builder.stage(
            MemoryBuffer::new(NonZeroUsize::new(1).unwrap()),
            WhenFull::Block,
        );
        builder.stage(
            MemoryBuffer::new(NonZeroUsize::new(1).unwrap()),
            WhenFull::Block,
        );
        let result = builder.build(String::from("test"), Span::none()).await;
        match result {
            Err(TopologyError::NextStageNotUsed { stage_idx }) => assert_eq!(stage_idx, 0),
            r => panic!("unexpected build result: {r:?}"),
        }
    }

    #[tokio::test]
    async fn two_stage_topology_drop_newest() {
        let mut builder = TopologyBuilder::<Sample>::default();
        builder.stage(
            MemoryBuffer::new(NonZeroUsize::new(1).unwrap()),
            WhenFull::DropNewest,
        );
        builder.stage(
            MemoryBuffer::new(NonZeroUsize::new(1).unwrap()),
            WhenFull::Block,
        );
        let result = builder.build(String::from("test"), Span::none()).await;
        match result {
            Err(TopologyError::NextStageNotUsed { stage_idx }) => assert_eq!(stage_idx, 0),
            r => panic!("unexpected build result: {r:?}"),
        }
    }

    #[tokio::test]
    async fn two_stage_topology_overflow() {
        let mut builder = TopologyBuilder::<Sample>::default();
        builder.stage(
            MemoryBuffer::new(NonZeroUsize::new(1).unwrap()),
            WhenFull::Overflow,
        );
        builder.stage(
            MemoryBuffer::new(NonZeroUsize::new(1).unwrap()),
            WhenFull::Block,
        );

        let result = builder.build(String::from("test"), Span::none()).await;
        assert!(result.is_ok());

        let (mut sender, _) = result.unwrap();
        assert_current_send_capacity(&mut sender, Some(1), Some(1));
    }
}