vector/components/validation/resources/mod.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379
mod event;
mod http;
use std::sync::Arc;
use tokio::sync::{mpsc, Mutex};
use vector_lib::{
codecs::{
decoding::{self, DeserializerConfig},
encoding::{
self, Framer, FramingConfig, JsonSerializerConfig, SerializerConfig,
TextSerializerConfig,
},
BytesEncoder,
},
config::LogNamespace,
};
use vector_lib::{config::DataType, event::Event};
use crate::codecs::{Decoder, DecodingConfig, Encoder, EncodingConfig, EncodingConfigWithFraming};
pub use self::event::{encode_test_event, TestEvent};
pub use self::http::HttpResourceConfig;
use self::http::HttpResourceOutputContext;
use super::{
sync::{Configuring, TaskCoordinator},
RunnerMetrics,
};
/// The codec used by the external resource.
///
/// This enum specifically exists to encapsulate the two main ways a component will configure the
/// codec it uses, which ends up being with directionally-specific codec configuration: "encoding"
/// when taking an `Event` and converting it to a raw output, and "decoding" when taking a raw
/// output and converting it to an `Event`.
///
/// Encoding and decoding are generally tied to sinks and sources, respectively.
#[derive(Clone)]
pub enum ResourceCodec {
/// Component encodes events.
///
/// As opposed to `EncodingWithFraming`, this variant carries no framing configuration of its
/// own: `into_decoder` falls back to the default stream framing reported by the serializer
/// configuration, while `into_encoder` uses byte framing.
///
/// Generally speaking, only sinks encode: going from `Event` to an encoded form.
Encoding(EncodingConfig),
/// Component encodes events, with a specific framer.
///
/// The framing part of the configuration is optional; byte framing is used when it is absent.
///
/// Generally speaking, only sinks encode: going from `Event` to an encoded form.
EncodingWithFraming(EncodingConfigWithFraming),
/// Component decodes events.
///
/// Generally speaking, only sources decode: going from an encoded form to `Event`.
Decoding(DecodingConfig),
}
impl ResourceCodec {
    /// Returns the event data types that the configured codec can work with.
    ///
    /// Some codecs have no means to losslessly represent every event type, so this is checked at
    /// runtime to make sure only payloads that the component can encode/decode are generated.
    ///
    /// The encoding variants report the input type of their serializer configuration, and the
    /// decoding variant reports the output type of its deserializer configuration.
    pub fn allowed_event_data_types(self) -> DataType {
        match self {
            Self::Decoding(decoding) => decoding.config().output_type(),
            Self::Encoding(encoding) => encoding.config().input_type(),
            Self::EncodingWithFraming(encoding) => {
                let (_, serializer) = encoding.config();
                serializer.input_type()
            }
        }
    }

    /// Builds an encoder for this codec.
    ///
    /// An encoding configuration is used directly. A decoding configuration is inverted: its
    /// framing and deserializer are translated into the matching framer and serializer.
    ///
    /// # Panics
    ///
    /// Panics if the serializer cannot be built, or if the decoding configuration uses a framing
    /// method or deserializer that has no encoding counterpart yet.
    pub fn into_encoder(&self) -> Encoder<encoding::Framer> {
        match self {
            Self::Encoding(config) => {
                // This variant carries no framing configuration; byte framing is used here.
                let framer = Framer::Bytes(BytesEncoder);
                let serializer = config.build().expect("should not fail to build serializer");
                Encoder::<encoding::Framer>::new(framer, serializer)
            }
            Self::EncodingWithFraming(config) => {
                let (maybe_framing, serializer_config) = config.config();
                // Framing is optional; fall back to byte framing when it was not configured.
                let framer = maybe_framing
                    .clone()
                    .unwrap_or(FramingConfig::Bytes)
                    .build();
                let serializer = serializer_config
                    .build()
                    .expect("building serializer should never fail");
                Encoder::<encoding::Framer>::new(framer, serializer)
            }
            Self::Decoding(config) => {
                let framer = decoder_framing_to_encoding_framer(config.framing());
                let serializer = deserializer_config_to_serializer(config.config());
                Encoder::<encoding::Framer>::new(framer, serializer)
            }
        }
    }

    /// Builds a decoder for this codec.
    ///
    /// A decoding configuration is built directly, without applying `log_namespace` here. An
    /// encoding configuration is inverted: its framing and serializer are translated into the
    /// matching decoding framer and deserializer, and `log_namespace` is applied to the result.
    ///
    /// # Errors
    ///
    /// Returns an error if the decoder, or the deserializer mirroring the serializer, cannot be
    /// built.
    pub fn into_decoder(&self, log_namespace: LogNamespace) -> vector_lib::Result<Decoder> {
        // Pick the encoding-side framing and serializer first, then invert both in one place.
        let (framing, serializer) = match self {
            Self::Decoding(config) => return config.build(),
            Self::Encoding(config) => {
                let serializer = config.config();
                (serializer.default_stream_framing(), serializer)
            }
            Self::EncodingWithFraming(config) => {
                let (maybe_framing, serializer) = config.config();
                (
                    maybe_framing.clone().unwrap_or(FramingConfig::Bytes),
                    serializer,
                )
            }
        };

        let framer = encoder_framing_to_decoding_framer(framing);
        let deserializer = serializer_config_to_deserializer(serializer)?;
        Ok(Decoder::new(framer, deserializer).with_log_namespace(log_namespace))
    }
}
impl From<EncodingConfig> for ResourceCodec {
fn from(config: EncodingConfig) -> Self {
Self::Encoding(config)
}
}
impl From<EncodingConfigWithFraming> for ResourceCodec {
fn from(config: EncodingConfigWithFraming) -> Self {
Self::EncodingWithFraming(config)
}
}
impl From<DecodingConfig> for ResourceCodec {
fn from(config: DecodingConfig) -> Self {
Self::Decoding(config)
}
}
/// Builds the serializer that mirrors the given deserializer configuration.
///
/// This is the inversion used when an encoder is requested for a decoding codec.
///
/// # Panics
///
/// Panics for deserializers that have no serializer mapped here (`Influxdb`, `Vrl`), or if the
/// mirrored serializer fails to build.
fn deserializer_config_to_serializer(config: &DeserializerConfig) -> encoding::Serializer {
    let mirrored = match config {
        DeserializerConfig::Native => SerializerConfig::Native,
        DeserializerConfig::NativeJson { .. } => SerializerConfig::NativeJson,
        DeserializerConfig::Json { .. } => SerializerConfig::Json(JsonSerializerConfig::default()),
        DeserializerConfig::Gelf { .. } => SerializerConfig::Gelf,
        DeserializerConfig::Avro { avro } => SerializerConfig::Avro { avro: avro.into() },
        // TODO: This isn't necessarily a one-to-one conversion, at least not in the future when
        // "bytes" can be a top-level field and we aren't implicitly decoding everything into the
        // `message` field... but it's close enough for now.
        DeserializerConfig::Bytes => SerializerConfig::Text(TextSerializerConfig::default()),
        DeserializerConfig::Protobuf(source) => {
            let protobuf = encoding::ProtobufSerializerOptions {
                desc_file: source.protobuf.desc_file.clone(),
                message_type: source.protobuf.message_type.clone(),
            };
            SerializerConfig::Protobuf(encoding::ProtobufSerializerConfig { protobuf })
        }
        // TODO: No Syslog serializer is mapped here, so Logfmt is used as a stand-in. A source
        // that strictly decodes Syslog may well reject this output -- needs a real Syslog
        // serializer.
        #[cfg(feature = "codecs-syslog")]
        DeserializerConfig::Syslog { .. } => SerializerConfig::Logfmt,
        // TODO: Influxdb has no serializer yet
        DeserializerConfig::Influxdb { .. } => todo!(),
        DeserializerConfig::Vrl { .. } => unimplemented!(),
    };

    mirrored
        .build()
        .expect("building serializer should never fail")
}
fn decoder_framing_to_encoding_framer(framing: &decoding::FramingConfig) -> encoding::Framer {
let framing_config = match framing {
decoding::FramingConfig::Bytes => encoding::FramingConfig::Bytes,
decoding::FramingConfig::CharacterDelimited(config) => {
encoding::FramingConfig::CharacterDelimited(encoding::CharacterDelimitedEncoderConfig {
character_delimited: encoding::CharacterDelimitedEncoderOptions {
delimiter: config.character_delimited.delimiter,
},
})
}
decoding::FramingConfig::LengthDelimited(config) => {
encoding::FramingConfig::LengthDelimited(encoding::LengthDelimitedEncoderConfig {
length_delimited: config.length_delimited.clone(),
})
}
decoding::FramingConfig::NewlineDelimited(_) => encoding::FramingConfig::NewlineDelimited,
// TODO: There's no equivalent octet counting framer for encoding... although
// there's no particular reason that would make it hard to write.
decoding::FramingConfig::OctetCounting(_) => todo!(),
// TODO: chunked gelf is not supported yet in encoding
decoding::FramingConfig::ChunkedGelf(_) => todo!(),
};
framing_config.build()
}
/// Builds the deserializer that mirrors the given serializer configuration.
///
/// This is the inversion used when a decoder is requested for an encoding codec.
///
/// # Errors
///
/// Returns an error if the serializer has no deserializer mapped here yet (`Avro`, `Cef`, `Csv`,
/// `Logfmt`), or if the mirrored deserializer fails to build. Unsupported serializers are
/// reported as errors rather than panics because this function is already fallible and callers
/// propagate its result.
fn serializer_config_to_deserializer(
    config: &SerializerConfig,
) -> vector_lib::Result<decoding::Deserializer> {
    /// Formats the message reported for a serializer that cannot be mirrored.
    fn unsupported(codec: &str) -> String {
        format!(
            "no deserializer is available to mirror the `{}` serializer",
            codec
        )
    }

    let deserializer_config = match config {
        // TODO: None of these serializers is mapped to a deserializer yet.
        SerializerConfig::Avro { .. } => return Err(unsupported("avro").into()),
        SerializerConfig::Cef { .. } => return Err(unsupported("cef").into()),
        SerializerConfig::Csv { .. } => return Err(unsupported("csv").into()),
        SerializerConfig::Logfmt => return Err(unsupported("logfmt").into()),
        SerializerConfig::Gelf => DeserializerConfig::Gelf(Default::default()),
        SerializerConfig::Json(_) => DeserializerConfig::Json(Default::default()),
        SerializerConfig::Native => DeserializerConfig::Native,
        SerializerConfig::NativeJson => DeserializerConfig::NativeJson(Default::default()),
        SerializerConfig::Protobuf(config) => {
            DeserializerConfig::Protobuf(decoding::ProtobufDeserializerConfig {
                protobuf: decoding::ProtobufDeserializerOptions {
                    desc_file: config.protobuf.desc_file.clone(),
                    message_type: config.protobuf.message_type.clone(),
                },
            })
        }
        // Both plain-text style serializers are read back as raw bytes.
        SerializerConfig::RawMessage | SerializerConfig::Text(_) => DeserializerConfig::Bytes,
    };

    deserializer_config.build()
}
fn encoder_framing_to_decoding_framer(framing: encoding::FramingConfig) -> decoding::Framer {
let framing_config = match framing {
encoding::FramingConfig::Bytes => decoding::FramingConfig::Bytes,
encoding::FramingConfig::CharacterDelimited(config) => {
decoding::FramingConfig::CharacterDelimited(decoding::CharacterDelimitedDecoderConfig {
character_delimited: decoding::CharacterDelimitedDecoderOptions {
delimiter: config.character_delimited.delimiter,
max_length: None,
},
})
}
encoding::FramingConfig::LengthDelimited(config) => {
decoding::FramingConfig::LengthDelimited(decoding::LengthDelimitedDecoderConfig {
length_delimited: config.length_delimited.clone(),
})
}
encoding::FramingConfig::NewlineDelimited => {
decoding::FramingConfig::NewlineDelimited(Default::default())
}
};
framing_config.build()
}
/// Direction that the resource is operating in.
#[derive(Clone)]
pub enum ResourceDirection {
/// Resource will have the component pull data from it, or pull data from the component.
///
/// For a source, where an external resource functions in "input" mode, this would be the
/// equivalent of the source calling out to the external resource (HTTP server, Kafka cluster,
/// etc) and asking for data, or expecting it to be returned in the response.
///
/// For a sink, where an external resource functions in "output" mode, this would be the
/// equivalent of the sink exposing a network endpoint and having the external resource be
/// responsible for connecting to the endpoint to grab the data.
Pull,
/// Resource will push data to the component, or have data pushed to it from the component.
///
/// For a source, where an external resource functions in "input" mode, this would be the
/// equivalent of the source waiting for data to be sent to it, whether it's listening on a
/// network endpoint for traffic, or polling files on disk for updates, and the external
/// resource would be responsible for initiating that communication, or writing to those files.
///
/// For a sink, where an external resource functions in "output" mode, this would be the
/// equivalent of the sink pushing its data to a network endpoint, or writing data to files,
/// where the external resource would be responsible for aggregating that data, or reading from
/// those files.
Push,
}
/// A resource definition.
///
/// Resource definitions uniquely identify the resource, such as HTTP, or files, and so on. These
/// definitions generally include the bare minimum amount of information to allow the component
/// validation runner to create an instance of them, such as spawning an HTTP server if a source has
/// specified an HTTP resource in the "pull" direction.
///
/// HTTP is the only kind of resource defined by this enum at present.
#[derive(Clone)]
pub enum ResourceDefinition {
/// An HTTP resource, described by an `HttpResourceConfig`.
Http(HttpResourceConfig),
}
impl From<HttpResourceConfig> for ResourceDefinition {
fn from(config: HttpResourceConfig) -> Self {
Self::Http(config)
}
}
/// An external resource associated with a component.
///
/// External resources represent the hypothetical location where, depending on whether the component
/// is a source or sink, data would be generated from or collected at. This includes things like
/// network endpoints (raw sockets, HTTP servers, etc) as well as files on disk, and more. In other
/// words, an external resource is a data dependency associated with the component, whether the
/// component depends on data from the external resource, or the external resource depends on data
/// from the component.
///
/// An external resource includes a direction -- push or pull -- as well as the fundamental
/// definition of the resource, such as HTTP or file. The component type is used to further refine
/// the direction of the resource, such that a "pull" resource used with a source implies the source
/// will pull data from the external resource, whereas a "pull" resource used with a sink implies
/// the external resource must pull the data from the sink.
#[derive(Clone)]
pub struct ExternalResource {
/// Whether data is pushed or pulled between the component and this resource.
pub direction: ResourceDirection,
/// The kind of resource to spawn. Private: it is set through `ExternalResource::new` and
/// consumed by the `spawn_as_*` methods.
definition: ResourceDefinition,
/// The codec the component uses to encode or decode its data.
pub codec: ResourceCodec,
}
impl ExternalResource {
/// Creates a new `ExternalResource` based on the given `direction`, `definition`, and `codec`.
pub fn new<D, C>(direction: ResourceDirection, definition: D, codec: C) -> Self
where
D: Into<ResourceDefinition>,
C: Into<ResourceCodec>,
{
Self {
direction,
definition: definition.into(),
codec: codec.into(),
}
}
/// Spawns this resource for use as an input to a source.
pub fn spawn_as_input(
self,
input_rx: mpsc::Receiver<TestEvent>,
task_coordinator: &TaskCoordinator<Configuring>,
runner_metrics: &Arc<Mutex<RunnerMetrics>>,
) {
match self.definition {
ResourceDefinition::Http(http_config) => http_config.spawn_as_input(
self.direction,
self.codec,
input_rx,
task_coordinator,
runner_metrics,
),
}
}
/// Spawns this resource for use as an output for a sink.
pub fn spawn_as_output(
self,
output_tx: mpsc::Sender<Vec<Event>>,
task_coordinator: &TaskCoordinator<Configuring>,
input_events: Vec<TestEvent>,
runner_metrics: &Arc<Mutex<RunnerMetrics>>,
log_namespace: LogNamespace,
) -> vector_lib::Result<()> {
match self.definition {
ResourceDefinition::Http(http_config) => {
http_config.spawn_as_output(HttpResourceOutputContext {
direction: self.direction,
codec: self.codec,
output_tx,
task_coordinator,
input_events,
runner_metrics,
log_namespace,
})
}
}
}
}