vdev/commands/check/
events.rs

1//! Walks `src/**/*.rs` and `lib/**/*.rs`, extracts internal-event definitions
2//! and `tracing` log calls via `syn`'s AST, and validates them against the
3//! rules in `docs/specs/instrumentation.md`. Macro argument scraping (the
4//! contents of `counter!(...)`, `trace!(...)`, etc.) uses small targeted
5//! regexes on the macro's already-tokenised input — never on raw source.
6
7#![allow(clippy::print_stdout, clippy::print_stderr)]
8
9use std::{
10    collections::{BTreeMap, HashMap},
11    fs,
12    path::PathBuf,
13    process,
14    sync::LazyLock,
15};
16
17use anyhow::Result;
18use convert_case::{Case, Casing};
19use glob::glob;
20use proc_macro2::TokenStream;
21use quote::ToTokens;
22use regex::Regex;
23use syn::{
24    ItemImpl, ItemStruct, Type,
25    spanned::Spanned,
26    visit::{self, Visit},
27};
28
/// Tags that an event-class log call must carry but that are filtered out of
/// the tag requirements for the matching counters (see `check_event_class`).
const BYTE_SIZE_COUNT: &[&str] = &["byte_size", "count"];

/// Counter name inspected by the dropped-events checks.
const METRIC_NAME_EVENTS_DROPPED: &str = "component_discarded_events_total";
/// Counter name inspected by the error-event checks.
const METRIC_NAME_ERROR: &str = "component_errors_total";
33
/// Instrumentation rules shared by every event whose name ends with a given
/// suffix (the suffix itself is the key in `EVENT_CLASSES`).
struct EventClass {
    /// Exact log message text that events of this class must use.
    message: &'static str,
    /// Counter name stems; the checked counter is `component_<stem>_total`.
    counters: &'static [&'static str],
    /// Tags required on the log call. The same tags, minus those listed in
    /// `BYTE_SIZE_COUNT`, are required on each counter.
    additional_tags: &'static [&'static str],
}
42
43const EVENT_CLASSES: &[(&str, EventClass)] = &[
44    (
45        "BytesReceived",
46        EventClass {
47            message: "Bytes received.",
48            counters: &["received_bytes"],
49            additional_tags: &["byte_size", "protocol"],
50        },
51    ),
52    (
53        "EventsReceived",
54        EventClass {
55            message: "Events received.",
56            counters: &["received_events", "received_event_bytes"],
57            additional_tags: &["count", "byte_size"],
58        },
59    ),
60    (
61        "EventsSent",
62        EventClass {
63            message: "Events sent.",
64            counters: &["sent_events", "sent_event_bytes"],
65            additional_tags: &["count", "byte_size"],
66        },
67    ),
68    (
69        "BytesSent",
70        EventClass {
71            message: "Bytes sent.",
72            counters: &["sent_bytes"],
73            additional_tags: &["byte_size", "protocol"],
74        },
75    ),
76];
77
/// Per-event opt-outs from individual checks.
#[derive(Debug, Default, Clone)]
struct SkipFlags {
    /// When set, `validate_event` does not run the dropped-events checks.
    dropped_events: bool,
    /// Set when an impl block contains `## skip check-duplicate-events ##`.
    duplicate_check: bool,
    /// Set when an impl block contains `## skip check-validity-events ##`.
    validity_check: bool,
}
84
85#[derive(Debug, Default, Clone)]
86struct Event {
87    path: Option<String>,
88    skip: SkipFlags,
89    emits_component_events_dropped: bool,
90    members: BTreeMap<String, String>,
91    counters: BTreeMap<String, BTreeMap<String, String>>,
92    metrics: BTreeMap<String, BTreeMap<String, String>>,
93    logs: Vec<LogCall>,
94    uses: u32,
95    internal_impl: bool,
96    register_impl: Option<String>,
97    impl_event_handle: bool,
98    reports: Vec<String>,
99}
100
/// One `tracing` log-macro call recorded for an event.
#[derive(Debug, Clone)]
struct LogCall {
    /// Macro name used for the call (`trace`, `debug`, `info`, `warn`, `error`).
    level: String,
    /// Message text extracted from the call's arguments.
    message: String,
    /// Names of the parameters passed to the call.
    parameters: Vec<String>,
}
107
108impl Event {
109    fn add_metric(&mut self, ty: &str, name: &str, tags: BTreeMap<String, String>) {
110        let key = format!("{ty}:{name}");
111        self.metrics.insert(key, tags.clone());
112        if ty == "counter" {
113            self.counters.insert(name.to_string(), tags);
114        }
115    }
116
117    fn add_log(&mut self, level: &str, message: &str, parameters: Vec<String>) {
118        self.logs.push(LogCall {
119            level: level.to_string(),
120            message: message.to_string(),
121            parameters,
122        });
123    }
124
125    fn append(&mut self, report: impl Into<String>) {
126        self.reports.push(report.into());
127    }
128
129    fn signature(&self) -> Option<String> {
130        if self.metrics.is_empty() && self.logs.is_empty() {
131            return None;
132        }
133        let members: Vec<String> = self
134            .members
135            .iter()
136            .map(|(name, ty)| format!("{name}:{ty}"))
137            .collect();
138        let mut metrics: Vec<String> = self
139            .metrics
140            .iter()
141            .map(|(name, tags)| {
142                let mut keys: Vec<&str> = tags.keys().map(String::as_str).collect();
143                keys.sort_unstable();
144                format!("{name}({})", keys.join(","))
145            })
146            .collect();
147        metrics.sort();
148        let mut logs: Vec<String> = self
149            .logs
150            .iter()
151            .map(|l| format!("[\"{}\", \"{}\", {:?}]", l.level, l.message, l.parameters))
152            .collect();
153        logs.sort();
154        Some(format!(
155            "{}[{}][{}]",
156            members.join(":"),
157            logs.join(";"),
158            metrics.join(";")
159        ))
160    }
161}
162
163// ---- Validation ------------------------------------------------------------
164
165fn log_level_one_of(reports: &mut Vec<String>, logs: &[LogCall], levels: &[&str]) {
166    if !logs.iter().any(|l| levels.contains(&l.level.as_str())) {
167        reports.push(format!(
168            "This event MUST log with one of these levels: [{}].",
169            levels
170                .iter()
171                .map(|l| format!("\"{l}\""))
172                .collect::<Vec<_>>()
173                .join(", ")
174        ));
175    }
176}
177
/// Check the tag set of counter `name`.
///
/// Reports a missing counter (and nothing else) when `name` is absent from
/// `counters`. Otherwise reports every tag in `required_tags` that is missing,
/// followed by every tag in `exclude_tags` that is present.
fn counters_must_include_exclude_tags(
    reports: &mut Vec<String>,
    counters: &BTreeMap<String, BTreeMap<String, String>>,
    name: &str,
    required_tags: &[&str],
    exclude_tags: &[&str],
) {
    match counters.get(name) {
        None => reports.push(format!("This event MUST increment counter \"{name}\".")),
        Some(tags) => {
            let missing = required_tags
                .iter()
                .filter(|tag| !tags.contains_key(**tag))
                .map(|tag| format!("Counter \"{name}\" MUST include tag \"{tag}\"."));
            reports.extend(missing);

            let forbidden = exclude_tags
                .iter()
                .filter(|tag| tags.contains_key(**tag))
                .map(|tag| format!("Counter \"{name}\" MUST NOT include tag \"{tag}\"."));
            reports.extend(forbidden);
        }
    }
}
202
203fn check_event_class(reports: &mut Vec<String>, name: &str, event: &Event, handle: &Event) {
204    for (suffix, class) in EVENT_CLASSES {
205        if !name.ends_with(suffix) {
206            continue;
207        }
208        for log in &handle.logs {
209            if log.level != "trace" {
210                reports.push("Log type MUST be \"trace!\".".to_string());
211            }
212            if log.message != class.message {
213                reports.push(format!(
214                    "Log message MUST be \"{}\" (is \"{}\").",
215                    class.message, log.message
216                ));
217            }
218            for tag in class.additional_tags {
219                if !log.parameters.iter().any(|p| p == tag) {
220                    reports.push(format!("Log MUST contain tag \"{tag}\""));
221                }
222            }
223        }
224        for counter in class.counters {
225            let counter_name = format!("component_{counter}_total");
226            let required: Vec<&str> = class
227                .additional_tags
228                .iter()
229                .copied()
230                .filter(|t| !BYTE_SIZE_COUNT.contains(t))
231                .collect();
232            counters_must_include_exclude_tags(
233                reports,
234                &event.counters,
235                &counter_name,
236                &required,
237                &[],
238            );
239        }
240    }
241}
242
243fn check_error_event(reports: &mut Vec<String>, name: &str, event: &Event, handle: &Event) {
244    if !name.ends_with("Error") {
245        reports.push("Error events MUST be named \"___Error\".".to_string());
246    }
247    log_level_one_of(reports, &handle.logs, &["error"]);
248    counters_must_include_exclude_tags(
249        reports,
250        &event.counters,
251        METRIC_NAME_ERROR,
252        &["error_type", "stage"],
253        &[],
254    );
255    for log in &handle.logs {
256        if log.level != "error" {
257            continue;
258        }
259        for parameter in ["error_type", "stage"] {
260            if !log.parameters.iter().any(|p| p == parameter) {
261                reports.push(format!(
262                    "Error log for Error event MUST include parameter \"{parameter}\"."
263                ));
264            }
265        }
266        for parameter in ["error_code", "error_type", "stage"] {
267            if log.parameters.iter().any(|p| p == parameter)
268                && !event
269                    .counters
270                    .get(METRIC_NAME_ERROR)
271                    .is_some_and(|m| m.contains_key(parameter))
272            {
273                reports.push(format!(
274                    "Counter \"{METRIC_NAME_ERROR}\" must include \"{parameter}\" to match error log."
275                ));
276            }
277        }
278    }
279}
280
281fn check_events_dropped(reports: &mut Vec<String>, name: &str, event: &Event, handle: &Event) {
282    if event.emits_component_events_dropped {
283        if event.counters.contains_key(METRIC_NAME_EVENTS_DROPPED) {
284            reports.push(format!(
285                "Event emitting ComponentEventsDropped should not also increment counter `{METRIC_NAME_EVENTS_DROPPED}`"
286            ));
287        }
288        return;
289    }
290    if !name.ends_with("EventsDropped") {
291        reports.push("EventsDropped events MUST be named \"___EventsDropped\".".to_string());
292    }
293    log_level_one_of(reports, &handle.logs, &["error", "debug"]);
294    counters_must_include_exclude_tags(
295        reports,
296        &event.counters,
297        METRIC_NAME_EVENTS_DROPPED,
298        &["intentional"],
299        &["reason", "count"],
300    );
301    for log in &handle.logs {
302        if log.level != "error" {
303            continue;
304        }
305        for parameter in ["count", "intentional", "reason"] {
306            if !log.parameters.iter().any(|p| p == parameter) {
307                reports.push(format!(
308                    "Error log for EventsDropped event MUST include parameter \"{parameter}\"."
309                ));
310            }
311        }
312        if log.parameters.iter().any(|p| p == "intentional")
313            && !event
314                .counters
315                .get(METRIC_NAME_EVENTS_DROPPED)
316                .is_some_and(|m| m.contains_key("intentional"))
317        {
318            reports.push(format!(
319                "Counter \"{METRIC_NAME_EVENTS_DROPPED}\" must include \"intentional\" to match error log."
320            ));
321        }
322    }
323}
324
325fn check_error_counter_tag_constants(reports: &mut Vec<String>, event: &Event) {
326    for (cname, tags) in &event.counters {
327        if cname != METRIC_NAME_ERROR && cname != METRIC_NAME_EVENTS_DROPPED {
328            continue;
329        }
330        for (tag, value) in tags {
331            if tag == "stage" && !value.starts_with("error_stage::") {
332                reports.push(format!(
333                    "Counter \"{cname}\" tag \"{tag}\" value must be an \"error_stage\" constant."
334                ));
335            } else if tag == "error_type" && !value.starts_with("error_type::") {
336                reports.push(format!(
337                    "Counter \"{cname}\" tag \"{tag}\" value must be an \"error_type\" constant."
338                ));
339            }
340        }
341    }
342}
343
344fn validate_event(events: &HashMap<String, Event>, name: &str, handle_name: &str) -> Vec<String> {
345    let event = events.get(name).expect("event present");
346    let handle = events.get(handle_name).expect("handle present");
347    let mut reports: Vec<String> = Vec::new();
348
349    if event.uses == 0 {
350        reports.push("Event has no uses.".to_string());
351    }
352
353    check_event_class(&mut reports, name, event, handle);
354
355    let has_error_logs = handle.logs.iter().filter(|l| l.level == "error").count() == 1;
356    let is_events_dropped_event =
357        name.ends_with("EventsDropped") || event.counters.contains_key(METRIC_NAME_EVENTS_DROPPED);
358
359    if (has_error_logs && !is_events_dropped_event) || name.ends_with("Error") {
360        check_error_event(&mut reports, name, event, handle);
361    }
362
363    if is_events_dropped_event && !event.skip.dropped_events {
364        check_events_dropped(&mut reports, name, event, handle);
365    }
366
367    check_error_counter_tag_constants(&mut reports, event);
368
369    for r in &event.reports {
370        reports.push(r.clone());
371    }
372
373    reports
374}
375
376// ---- Macro arg parsers (operate on small token strings) --------------------
377
378/// `emit!(ComponentEventsDropped...)` detection regex, applied to the raw
379/// source slice of an impl block (which preserves comments and original
380/// formatting that `to_token_stream` strips).
381static RE_EMIT_DROPPED: LazyLock<Regex> = LazyLock::new(|| {
382    Regex::new(r"(?:emit|register)!\([ \t\r\n]*ComponentEventsDropped(?:[^A-Za-z0-9_]|$)").unwrap()
383});
384
385/// `emit!(EventName)` / `register!(Path::EventName)` use-counting regex,
386/// applied to the raw file text so it sees calls nested inside other macros
387/// (e.g. `tokio::select!`) that `syn` does not descend into.
388static RE_USES: LazyLock<Regex> = LazyLock::new(|| {
389    Regex::new(
390        r"(?:^|[^A-Za-z0-9_])(?:emit!?|register!?)\((?:[a-z][a-z0-9_:]+)?([A-Z][A-Za-z0-9]+)",
391    )
392    .unwrap()
393});
394
395/// Locator for `tracing` log-macro calls (`trace!(`, `debug!(`, `info!(`,
396/// `warn!(`, `error!(`) in raw source text. Used by the format-check pass
397/// because the AST visitor cannot see log calls nested inside opaque outer
398/// macros like `tokio::select!`.
399static RE_LOG_CALL_OPEN: LazyLock<Regex> =
400    LazyLock::new(|| Regex::new(r"(?:^|[^A-Za-z0-9_])(trace|debug|info|warn|error)!\(").unwrap());
401
402/// `"key" => value` tag-pair regex. Used inside `counter!(...)` arg lists.
403/// Note: syn's `TokenStream` rendering may produce `=>` as `= >`; the regex
404/// accepts either form via `=[ \t]*>`.
405static RE_TAG_PAIR: LazyLock<Regex> = LazyLock::new(|| {
406    Regex::new(r#""([^"]+)"[ \t\r\n]*=[ \t\r\n]*>[ \t\r\n]*(.+?)(?:,|$)"#).unwrap()
407});
408
/// Trim `s` and remove the whitespace that `TokenStream::to_string` puts
/// around `::`, so `error_stage :: PROCESSING` becomes
/// `error_stage::PROCESSING` for the constant-prefix validation
/// (`starts_with("error_stage::")`).
///
/// Only spaces, tabs, carriage returns and newlines adjacent to a `::` are
/// removed; all other interior whitespace is kept. Implemented with plain
/// string splitting so that no regex has to be compiled on each call.
fn normalize_value(s: &str) -> String {
    // Whitespace characters that are collapsed around `::`.
    const PATH_WS: &[char] = &[' ', '\t', '\r', '\n'];

    let trimmed = s.trim();
    let mut segments = trimmed.split("::").peekable();
    let mut out = String::with_capacity(trimmed.len());
    let mut is_first = true;

    while let Some(segment) = segments.next() {
        let mut segment = segment;
        if !is_first {
            // A `::` precedes this segment: re-emit it and drop the
            // whitespace that followed it.
            out.push_str("::");
            segment = segment.trim_start_matches(PATH_WS);
        }
        if segments.peek().is_some() {
            // A `::` follows this segment: drop the whitespace before it.
            segment = segment.trim_end_matches(PATH_WS);
        }
        out.push_str(segment);
        is_first = false;
    }
    out
}
419
/// Split the text of a comma-separated argument list into trimmed
/// per-argument strings.
///
/// A comma only separates arguments when it is outside string literals,
/// outside `()`/`[]`/`{}` nesting and outside angle brackets. Angle brackets
/// are counted separately so that generic-type commas, as in
/// `Registered<ComponentEventsDropped<'static, INTENTIONAL>>`, do not split a
/// `registered_event!` field. A `>` is only counted when a `<` is open, so the
/// `>` of a `key => value` tag pair is ignored.
///
/// An empty trailing argument is dropped; empty arguments elsewhere are kept.
fn split_comma_args(s: &str) -> Vec<String> {
    let mut pieces = Vec::new();
    let mut nesting: i32 = 0;
    let mut generics: i32 = 0;
    let mut in_string = false;
    let mut escaped = false;
    let mut piece_start = 0;

    for (idx, ch) in s.char_indices() {
        if in_string {
            match ch {
                // The character after a backslash never ends the literal.
                _ if escaped => escaped = false,
                '\\' => escaped = true,
                '"' => in_string = false,
                _ => {}
            }
            continue;
        }
        match ch {
            '"' => in_string = true,
            '(' | '[' | '{' => nesting += 1,
            ')' | ']' | '}' => nesting -= 1,
            '<' => generics += 1,
            '>' if generics > 0 => generics -= 1,
            ',' if nesting == 0 && generics == 0 => {
                pieces.push(s[piece_start..idx].trim().to_string());
                // `,` is a single byte, so the next piece starts right after it.
                piece_start = idx + 1;
            }
            _ => {}
        }
    }

    let tail = s[piece_start..].trim();
    if !tail.is_empty() {
        pieces.push(tail.to_string());
    }
    pieces
}
467
/// Result of parsing one metric macro invocation.
#[derive(Debug)]
struct ParsedMetric {
    /// Metric kind, i.e. the macro name (`counter`, `gauge`, `histogram`).
    ty: String,
    /// Metric name taken from the first macro argument.
    name: String,
    /// Tag name -> tag value text from the `"key" => value` pairs.
    tags: BTreeMap<String, String>,
}
474
475/// Parse a `counter!(...)` / `gauge!(...)` / `histogram!(...)` invocation's
476/// already-tokenised args into a name (string literal or `CamelCase` variant
477/// of `<X>Name::Variant`) and its `"key" => value` tag pairs.
478fn parse_metric_args(ty: &str, tokens: &TokenStream) -> Option<ParsedMetric> {
479    let raw = tokens.to_string();
480    let args = split_comma_args(&raw);
481    if args.is_empty() {
482        return None;
483    }
484    let name = parse_metric_name(args[0].as_str())?;
485    let mut tags = BTreeMap::new();
486    let rest = args[1..].join(",");
487    for caps in RE_TAG_PAIR.captures_iter(&rest) {
488        tags.insert(caps[1].to_string(), normalize_value(&caps[2]));
489    }
490    if tags.is_empty() {
491        for caps in RE_TAG_PAIR.captures_iter(&raw) {
492            tags.insert(caps[1].to_string(), normalize_value(&caps[2]));
493        }
494    }
495    Some(ParsedMetric {
496        ty: ty.to_string(),
497        name,
498        tags,
499    })
500}
501
502/// Extract the metric name from the first arg of a `counter!`/`gauge!`/`histogram!`.
503/// Accepts `"literal"` or `<TypeName>::<Variant>`.
504fn parse_metric_name(arg: &str) -> Option<String> {
505    let arg = arg.trim();
506    if let Some(stripped) = arg.strip_prefix('"').and_then(|s| s.strip_suffix('"')) {
507        return Some(stripped.to_string());
508    }
509    // path::Variant — Ruby matched `\w+Name::(\w+)` and snake-cased the variant.
510    let re = Regex::new(r"^[A-Za-z0-9_]+Name[ \t]*::[ \t]*([A-Za-z0-9_]+)").unwrap();
511    re.captures(arg).map(|c| (&c[1]).to_case(Case::Snake))
512}
513
/// Result of parsing the arguments of one log-macro call.
#[derive(Debug)]
struct ParsedLog {
    /// Message text: the contents of a string literal, or the expression text
    /// when the message was not passed as a literal. Empty when the call has
    /// no message-shaped argument.
    message: String,
    /// `true` when `message` came from a string literal (`"..."`). Format
    /// checks (capitalised, trailing period) only apply to literal messages.
    has_literal_message: bool,
    /// Names of the parameters passed to the call.
    parameters: Vec<String>,
}
525
526/// Parse a `trace!(...)` / `debug!(...)` / `info!(...)` / `warn!(...)` /
527/// `error!(...)` invocation's already-stringified args into the message text
528/// and the list of parameter names it carries.
529///
530/// `tracing` allows the message in any position: leading positional literal,
531/// trailing positional literal, or `message = "..."` named field. We mirror
532/// the Ruby script: take the first string-literal value across all args (be
533/// it a positional `"..."` or a `message = "..."` named field) and treat it
534/// as the message. Only when no literal is present do we fall back to the
535/// first bare positional expression — otherwise patterns like
536/// `warn!(%error, "Failed to flush.")` would never have their message format
537/// checked because the bare `%error` would be claimed as the message first.
538fn parse_log_args(raw: &str) -> ParsedLog {
539    let args = split_comma_args(raw);
540
541    let mut literal_message: Option<String> = None;
542    let mut named_var_message: Option<String> = None;
543    let mut bare_positional_message: Option<String> = None;
544    let mut parameters: Vec<String> = Vec::new();
545
546    for arg in &args {
547        let trimmed = arg.trim();
548        if trimmed.starts_with("target :") || trimmed.starts_with("parent :") {
549            continue;
550        }
551
552        // `message = ...` (named field). The value may or may not be a literal.
553        if let Some(rest) = trimmed.strip_prefix("message")
554            && let Some(value) = rest.trim_start().strip_prefix('=').map(str::trim_start)
555        {
556            let value = value.trim();
557            if let Some(stripped) = value.strip_prefix('"').and_then(|s| s.strip_suffix('"')) {
558                if literal_message.is_none() {
559                    literal_message = Some(stripped.to_string());
560                }
561            } else if named_var_message.is_none() {
562                named_var_message = Some(value.to_string());
563            }
564            continue;
565        }
566
567        // Leading/trailing positional string literal (`"..."` standalone arg).
568        if let Some(stripped) = trimmed.strip_prefix('"').and_then(|s| s.strip_suffix('"')) {
569            if literal_message.is_none() {
570                literal_message = Some(stripped.to_string());
571            }
572            continue;
573        }
574
575        // Bare positional expression (no `=`). Track the first one as a
576        // candidate message in case no literal is found later, and *also*
577        // record it as a parameter — `tracing` lets the same expression
578        // serve both roles.
579        if !trimmed.contains('=') {
580            if bare_positional_message.is_none() {
581                bare_positional_message = Some(trimmed.to_string());
582            }
583            if let Some(name) = parameter_name(trimmed) {
584                parameters.push(name);
585            }
586            continue;
587        }
588
589        // Any other `key = value` named field: just a parameter.
590        if let Some(name) = parameter_name(trimmed) {
591            parameters.push(name);
592        }
593    }
594
595    let (message, has_literal_message) = if let Some(m) = literal_message {
596        (m, true)
597    } else if let Some(m) = named_var_message {
598        (m, false)
599    } else if let Some(m) = bare_positional_message {
600        (m, false)
601    } else {
602        (String::new(), false)
603    };
604
605    ParsedLog {
606        message,
607        has_literal_message,
608        parameters,
609    }
610}
611
612/// Extract the parameter name from a log-macro arg. `tracing` accepts:
613/// `name = expr`, `?name`, `%name`, and bare `name`. Token-stream
614/// serialisation puts whitespace around punctuation (`% protocol`) so we
615/// trim after stripping each prefix.
616fn parameter_name(arg: &str) -> Option<String> {
617    let s = arg.trim();
618    if s.is_empty() {
619        return None;
620    }
621    if let Some((lhs, _)) = s.split_once('=') {
622        let lhs = lhs
623            .trim()
624            .trim_start_matches('?')
625            .trim_start_matches('%')
626            .trim();
627        if is_identifier(lhs) {
628            return Some(lhs.to_string());
629        }
630    }
631    let stripped = s
632        .trim_start_matches('?')
633        .trim_start_matches('%')
634        .trim_start();
635    let head: String = stripped
636        .chars()
637        .take_while(|c| c.is_ascii_alphanumeric() || *c == '_' || *c == '.')
638        .collect();
639    if !head.is_empty() && head.chars().any(|c| c.is_ascii_alphabetic() || c == '_') {
640        return Some(head);
641    }
642    None
643}
644
/// Whether `s` is non-empty, starts with an ASCII letter or `_`, and consists
/// only of ASCII letters, digits, `_` and `.`.
fn is_identifier(s: &str) -> bool {
    let mut chars = s.chars();
    match chars.next() {
        Some(first) if first.is_ascii_alphabetic() || first == '_' => {
            chars.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '.')
        }
        _ => false,
    }
}
653
654// ---- AST scanner -----------------------------------------------------------
655
/// Context for an impl block the scanner is currently inside.
#[derive(Clone)]
struct ImplCtx {
    /// Name of the event that logs and metrics found in the block belong to.
    event_name: String,
}
660
661struct Scanner<'a> {
662    events: &'a mut HashMap<String, Event>,
663    path_str: String,
664    in_internal_events_dir: bool,
665    skip_dropped_for_file: bool,
666    text: &'a str,
667    impl_stack: Vec<ImplCtx>,
668}
669
670impl<'ast> Visit<'ast> for Scanner<'_> {
671    fn visit_item_struct(&mut self, node: &'ast ItemStruct) {
672        if self.in_internal_events_dir {
673            let name = node.ident.to_string();
674            let event = self.events.entry(name).or_default();
675            event.path = Some(self.path_str.clone());
676            event.skip.dropped_events = self.skip_dropped_for_file;
677            for field in &node.fields {
678                if let Some(ident) = &field.ident {
679                    let ty = field.ty.to_token_stream().to_string();
680                    event.members.insert(ident.to_string(), ty);
681                }
682            }
683        }
684        visit::visit_item_struct(self, node);
685    }
686
687    fn visit_item_impl(&mut self, node: &'ast ItemImpl) {
688        let trait_name = node
689            .trait_
690            .as_ref()
691            .and_then(|(_, path, _)| path.segments.last())
692            .map(|s| s.ident.to_string());
693        let event_name = match &*node.self_ty {
694            Type::Path(tp) => tp.path.segments.last().map(|s| s.ident.to_string()),
695            _ => None,
696        };
697
698        if self.in_internal_events_dir
699            && let (Some(trait_name), Some(event_name)) = (trait_name.as_deref(), event_name)
700        {
701            let mut handled = false;
702            if matches!(
703                trait_name,
704                "InternalEvent" | "RegisterInternalEvent" | "InternalEventHandle"
705            ) {
706                // The token-stream form of the impl block has all comments
707                // stripped by syn, so the `## skip ##` markers (which live in
708                // line comments) are missing. Read them from the original
709                // source text within the span instead.
710                let raw_block = source_slice(self.text, node.span());
711                let registers_inside = node.to_token_stream().to_string().contains("register (");
712
713                let event = self.events.entry(event_name.clone()).or_default();
714                event.path = Some(self.path_str.clone());
715                event.skip.duplicate_check |=
716                    raw_block.contains("## skip check-duplicate-events ##");
717                event.skip.validity_check |= raw_block.contains("## skip check-validity-events ##");
718
719                match trait_name {
720                    "InternalEvent" => {
721                        if !registers_inside {
722                            event.internal_impl = true;
723                        }
724                    }
725                    "RegisterInternalEvent" => {
726                        event.register_impl = Some(event_name.clone());
727                        event.append(
728                            "Do not implement RegisterInternalEvent manually. Use the registered_event! macro instead.",
729                        );
730                    }
731                    "InternalEventHandle" => event.impl_event_handle = true,
732                    _ => {}
733                }
734                if RE_EMIT_DROPPED.is_match(&raw_block) {
735                    event.emits_component_events_dropped = true;
736                }
737                self.impl_stack.push(ImplCtx { event_name });
738                handled = true;
739            }
740            visit::visit_item_impl(self, node);
741            if handled {
742                self.impl_stack.pop();
743            }
744            return;
745        }
746        visit::visit_item_impl(self, node);
747    }
748
749    fn visit_macro(&mut self, node: &'ast syn::Macro) {
750        let name = node
751            .path
752            .segments
753            .last()
754            .map(|s| s.ident.to_string())
755            .unwrap_or_default();
756
757        // Use-counting, ComponentEventsDropped detection, and the
758        // log-message format check all run via a separate raw-source pass
759        // because `syn` does not descend into the bodies of arbitrary
760        // `tokio::select!` / `cfg_if!` / etc. macro invocations.
761
762        // Inside an InternalEvent-family impl: capture logs / metrics /
763        //    ComponentEventsDropped emissions for the active event.
764        if let Some(ctx) = self.impl_stack.last().cloned() {
765            match name.as_str() {
766                "trace" | "debug" | "info" | "warn" | "error" => {
767                    let parsed = parse_log_args(&node.tokens.to_string());
768                    let event = self.events.entry(ctx.event_name.clone()).or_default();
769                    event.add_log(&name, &parsed.message, parsed.parameters);
770                }
771                "counter" | "gauge" | "histogram" => {
772                    if let Some(metric) = parse_metric_args(&name, &node.tokens) {
773                        let event = self.events.entry(ctx.event_name.clone()).or_default();
774                        event.add_metric(&metric.ty, &metric.name, metric.tags);
775                    }
776                }
777                _ => {}
778            }
779        }
780        if name == "registered_event" {
781            self.handle_registered_event(node);
782        }
783
784        visit::visit_macro(self, node);
785    }
786}
787
788impl Scanner<'_> {
789    /// Parse a `registered_event!` invocation's tokens to extract the event
790    /// name, members, handle metrics, and emit-block log calls.
791    fn handle_registered_event(&mut self, mac: &syn::Macro) {
792        let raw = mac.tokens.to_string();
793        // Event name: first ident.
794        let Some(event_name) = first_ident(&raw) else {
795            return;
796        };
797        let event = self.events.entry(event_name.clone()).or_default();
798        event.path = Some(self.path_str.clone());
799
800        // Pull out the optional `{ event_fields }` immediately after the name,
801        // then `=> { handle_fields }`, and the `fn emit(...)  { body }`.
802        let after_name = match raw.find(&event_name) {
803            Some(idx) => &raw[idx + event_name.len()..],
804            None => return,
805        };
806        let after_name = after_name.trim_start();
807
808        // Extract `{ ... }` after name, if any (optional event fields).
809        let (event_fields_text, after_fields): (Option<String>, &str) =
810            if after_name.starts_with('{') {
811                let (block, rest) = split_brace_block(after_name);
812                (Some(block.to_string()), rest)
813            } else {
814                (None, after_name)
815            };
816
817        // Parse member fields from the event-fields block.
818        if let Some(block) = event_fields_text {
819            for arg in split_comma_args(&block) {
820                if let Some((name, ty)) = arg.split_once(':') {
821                    event
822                        .members
823                        .insert(name.trim().to_string(), ty.trim().to_string());
824                }
825            }
826        }
827
828        // Skip past `=> { handle_fields }`.
829        let after_arrow = after_fields.trim_start();
830        let after_arrow = after_arrow
831            .strip_prefix("=>")
832            .unwrap_or(after_arrow)
833            .trim_start();
834        let (handle_block, _after_handle) = if after_arrow.starts_with('{') {
835            let (block, rest) = split_brace_block(after_arrow);
836            (block.to_string(), rest)
837        } else {
838            return;
839        };
840
841        // Each handle field: `name : type = expr ,`. Pick out metric calls
842        // inside the `expr` portion to register on the event.
843        for arg in split_comma_args(&handle_block) {
844            let arg = arg.trim();
845            if arg.is_empty() {
846                continue;
847            }
848            // Attempt to parse the assignment.
849            let after_colon = match arg.find(':') {
850                Some(i) => &arg[i + 1..],
851                None => continue,
852            };
853            let Some((_ty, expr)) = after_colon.split_once('=') else {
854                continue;
855            };
856            let expr = expr.trim();
857
858            // Look for embedded `counter!` / `gauge!` / `histogram!` calls.
859            for ty in ["counter", "gauge", "histogram"] {
860                let needle = format!("{ty} ! (");
861                if let Some(idx) = expr.find(&needle) {
862                    // Find the matching `)` from the `(` after `!`.
863                    let after = &expr[idx + needle.len()..];
864                    if let Some(end) = match_paren_end(after) {
865                        let inside = &after[..end];
866                        let toks: TokenStream = inside.parse().unwrap_or_default();
867                        if let Some(metric) = parse_metric_args(ty, &toks) {
868                            event.add_metric(&metric.ty, &metric.name, metric.tags);
869                        }
870                    }
871                }
872            }
873
874            // Component-events-dropped emission.
875            if expr.contains("emit ! (ComponentEventsDropped")
876                || expr.contains("register ! (ComponentEventsDropped")
877            {
878                event.emits_component_events_dropped = true;
879            }
880        }
881
882        // The emit-fn body. Find `fn emit (...) { ... }` after the handle block.
883        // We re-scan the original tokens for any log macros within the impl
884        // body via the AST visitor — simpler than reparsing here. The handle
885        // block above already covers metric extraction. Logs registered to the
886        // outer event come from the visit_macro handling above when the visitor
887        // descends into nested macros (note: macros aren't normal items, so
888        // visit_macro won't recurse into a parent macro's tokens). To capture
889        // log calls inside `registered_event!`, we parse them out by scanning
890        // the macro's full token text for log macro signatures.
891        for ty in ["trace", "debug", "info", "warn", "error"] {
892            let needle = format!("{ty} ! (");
893            let mut start = 0;
894            while let Some(idx) = raw[start..].find(&needle) {
895                let after = &raw[start + idx + needle.len()..];
896                if let Some(end) = match_paren_end(after) {
897                    let inside = &after[..end];
898                    let parsed = parse_log_args(inside);
899                    let event = self.events.entry(event_name.clone()).or_default();
900                    event.add_log(ty, &parsed.message, parsed.parameters);
901                    start = start + idx + needle.len() + end;
902                } else {
903                    break;
904                }
905            }
906        }
907    }
908}
909
910/// Format-check every `tracing` log macro invocation in `text` (a Rust
911/// source file's contents). Returns a list of human-readable report lines.
912///
913/// Operates on the raw source rather than via the AST so that log calls
914/// nested inside opaque outer macros (`tokio::select!`, `cfg_if!`, …) are
915/// also covered — those bodies are not visited by `syn`.
916fn format_check_log_messages(text: &str, path_str: &str) -> Vec<String> {
917    let mut reports = Vec::new();
918    for caps in RE_LOG_CALL_OPEN.captures_iter(text) {
919        let level_match = caps.get(1).expect("group 1 is the level");
920        let level = level_match.as_str();
921        // The match ends with the literal `(`. Walk forward from there to
922        // find the matching `)` accounting for nested parens / braces /
923        // brackets and string literals; this is the inside of the macro.
924        let after_paren = caps.get(0).expect("full match").end();
925        if after_paren > text.len() {
926            continue;
927        }
928        let body_start = after_paren; // position right after `(`
929        let Some(close_offset) = match_paren_end(&text[body_start..]) else {
930            continue;
931        };
932        let inside = &text[body_start..body_start + close_offset];
933        let parsed = parse_log_args(inside);
934        if !parsed.has_literal_message {
935            continue;
936        }
937        let message = parsed.message;
938        if message.is_empty() {
939            continue;
940        }
941        let is_capitalized = message.starts_with('{')
942            || !message
943                .chars()
944                .next()
945                .is_some_and(|c| c.is_ascii_alphabetic())
946            || message
947                .chars()
948                .next()
949                .is_some_and(|c| c.is_ascii_uppercase());
950        let has_trailing_period = message.ends_with('}') || message.ends_with('.');
951        if is_capitalized && has_trailing_period {
952            continue;
953        }
954        let line_no = text[..level_match.start()].matches('\n').count() + 1;
955        if !is_capitalized {
956            reports.push(format!(
957                "    Message must start with a capital. (`{level}` call on {path_str}:{line_no})"
958            ));
959        }
960        if !has_trailing_period {
961            reports.push(format!(
962                "    Message must end with a period. (`{level}` call on {path_str}:{line_no})"
963            ));
964        }
965    }
966    reports
967}
968
969/// Extract the source slice covered by a `proc_macro2::Span`. Used to read
970/// line-comment skip markers (e.g. `## skip check-validity-events ##`) which
971/// `syn` discards from the AST.
972fn source_slice(text: &str, span: proc_macro2::Span) -> String {
973    let start = span.start();
974    let end = span.end();
975    let mut out = String::new();
976    for (i, line) in text.lines().enumerate() {
977        let line_no = i + 1;
978        if line_no >= start.line && line_no <= end.line {
979            out.push_str(line);
980            out.push('\n');
981        }
982        if line_no > end.line {
983            break;
984        }
985    }
986    out
987}
988
/// Find the `)` that closes an already-opened parenthesis.
///
/// `s` is the text immediately *after* the opening `(`; the `(` itself is
/// not part of `s`. Returns the byte offset within `s` of the matching `)`,
/// or `None` if `s` ends before the parenthesis is closed.
///
/// Nested `( ... )` pairs are tracked. The contents of `"..."` string
/// literals (with backslash escapes) and of char literals such as `')'` or
/// `'"'` are skipped, so delimiters inside them do not affect the result. A
/// `'` that does not open a char literal (for example the lifetime
/// `'static`) is treated as ordinary text. Raw strings and comments are not
/// recognised.
fn match_paren_end(s: &str) -> Option<usize> {
    /// Byte length of the char literal at the start of `lit` (which begins
    /// with `'`), or `None` when that quote does not open a char literal.
    fn char_literal_len(lit: &str) -> Option<usize> {
        let mut chars = lit.char_indices().skip(1);
        let (_, first) = chars.next()?;
        match first {
            // `''` is not a char literal.
            '\'' => None,
            '\\' => {
                // Consume the escaped character, then run to the closing quote
                // (covers `'\''`, `'\n'`, `'\u{..}'`, ...).
                chars.next()?;
                chars.find(|&(_, c)| c == '\'').map(|(at, _)| at + 1)
            }
            // Exactly one character followed by a quote is a char literal;
            // anything else (`'static`, `'a,`) is a lifetime or label.
            _ => match chars.next()? {
                (at, '\'') => Some(at + 1),
                _ => None,
            },
        }
    }

    let bytes = s.as_bytes();
    // We start inside one open parenthesis.
    let mut depth = 1usize;
    let mut i = 0;
    while i < bytes.len() {
        match bytes[i] {
            b'"' => {
                // Skip to the closing quote, honouring backslash escapes. An
                // unterminated string means there is no matching `)`.
                i += 1;
                loop {
                    match *bytes.get(i)? {
                        b'\\' => i += 2,
                        b'"' => break,
                        _ => i += 1,
                    }
                }
            }
            b'\'' => {
                // `'` is ASCII, so `i` is a valid char boundary for slicing.
                if let Some(len) = char_literal_len(&s[i..]) {
                    i += len;
                    continue;
                }
            }
            b'(' => depth += 1,
            b')' => {
                depth -= 1;
                if depth == 0 {
                    return Some(i);
                }
            }
            _ => {}
        }
        i += 1;
    }
    None
}
1020
/// Split a `{ ... }` block off the front of `s`, returning `(inside, rest)`.
///
/// `inside` is the text between the outermost braces and `rest` is
/// everything after the closing `}`. If `s` does not start with `{`, or the
/// block is never closed, `("", s)` is returned.
///
/// Nested braces are tracked. The contents of `"..."` string literals (with
/// backslash escapes) and of char literals such as `'}'` or `'"'` are
/// skipped, so braces inside them do not end the block early. A `'` that
/// does not open a char literal (for example the lifetime `'static`) is
/// treated as ordinary text. Raw strings and comments are not recognised.
fn split_brace_block(s: &str) -> (&str, &str) {
    /// Byte length of the char literal at the start of `lit` (which begins
    /// with `'`), or `None` when that quote does not open a char literal.
    fn char_literal_len(lit: &str) -> Option<usize> {
        let mut chars = lit.char_indices().skip(1);
        let (_, first) = chars.next()?;
        match first {
            // `''` is not a char literal.
            '\'' => None,
            '\\' => {
                // Consume the escaped character, then run to the closing quote.
                chars.next()?;
                chars.find(|&(_, c)| c == '\'').map(|(at, _)| at + 1)
            }
            // One character followed by a quote is a char literal; anything
            // else (`'static`, `'a,`) is a lifetime or label.
            _ => match chars.next()? {
                (at, '\'') => Some(at + 1),
                _ => None,
            },
        }
    }

    if !s.starts_with('{') {
        return ("", s);
    }
    let bytes = s.as_bytes();
    // The first byte is `{`, so depth is at least 1 whenever a `}` is seen.
    let mut depth = 0usize;
    let mut i = 0;
    while i < bytes.len() {
        match bytes[i] {
            b'"' => {
                // Skip to the closing quote, honouring backslash escapes.
                i += 1;
                loop {
                    match bytes.get(i).copied() {
                        // Unterminated string: the block never closes.
                        None => return ("", s),
                        Some(b'\\') => i += 2,
                        Some(b'"') => break,
                        Some(_) => i += 1,
                    }
                }
            }
            b'\'' => {
                // `'` is ASCII, so `i` is a valid char boundary for slicing.
                if let Some(len) = char_literal_len(&s[i..]) {
                    i += len;
                    continue;
                }
            }
            b'{' => depth += 1,
            b'}' => {
                depth -= 1;
                if depth == 0 {
                    return (&s[1..i], &s[i + 1..]);
                }
            }
            _ => {}
        }
        i += 1;
    }
    ("", s)
}
1054
/// Return the first identifier-shaped word in `s`, if any.
///
/// Words are maximal runs of ASCII letters, digits and `_`; a word counts as
/// an identifier when it starts with an ASCII letter or `_`, so runs that
/// begin with a digit are passed over.
fn first_ident(s: &str) -> Option<String> {
    s.split(|c: char| !(c.is_ascii_alphanumeric() || c == '_'))
        .find(|word| word.starts_with(|c: char| c.is_ascii_alphabetic() || c == '_'))
        .map(str::to_owned)
}
1069
1070// ---- CLI -------------------------------------------------------------------
1071
// NOTE(review): clap's derive normally surfaces the `///` text below as this
// subcommand's help output — confirm before rewording it. The struct has no
// fields, so the subcommand takes no arguments of its own.
/// Check that internal events satisfy the patterns set in
/// <https://github.com/vectordotdev/vector/blob/master/docs/specs/instrumentation.md>.
#[derive(clap::Args, Debug)]
#[command()]
pub(super) struct Cli {}
1077
1078fn collect_source_paths() -> Result<Vec<PathBuf>> {
1079    let mut paths: Vec<PathBuf> = Vec::new();
1080    for pattern in ["src/**/*.rs", "lib/**/*.rs"] {
1081        for entry in glob(pattern)? {
1082            paths.push(entry?);
1083        }
1084    }
1085    paths.sort();
1086    Ok(paths)
1087}
1088
1089fn scan_file(path: &PathBuf, events: &mut HashMap<String, Event>) -> Result<usize> {
1090    let path_str = path.to_string_lossy().replace('\\', "/");
1091    let text = fs::read_to_string(path)?;
1092    let lower = text.to_ascii_lowercase();
1093
1094    let in_internal_events = path_str.starts_with("src/internal_events/")
1095        || path_str.starts_with("lib/vector-common/src/internal_event/");
1096    let in_src = path_str.starts_with("src/");
1097    let skip_dropped = lower.contains("## skip check-dropped-events ##");
1098
1099    for caps in RE_USES.captures_iter(&text) {
1100        let name = caps[1].to_string();
1101        events.entry(name).or_default().uses += 1;
1102    }
1103
1104    let mut errors = 0usize;
1105    if in_src {
1106        let format_reports = format_check_log_messages(&text, &path_str);
1107        if !format_reports.is_empty() {
1108            for r in &format_reports {
1109                println!("{r}");
1110            }
1111            errors += format_reports.len();
1112        }
1113    }
1114
1115    let file = match syn::parse_file(&text) {
1116        Ok(f) => f,
1117        Err(e) => {
1118            eprintln!("warning: failed to parse {path_str}: {e}");
1119            return Ok(errors);
1120        }
1121    };
1122
1123    let mut scanner = Scanner {
1124        events,
1125        path_str: path_str.clone(),
1126        in_internal_events_dir: in_internal_events,
1127        skip_dropped_for_file: skip_dropped,
1128        text: &text,
1129        impl_stack: Vec::new(),
1130    };
1131    visit::visit_file(&mut scanner, &file);
1132    Ok(errors)
1133}
1134
1135fn report_event_errors(events: &HashMap<String, Event>, name: &str, handle_name: &str) -> bool {
1136    let reports = validate_event(events, name, handle_name);
1137    if reports.is_empty() {
1138        return false;
1139    }
1140    let path = events
1141        .get(name)
1142        .and_then(|e| e.path.as_deref())
1143        .unwrap_or("?");
1144    println!("{path}: Errors in event {name}:");
1145    for r in &reports {
1146        println!("    {r}");
1147    }
1148    true
1149}
1150
1151fn validate_all(events: &HashMap<String, Event>) -> usize {
1152    let mut names: Vec<String> = events.keys().cloned().collect();
1153    names.sort();
1154    let mut duplicates: HashMap<String, Vec<String>> = HashMap::new();
1155    let mut error_count = 0usize;
1156
1157    for name in &names {
1158        let event = events.get(name).expect("present");
1159        if !event.skip.duplicate_check
1160            && (event.internal_impl || event.impl_event_handle)
1161            && let Some(sig) = event.signature()
1162        {
1163            duplicates.entry(sig).or_default().push(name.clone());
1164        }
1165        if event.skip.validity_check {
1166            continue;
1167        }
1168        if event.internal_impl {
1169            if report_event_errors(events, name, name) {
1170                error_count += 1;
1171            }
1172        } else if let Some(handle_name) = event.register_impl.as_deref() {
1173            if events.contains_key(handle_name) {
1174                if report_event_errors(events, name, handle_name) {
1175                    error_count += 1;
1176                }
1177            } else {
1178                println!("Registered event {name} references nonexistent handle {handle_name}");
1179                error_count += 1;
1180            }
1181        }
1182    }
1183
1184    let mut dup_keys: Vec<&String> = duplicates.keys().collect();
1185    dup_keys.sort();
1186    for sig in dup_keys {
1187        let dupes = &duplicates[sig];
1188        if dupes.len() > 1 {
1189            println!("Duplicate events detected: {}", dupes.join(", "));
1190            error_count += 1;
1191        }
1192    }
1193
1194    error_count
1195}
1196
1197impl Cli {
1198    pub(super) fn exec(self) -> Result<()> {
1199        // Resolve all `src/**` / `lib/**` globs against the repo root rather
1200        // than the caller's CWD. Without this, running the binary from
1201        // anywhere outside the repo root silently scans nothing and reports
1202        // `0 error(s)` — which the previous Ruby script wrapper avoided
1203        // because it was always invoked with the repo as the working dir.
1204        let repo_root = crate::utils::paths::find_repo_root()?;
1205        std::env::set_current_dir(&repo_root)?;
1206
1207        let mut events: HashMap<String, Event> = HashMap::new();
1208        let mut error_count = 0usize;
1209
1210        for path in &collect_source_paths()? {
1211            error_count += scan_file(path, &mut events)?;
1212        }
1213
1214        error_count += validate_all(&events);
1215
1216        println!("{error_count} error(s)");
1217        if error_count > 0 {
1218            process::exit(1);
1219        }
1220        Ok(())
1221    }
1222}
1223
1224#[cfg(test)]
1225mod tests {
1226    use super::*;
1227
1228    #[test]
1229    fn split_comma_args_respects_nesting() {
1230        assert_eq!(
1231            split_comma_args(r#""a", "b" => "c, d", e"#),
1232            vec![
1233                r#""a""#.to_string(),
1234                r#""b" => "c, d""#.to_string(),
1235                "e".to_string(),
1236            ]
1237        );
1238    }
1239
1240    #[test]
1241    fn split_comma_args_respects_angle_brackets() {
1242        // A `registered_event!` handle field with a generic type containing a
1243        // comma must not be split at the comma inside `<...>`.
1244        let input = "events_dropped : Registered<ComponentEventsDropped<'static, INTENTIONAL>> = register!(X)";
1245        assert_eq!(split_comma_args(input), vec![input.to_string()]);
1246    }
1247
1248    #[test]
1249    fn parse_metric_name_string_or_variant() {
1250        assert_eq!(
1251            parse_metric_name(r#""my_metric""#),
1252            Some("my_metric".to_string())
1253        );
1254        assert_eq!(
1255            parse_metric_name("CounterName::ComponentErrorsTotal"),
1256            Some("component_errors_total".to_string())
1257        );
1258        assert_eq!(parse_metric_name("not_a_metric"), None);
1259    }
1260
1261    #[test]
1262    fn signature_none_when_empty() {
1263        assert!(Event::default().signature().is_none());
1264    }
1265
1266    fn parse(src: &str) -> ParsedLog {
1267        let mac: syn::Macro = syn::parse_str(src).expect("parse macro");
1268        parse_log_args(&mac.tokens.to_string())
1269    }
1270
1271    #[test]
1272    fn parse_log_args_literal_message_first() {
1273        let p = parse(r#"trace!("Hello there.", count = 1)"#);
1274        assert_eq!(p.message, "Hello there.");
1275        assert!(p.has_literal_message);
1276        assert_eq!(p.parameters, vec!["count".to_string()]);
1277    }
1278
1279    #[test]
1280    fn parse_log_args_literal_message_named() {
1281        let p = parse(r#"error!(message = "Stuff broke.", error_type = err)"#);
1282        assert_eq!(p.message, "Stuff broke.");
1283        assert!(p.has_literal_message);
1284        assert_eq!(p.parameters, vec!["error_type".to_string()]);
1285    }
1286
1287    #[test]
1288    fn parse_log_args_variable_message_named() {
1289        let p = parse("error!(message = exec_reason, error_type = err, stage = stg)");
1290        assert_eq!(p.message, "exec_reason");
1291        assert!(!p.has_literal_message);
1292        assert_eq!(
1293            p.parameters,
1294            vec!["error_type".to_string(), "stage".to_string()]
1295        );
1296    }
1297
1298    #[test]
1299    fn parse_log_args_trailing_string_literal() {
1300        // Some sites pass key=values first and the literal message last —
1301        // tracing accepts this.
1302        let p = parse(r#"error!(path = req.uri().path(), "Bad request.")"#);
1303        assert_eq!(p.message, "Bad request.");
1304        assert!(p.has_literal_message);
1305        assert!(p.parameters.contains(&"path".to_string()));
1306    }
1307
1308    #[test]
1309    fn format_check_finds_nested_log_calls() {
1310        // `syn` does not descend into the bodies of opaque outer macros like
1311        // `tokio::select!`, which is why this check runs over the raw source
1312        // rather than via the AST visitor. Fixture covers both a top-level
1313        // violation and one nested inside `tokio::select!`.
1314        let src = r#"
1315            fn _f() {
1316                error!("missing period");
1317                tokio::select! {
1318                    _ = something() => {
1319                        info!("lowercase first.");
1320                    }
1321                }
1322            }
1323        "#;
1324        let reports = format_check_log_messages(src, "fixture.rs");
1325        let joined = reports.join("\n");
1326        assert!(
1327            joined.contains("Message must end with a period.") && joined.contains("`error` call"),
1328            "expected period-violation report, got: {joined}"
1329        );
1330        assert!(
1331            joined.contains("Message must start with a capital.") && joined.contains("`info` call"),
1332            "expected capital-violation report on the nested info!, got: {joined}"
1333        );
1334    }
1335
1336    #[test]
1337    fn format_check_skips_non_literal_messages() {
1338        // `error!(?err, "Plain text.")` — literal is fine, no report.
1339        let src = r#"fn _f() { error!(?err, "Plain text."); }"#;
1340        let reports = format_check_log_messages(src, "fixture.rs");
1341        assert!(reports.is_empty(), "expected no reports, got: {reports:?}");
1342    }
1343
1344    #[test]
1345    fn parse_log_args_bare_field_then_trailing_literal() {
1346        // `warn!(%error, "Message.")` — the bare field comes first but the
1347        // string literal is the real message. The parser must scan all args
1348        // for a literal before falling back to the bare expression as the
1349        // message, otherwise the format checks never fire here.
1350        let p = parse(r#"warn!(%error, "Failed to flush.")"#);
1351        assert_eq!(p.message, "Failed to flush.");
1352        assert!(p.has_literal_message);
1353        assert!(p.parameters.contains(&"error".to_string()));
1354    }
1355
1356    #[test]
1357    fn parse_log_args_percent_capture() {
1358        let p = parse(r#"trace!(message = "Bytes received.", byte_size = bs, %protocol)"#);
1359        assert!(p.has_literal_message);
1360        assert_eq!(p.message, "Bytes received.");
1361        assert!(p.parameters.contains(&"byte_size".to_string()));
1362        assert!(p.parameters.contains(&"protocol".to_string()));
1363    }
1364
1365    fn check(message: &str) -> (bool, bool) {
1366        // Replicates the gating in `format_check_log_messages` for a literal message.
1367        let is_capitalized = message.starts_with('{')
1368            || !message
1369                .chars()
1370                .next()
1371                .is_some_and(|c| c.is_ascii_alphabetic())
1372            || message
1373                .chars()
1374                .next()
1375                .is_some_and(|c| c.is_ascii_uppercase());
1376        let has_trailing_period = message.ends_with('}') || message.ends_with('.');
1377        (is_capitalized, has_trailing_period)
1378    }
1379
1380    #[test]
1381    fn message_format_capital_period_pass() {
1382        assert_eq!(check("Hello there."), (true, true));
1383    }
1384
1385    #[test]
1386    fn message_format_lowercase_first_fails() {
1387        let (cap, _) = check("hello there.");
1388        assert!(!cap);
1389    }
1390
1391    #[test]
1392    fn message_format_no_period_fails() {
1393        let (_, period) = check("Hello there");
1394        assert!(!period);
1395    }
1396
1397    #[test]
1398    fn message_format_interpolation_passes() {
1399        // `{...}` at start or end is fine — we can't see what it expands to.
1400        assert_eq!(check("{count} dropped."), (true, true));
1401        assert_eq!(check("Dropped {count}"), (true, true));
1402    }
1403
1404    #[test]
1405    fn message_format_non_alpha_first_passes() {
1406        // E.g. starts with a number — no capitalisation requirement.
1407        assert_eq!(check("42 things happened."), (true, true));
1408    }
1409
1410    // ---- validate_event branch coverage --------------------------------
1411    //
1412    // Each test builds a synthetic `Event` (or a `(event, handle)` pair for
1413    // registered events), inserts it into a HashMap, calls `validate_event`,
1414    // and asserts on the returned report list. This covers the rule branches
1415    // independently of the parsing/scanning layer.
1416
1417    fn mk_event() -> Event {
1418        Event {
1419            uses: 1, // default to "has uses" so that branch isn't always firing
1420            internal_impl: true,
1421            ..Default::default()
1422        }
1423    }
1424
1425    fn one_log(level: &str, message: &str, params: &[&str]) -> Vec<LogCall> {
1426        vec![LogCall {
1427            level: level.to_string(),
1428            message: message.to_string(),
1429            parameters: params.iter().map(|s| (*s).to_string()).collect(),
1430        }]
1431    }
1432
1433    fn counter(tags: &[(&str, &str)]) -> BTreeMap<String, String> {
1434        tags.iter()
1435            .map(|(k, v)| ((*k).to_string(), (*v).to_string()))
1436            .collect()
1437    }
1438
1439    fn run(name: &str, event: Event) -> Vec<String> {
1440        let mut events = HashMap::new();
1441        events.insert(name.to_string(), event);
1442        validate_event(&events, name, name)
1443    }
1444
1445    #[test]
1446    fn validate_event_no_uses_reported() {
1447        let mut e = mk_event();
1448        e.uses = 0;
1449        let r = run("Foo", e);
1450        assert!(r.iter().any(|m| m == "Event has no uses."));
1451    }
1452
1453    #[test]
1454    fn validate_bytes_received_log_type_must_be_trace() {
1455        let mut e = mk_event();
1456        e.logs = one_log("info", "Bytes received.", &["byte_size", "protocol"]);
1457        e.counters.insert(
1458            "component_received_bytes_total".to_string(),
1459            counter(&[("protocol", "tcp")]),
1460        );
1461        let r = run("FooBytesReceived", e);
1462        assert!(r.iter().any(|m| m == "Log type MUST be \"trace!\"."));
1463    }
1464
1465    #[test]
1466    fn validate_bytes_received_log_message_exact() {
1467        let mut e = mk_event();
1468        e.logs = one_log(
1469            "trace",
1470            "Bytes were received here.",
1471            &["byte_size", "protocol"],
1472        );
1473        e.counters.insert(
1474            "component_received_bytes_total".to_string(),
1475            counter(&[("protocol", "tcp")]),
1476        );
1477        let r = run("FooBytesReceived", e);
1478        assert!(
1479            r.iter()
1480                .any(|m| m.contains("Log message MUST be \"Bytes received.\""))
1481        );
1482    }
1483
1484    #[test]
1485    fn validate_bytes_received_log_required_tag() {
1486        let mut e = mk_event();
1487        e.logs = one_log("trace", "Bytes received.", &["byte_size"]); // missing protocol
1488        e.counters.insert(
1489            "component_received_bytes_total".to_string(),
1490            counter(&[("protocol", "tcp")]),
1491        );
1492        let r = run("FooBytesReceived", e);
1493        assert!(r.iter().any(|m| m == "Log MUST contain tag \"protocol\""));
1494    }
1495
1496    #[test]
1497    fn validate_bytes_received_counter_required_tag() {
1498        let mut e = mk_event();
1499        e.logs = one_log("trace", "Bytes received.", &["byte_size", "protocol"]);
1500        e.counters
1501            .insert("component_received_bytes_total".to_string(), counter(&[])); // missing protocol
1502        let r = run("FooBytesReceived", e);
1503        assert!(r.iter().any(|m| {
1504            m == "Counter \"component_received_bytes_total\" MUST include tag \"protocol\"."
1505        }));
1506    }
1507
1508    #[test]
1509    fn validate_events_received_class() {
1510        let mut e = mk_event();
1511        e.logs = one_log("trace", "Wrong message.", &["count", "byte_size"]);
1512        let r = run("FooEventsReceived", e);
1513        assert!(
1514            r.iter()
1515                .any(|m| m.contains("Log message MUST be \"Events received.\""))
1516        );
1517        assert!(
1518            r.iter()
1519                .any(|m| m
1520                    == "This event MUST increment counter \"component_received_events_total\".")
1521        );
1522    }
1523
1524    #[test]
1525    fn validate_error_event_must_be_named_error() {
1526        let mut e = mk_event();
1527        e.logs = one_log("error", "Something failed.", &["error_type", "stage"]);
1528        e.counters.insert(
1529            METRIC_NAME_ERROR.to_string(),
1530            counter(&[
1531                ("error_type", "error_type::CONNECTION_FAILED"),
1532                ("stage", "error_stage::PROCESSING"),
1533            ]),
1534        );
1535        let r = run("BadlyNamed", e);
1536        assert!(
1537            r.iter()
1538                .any(|m| m == "Error events MUST be named \"___Error\".")
1539        );
1540    }
1541
1542    #[test]
1543    fn validate_error_event_log_level_must_be_error() {
1544        let mut e = mk_event();
1545        // info-level log when name ends with Error
1546        e.logs = one_log("info", "Something failed.", &["error_type", "stage"]);
1547        e.counters.insert(
1548            METRIC_NAME_ERROR.to_string(),
1549            counter(&[
1550                ("error_type", "error_type::CONNECTION_FAILED"),
1551                ("stage", "error_stage::PROCESSING"),
1552            ]),
1553        );
1554        let r = run("FooError", e);
1555        assert!(
1556            r.iter()
1557                .any(|m| m.contains("MUST log with one of these levels: [\"error\"]"))
1558        );
1559    }
1560
1561    #[test]
1562    fn validate_error_event_log_must_include_error_type_and_stage() {
1563        let mut e = mk_event();
1564        e.logs = one_log("error", "Something failed.", &[]); // no error_type, no stage
1565        e.counters.insert(
1566            METRIC_NAME_ERROR.to_string(),
1567            counter(&[
1568                ("error_type", "error_type::CONNECTION_FAILED"),
1569                ("stage", "error_stage::PROCESSING"),
1570            ]),
1571        );
1572        let r = run("FooError", e);
1573        assert!(
1574            r.iter()
1575                .any(|m| m == "Error log for Error event MUST include parameter \"error_type\".")
1576        );
1577        assert!(
1578            r.iter()
1579                .any(|m| m == "Error log for Error event MUST include parameter \"stage\".")
1580        );
1581    }
1582
1583    #[test]
1584    fn validate_error_counter_must_match_error_log_params() {
1585        let mut e = mk_event();
1586        // Log mentions error_code but counter doesn't
1587        e.logs = one_log("error", "Failed.", &["error_type", "stage", "error_code"]);
1588        e.counters.insert(
1589            METRIC_NAME_ERROR.to_string(),
1590            counter(&[
1591                ("error_type", "error_type::CONNECTION_FAILED"),
1592                ("stage", "error_stage::PROCESSING"),
1593            ]),
1594        );
1595        let r = run("FooError", e);
1596        assert!(r.iter().any(|m| {
1597            m == "Counter \"component_errors_total\" must include \"error_code\" to match error log."
1598        }));
1599    }
1600
1601    #[test]
1602    fn validate_error_stage_must_be_constant() {
1603        let mut e = mk_event();
1604        e.logs = one_log("error", "Failed.", &["error_type", "stage"]);
1605        e.counters.insert(
1606            METRIC_NAME_ERROR.to_string(),
1607            counter(&[
1608                ("error_type", "error_type::CONNECTION_FAILED"),
1609                ("stage", "\"processing\""),
1610            ]),
1611        );
1612        let r = run("FooError", e);
1613        assert!(
1614            r.iter()
1615                .any(|m| m.contains("must be an \"error_stage\" constant"))
1616        );
1617    }
1618
1619    #[test]
1620    fn validate_error_type_must_be_constant() {
1621        let mut e = mk_event();
1622        e.logs = one_log("error", "Failed.", &["error_type", "stage"]);
1623        e.counters.insert(
1624            METRIC_NAME_ERROR.to_string(),
1625            counter(&[
1626                ("error_type", "\"connection_failed\""),
1627                ("stage", "error_stage::PROCESSING"),
1628            ]),
1629        );
1630        let r = run("FooError", e);
1631        assert!(
1632            r.iter()
1633                .any(|m| m.contains("must be an \"error_type\" constant"))
1634        );
1635    }
1636
1637    #[test]
1638    fn validate_events_dropped_must_be_named_events_dropped() {
1639        let mut e = mk_event();
1640        e.logs = one_log(
1641            "error",
1642            "Events dropped.",
1643            &["count", "intentional", "reason"],
1644        );
1645        e.counters.insert(
1646            METRIC_NAME_EVENTS_DROPPED.to_string(),
1647            counter(&[("intentional", "false")]),
1648        );
1649        let r = run("BadlyNamed", e);
1650        assert!(
1651            r.iter()
1652                .any(|m| m == "EventsDropped events MUST be named \"___EventsDropped\".")
1653        );
1654    }
1655
1656    #[test]
1657    fn validate_events_dropped_log_level_error_or_debug() {
1658        let mut e = mk_event();
1659        e.logs = one_log("info", "Dropped.", &["count", "intentional", "reason"]);
1660        e.counters.insert(
1661            METRIC_NAME_EVENTS_DROPPED.to_string(),
1662            counter(&[("intentional", "false")]),
1663        );
1664        let r = run("FooEventsDropped", e);
1665        assert!(
1666            r.iter().any(|m| {
1667                m.contains("MUST log with one of these levels: [\"error\", \"debug\"]")
1668            })
1669        );
1670    }
1671
1672    #[test]
1673    fn validate_events_dropped_counter_required_and_excluded_tags() {
1674        let mut e = mk_event();
1675        e.logs = one_log("error", "Dropped.", &["count", "intentional", "reason"]);
1676        // Missing intentional, has reason and count (which it must NOT)
1677        e.counters.insert(
1678            METRIC_NAME_EVENTS_DROPPED.to_string(),
1679            counter(&[("reason", "\"r\""), ("count", "1")]),
1680        );
1681        let r = run("FooEventsDropped", e);
1682        assert!(r.iter().any(|m| {
1683            m == "Counter \"component_discarded_events_total\" MUST include tag \"intentional\"."
1684        }));
1685        assert!(r.iter().any(|m| {
1686            m == "Counter \"component_discarded_events_total\" MUST NOT include tag \"reason\"."
1687        }));
1688        assert!(r.iter().any(|m| {
1689            m == "Counter \"component_discarded_events_total\" MUST NOT include tag \"count\"."
1690        }));
1691    }
1692
1693    #[test]
1694    fn validate_events_dropped_log_required_params() {
1695        let mut e = mk_event();
1696        // Error log present but missing required params (count, intentional, reason)
1697        e.logs = one_log("error", "Dropped.", &[]);
1698        e.counters.insert(
1699            METRIC_NAME_EVENTS_DROPPED.to_string(),
1700            counter(&[("intentional", "false")]),
1701        );
1702        let r = run("FooEventsDropped", e);
1703        for p in ["count", "intentional", "reason"] {
1704            assert!(
1705                r.iter().any(|m| m
1706                    == &format!(
1707                        "Error log for EventsDropped event MUST include parameter \"{p}\"."
1708                    )),
1709                "missing report for parameter {p} in: {r:?}"
1710            );
1711        }
1712    }
1713
1714    #[test]
1715    fn validate_emits_dropped_must_not_also_increment_counter() {
1716        let mut e = mk_event();
1717        e.emits_component_events_dropped = true;
1718        e.counters.insert(
1719            METRIC_NAME_EVENTS_DROPPED.to_string(),
1720            counter(&[("intentional", "false")]),
1721        );
1722        let r = run("FooEventsDropped", e);
1723        assert!(r.iter().any(|m| {
1724            m.contains("should not also increment counter")
1725                && m.contains(METRIC_NAME_EVENTS_DROPPED)
1726        }));
1727    }
1728
1729    #[test]
1730    fn validate_clean_event_no_reports() {
1731        // A correctly-shaped Error event should produce no reports.
1732        let mut e = mk_event();
1733        e.logs = one_log("error", "Connection failed.", &["error_type", "stage"]);
1734        e.counters.insert(
1735            METRIC_NAME_ERROR.to_string(),
1736            counter(&[
1737                ("error_type", "error_type::CONNECTION_FAILED"),
1738                ("stage", "error_stage::PROCESSING"),
1739            ]),
1740        );
1741        let r = run("ConnectionFailedError", e);
1742        assert!(r.is_empty(), "expected no reports, got: {r:?}");
1743    }
1744}