vrl/compiler/
unused_expression_checker.rs

1/// # Unused Expression Checker
2///
3/// This module provides functionality for traversing VRL (Vector Remap Language) Abstract Syntax Trees (AST).
4/// It's designed to detect and report unused expressions, helping users clean up and optimize their VRL scripts.
5/// Initially, it will generate warnings for unused expressions. These warnings might be escalated to errors
6/// in future versions, once the module has been battle-tested.
7///
8/// ## How it works
9///
10/// - **Traversal**: Recursively explores each node of the AST. This process begins after the program has
11///   been successfully compiled.
12/// - **Stateful Context**: Builds context on the fly to determine whether an expression is unused. The context
13///   takes into account variable scopes, assignments, and the flow of the program.
14/// - **Detection**: Identifies and reports expressions that do not contribute to assignments,
15///   affect external events, or influence the outcome of function calls.
16/// - **Ignored Variables**: Variable names prefixed with '_' are ignored.
17///
18/// ## Caveats
19/// - **Closures**: Closure support is minimal. For now, we are only ensuring that there are no false positives.
20/// - **Variable Shadowing**: Variable shadowing is not supported. Unused variables will not be detected in this case.
21use crate::compiler::codes::WARNING_UNUSED_CODE;
22use crate::compiler::parser::{Ident, Node};
23use crate::diagnostic::{Diagnostic, DiagnosticList, Label, Note, Severity};
24use crate::parser::ast::{
25    Array, Assignment, AssignmentOp, AssignmentTarget, Block, Container, Expr, FunctionCall,
26    IfStatement, Object, Predicate, QueryTarget, Return, RootExpr, Unary,
27};
28use crate::parser::template_string::StringSegment;
29use crate::parser::{Literal, Program, Span};
30use std::collections::{BTreeMap, HashMap};
31use tracing::warn;
32
/// Names of functions that are called for their side effects.
///
/// Calls to these functions are exempt from the "unused result" check.
const SIDE_EFFECT_FUNCTIONS: [&str; 5] = [
    "del",
    "log",
    "assert",
    "assert_eq",
    "set_semantic_meaning",
];
35
36#[must_use]
37pub fn check_for_unused_results(ast: &Program) -> DiagnosticList {
38    let expression_visitor = AstVisitor { ast };
39    expression_visitor.check_for_unused_results()
40}
41
42pub struct AstVisitor<'a> {
43    ast: &'a Program,
44}
45
46#[derive(Default, Debug, Clone)]
47struct IdentState {
48    span: Span,
49    pending_usage: bool,
50    used_in_closure: bool,
51}
52
53#[derive(Default, Debug, Clone)]
54struct VisitorState {
55    level: usize,
56    expecting_result: HashMap<usize, bool>,
57    within_block_expression: HashMap<usize, bool>,
58    ident_to_state: BTreeMap<Ident, IdentState>,
59    visiting_closure: bool,
60    diagnostics: DiagnosticList,
61}
62
63impl VisitorState {
64    fn is_unused(&self) -> bool {
65        let pending_result = self
66            .expecting_result
67            .get(&self.level)
68            .is_some_and(|active| *active);
69        !pending_result
70    }
71
72    fn is_within_block(&self) -> bool {
73        self.within_block_expression
74            .get(&self.level)
75            .is_some_and(|within_block| *within_block)
76    }
77
78    fn increase_level(&mut self) {
79        self.level += 1;
80    }
81
82    fn decrease_level(&mut self) {
83        self.level -= 1;
84    }
85
86    fn enter_block(&mut self) {
87        self.increase_level();
88        self.within_block_expression.insert(self.level, true);
89    }
90
91    fn exiting_block(&mut self) {
92        self.within_block_expression.insert(self.level, false);
93        self.decrease_level();
94    }
95
96    fn mark_level_as_expecting_result(&mut self) {
97        self.expecting_result.insert(self.level, true);
98    }
99
100    fn mark_level_as_not_expecting_result(&mut self) {
101        self.expecting_result.insert(self.level, false);
102    }
103
104    fn mark_identifier_pending_usage(&mut self, ident: &Ident, span: &Span) {
105        if ident.is_empty() || ident.starts_with('_') {
106            return;
107        }
108
109        self.ident_to_state
110            .entry(ident.clone())
111            .and_modify(|state| {
112                state.pending_usage = true;
113                if self.visiting_closure {
114                    state.used_in_closure = true;
115                }
116            })
117            .or_insert(IdentState {
118                span: *span,
119                pending_usage: true,
120                used_in_closure: self.visiting_closure,
121            });
122    }
123
124    fn mark_identifier_used(&mut self, ident: &Ident) {
125        if ident.is_empty() || ident.starts_with('_') {
126            return;
127        }
128
129        if let Some(entry) = self.ident_to_state.get_mut(ident) {
130            entry.pending_usage = false;
131            if self.visiting_closure {
132                entry.used_in_closure = true;
133            }
134        } else {
135            warn!("unexpected identifier `{}` reported as used", ident);
136        }
137    }
138
139    fn mark_query_target_pending_usage(&mut self, query_target: &Node<QueryTarget>) {
140        match &query_target.node {
141            QueryTarget::Internal(ident) => {
142                self.mark_identifier_pending_usage(ident, &query_target.span);
143            }
144            QueryTarget::External(_) | QueryTarget::FunctionCall(_) | QueryTarget::Container(_) => {
145            }
146        }
147    }
148
149    fn mark_visiting_closure(&mut self) {
150        self.visiting_closure = true;
151    }
152
153    fn mark_not_visiting_closure(&mut self) {
154        self.visiting_closure = false;
155    }
156
157    fn append_diagnostic(&mut self, message: String, span: &Span) {
158        self.diagnostics.push(Diagnostic {
159            severity: Severity::Warning,
160            code: WARNING_UNUSED_CODE,
161            message,
162            labels: Vec::from([Label::primary(
163                "help: use the result of this expression or remove it",
164                span,
165            )]),
166            notes: Vec::from([Note::Basic(
167                "this expression has no side-effects".to_owned(),
168            )]),
169        });
170    }
171
172    fn extend_diagnostics_for_unused_variables(&mut self) {
173        for (ident, state) in self.ident_to_state.clone() {
174            // Remove the closure check after https://github.com/vectordotdev/vrl/issues/1216 is resolved.
175            if state.pending_usage && !state.used_in_closure {
176                self.append_diagnostic(format!("unused variable `{ident}`"), &state.span);
177            }
178        }
179    }
180}
181
182fn scoped_visit(state: &mut VisitorState, f: impl FnOnce(&mut VisitorState)) {
183    state.increase_level();
184    state.mark_level_as_expecting_result();
185    f(state);
186    state.mark_level_as_not_expecting_result();
187    state.decrease_level();
188}
189
190impl AstVisitor<'_> {
191    fn visit_node(&self, node: &Node<Expr>, state: &mut VisitorState) {
192        let expression = node.inner();
193
194        match expression {
195            Expr::Literal(literal) => {
196                if let Literal::String(template) = &literal.node {
197                    for segment in &template.0 {
198                        if let StringSegment::Template(ident, _) = segment {
199                            state.mark_identifier_used(&Ident::from(ident.clone()));
200                        }
201                    }
202                }
203                if state.is_unused() {
204                    state.append_diagnostic(format!("unused literal `{literal}`"), &node.span());
205                }
206            }
207            Expr::Container(container) => {
208                self.visit_container(container, state);
209            }
210            Expr::IfStatement(if_statement) => {
211                scoped_visit(state, |state| {
212                    self.visit_if_statement(if_statement, state);
213                });
214            }
215            Expr::Op(op) => {
216                self.visit_node(&op.0, state);
217                scoped_visit(state, |state| {
218                    self.visit_node(&op.2, state);
219                });
220            }
221            Expr::Unary(unary) => match &unary.node {
222                Unary::Not(not) => {
223                    self.visit_node(&not.1, state);
224                }
225            },
226            Expr::Assignment(assignment) => {
227                self.visit_assignment(assignment, state);
228            }
229            Expr::Query(query) => match &query.node.target.node {
230                QueryTarget::Internal(ident) => {
231                    if !state.is_unused() {
232                        state.mark_identifier_used(ident);
233                    }
234                }
235                QueryTarget::External(_) | QueryTarget::Container(_) => {}
236                QueryTarget::FunctionCall(function_call) => {
237                    self.visit_function_call(function_call, &query.node.target.span, state);
238                }
239            },
240            Expr::FunctionCall(function_call) => {
241                self.visit_function_call(function_call, &function_call.span, state);
242            }
243            Expr::Variable(variable) => {
244                state.mark_identifier_used(&variable.node);
245            }
246            Expr::Abort(_) => {}
247            Expr::Return(r#return) => self.visit_return(r#return, state),
248        }
249    }
250
251    fn visit_container(&self, node: &Node<Container>, state: &mut VisitorState) {
252        match &node.node {
253            Container::Group(group) => self.visit_node(&group.node.0, state),
254            Container::Block(block) => self.visit_block(block, state),
255            Container::Array(array) => self.visit_array(array, state),
256            Container::Object(object) => self.visit_object(object, state),
257        }
258    }
259
260    fn visit_array(&self, array: &Node<Array>, state: &mut VisitorState) {
261        for expr in &array.0 {
262            self.visit_node(expr, state);
263        }
264    }
265
266    fn visit_block(&self, block: &Node<Block>, state: &mut VisitorState) {
267        let block_expressions = &block.node.0;
268        if block_expressions.is_empty() {
269            return;
270        }
271        state.enter_block();
272
273        for (i, expr) in block_expressions.iter().enumerate() {
274            if i == block_expressions.len() - 1 {
275                state.exiting_block();
276            }
277            self.visit_node(expr, state);
278        }
279    }
280
281    fn visit_object(&self, object: &Node<Object>, state: &mut VisitorState) {
282        if state.is_unused() {
283            state.append_diagnostic(format!("unused object `{object}`"), &object.span);
284        }
285        for value in object.0.values() {
286            scoped_visit(state, |state| {
287                self.visit_node(value, state);
288            });
289        }
290    }
291
292    fn visit_if_statement(&self, if_statement: &Node<IfStatement>, state: &mut VisitorState) {
293        match &if_statement.predicate.node {
294            Predicate::One(expr) => self.visit_node(expr, state),
295            Predicate::Many(exprs) => {
296                for expr in exprs {
297                    self.visit_node(expr, state);
298                }
299            }
300        }
301
302        scoped_visit(state, |state| {
303            self.visit_block(&if_statement.if_node, state);
304        });
305
306        if let Some(else_block) = &if_statement.else_node {
307            scoped_visit(state, |state| {
308                self.visit_block(else_block, state);
309            });
310        }
311    }
312
313    fn visit_assignment(&self, assignment: &Node<Assignment>, state: &mut VisitorState) {
314        state.increase_level();
315        let level = state.level;
316        state.expecting_result.insert(level, true);
317
318        // All targets needs to be used later.
319        let (op, targets): (&AssignmentOp, &[_]) = match &assignment.node {
320            Assignment::Single { target, op, .. } => (op, &[target]),
321            Assignment::Infallible { ok, err, op, .. } => (op, &[ok, err]),
322        };
323        for target in targets {
324            match &target.node {
325                AssignmentTarget::Noop => {}
326                AssignmentTarget::Query(query) => {
327                    state.mark_query_target_pending_usage(&query.target);
328                }
329                AssignmentTarget::Internal(ident, path) => {
330                    if *op == AssignmentOp::Assign && path.is_none() {
331                        state.mark_identifier_pending_usage(ident, &target.span);
332                    } else if *op == AssignmentOp::Merge {
333                        // The following example: `x |= {}` falls under shadowing and is not handled.
334                        state.mark_identifier_used(ident);
335                    }
336                }
337                AssignmentTarget::External(_path) => {}
338            }
339        }
340
341        // Visit the assignment right hand side.
342        match &assignment.node {
343            Assignment::Infallible { expr, .. } | Assignment::Single { expr, .. } => {
344                self.visit_node(expr, state);
345            }
346        }
347        state.expecting_result.insert(level, false);
348        state.decrease_level();
349    }
350
351    fn visit_function_call(
352        &self,
353        function_call: &FunctionCall,
354        span: &Span,
355        state: &mut VisitorState,
356    ) {
357        for argument in &function_call.arguments {
358            state.increase_level();
359            state.mark_level_as_expecting_result();
360            self.visit_node(&argument.node.expr, state);
361            state.mark_level_as_not_expecting_result();
362            state.decrease_level();
363        }
364
365        // This function call might be part of fallible block.
366        if !function_call.abort_on_error && state.is_within_block() {
367            state.mark_level_as_expecting_result();
368        }
369
370        if !SIDE_EFFECT_FUNCTIONS.contains(&function_call.ident.0.as_str()) {
371            if let Some(closure) = &function_call.closure {
372                state.mark_visiting_closure();
373                for variable in &closure.variables {
374                    state.mark_identifier_pending_usage(&variable.node, &variable.span);
375                }
376                state.mark_level_as_expecting_result();
377                self.visit_block(&closure.block, state);
378                state.mark_level_as_not_expecting_result();
379            } else if state.is_unused() {
380                state.append_diagnostic(
381                    format!("unused result for function call `{function_call}`"),
382                    span,
383                );
384            }
385            state.mark_not_visiting_closure();
386        }
387
388        if !function_call.abort_on_error && state.is_within_block() {
389            state.mark_level_as_not_expecting_result();
390        }
391    }
392
393    fn visit_return(&self, r#return: &Node<Return>, state: &mut VisitorState) {
394        state.increase_level();
395        let level = state.level;
396        state.expecting_result.insert(level, true);
397        self.visit_node(&r#return.node.expr, state);
398        state.expecting_result.insert(level, false);
399        state.decrease_level();
400    }
401
402    /// This function traverses the VRL AST and detects unused results.
403    /// An expression might have side-effects, in that case we do not except its result to be used.
404    ///
405    /// We want to detect the following cases:
406    /// * Unused Variables: a variable which is assigned a value but never used in any expression
407    /// * Unused Expressions: an expression without side-effects with an unused result
408    fn check_for_unused_results(&self) -> DiagnosticList {
409        let mut unused_warnings = DiagnosticList::default();
410        let mut state = VisitorState::default();
411        let root_expressions = &self.ast.0;
412        for (i, root_node) in root_expressions.iter().enumerate() {
413            let is_last = i == root_expressions.len() - 1;
414            if is_last {
415                state.increase_level();
416                state.mark_level_as_expecting_result();
417            }
418            match root_node.inner() {
419                RootExpr::Expr(node) => self.visit_node(node, &mut state),
420                RootExpr::Error(_) => {}
421            }
422            if is_last {
423                state.decrease_level();
424                state.mark_level_as_not_expecting_result();
425            }
426        }
427        state.extend_diagnostics_for_unused_variables();
428        unused_warnings.extend(state.diagnostics);
429        unused_warnings
430    }
431}
432
#[cfg(test)]
mod test {
    use crate::compiler::codes::WARNING_UNUSED_CODE;
    use crate::stdlib;
    use indoc::indoc;

    /// Compiles `source` and checks the produced warnings against `expected_warnings`.
    ///
    /// The number of warnings must match, every warning must carry
    /// `WARNING_UNUSED_CODE`, and the i-th warning message must contain the
    /// i-th expected string.
    fn unused_test(source: &str, expected_warnings: &[String]) {
        let warnings = crate::compiler::compile(source, &stdlib::all())
            .unwrap()
            .warnings;

        // Include both sides in the failure output; a bare count mismatch
        // gives no hint about which warning is missing or unexpected.
        let actual_messages: Vec<&str> = warnings
            .iter()
            .map(|warning| warning.message.as_str())
            .collect();
        assert_eq!(
            warnings.len(),
            expected_warnings.len(),
            "expected warnings {expected_warnings:?}, got {actual_messages:?}"
        );

        for (warning, content) in warnings.iter().zip(expected_warnings) {
            assert_eq!(warning.code, WARNING_UNUSED_CODE);
            assert!(
                warning.message.contains(content),
                "expected message `{}` to contain `{content}`",
                warning.message
            );
        }
    }

    #[test]
    fn unused_top_level_literal() {
        let source = indoc! {r#"
            "foo"
            "program result"
        "#};
        unused_test(source, &[r#"unused literal `"foo"`"#.to_string()]);
    }

    #[test]
    fn unused_variable_in_assignment() {
        let source = indoc! {"
            foo = 5
        "};
        unused_test(source, &["unused variable `foo`".to_string()]);
    }

    #[test]
    fn unused_literal() {
        let source = indoc! {r#"
            . = {
                "unused"
                "a"
            }
        "#};
        unused_test(source, &[r#"unused literal `"unused"`"#.to_string()]);
    }

    #[test]
    fn unused_top_level_variable() {
        let source = indoc! {r#"
            x = "bar"
        "#};
        unused_test(source, &["unused variable `x`".to_string()]);
    }

    #[test]
    fn test_nested_blocks() {
        let source = indoc! {r#"
            . = {
                "1"
                {
                    "2"
                    {
                        "3"
                    }
                }

                . = {{{ x = 42; x }}}

                "4"
                "5"
            }
        "#};

        let expected_warnings: Vec<String> = (1..5)
            .map(|i| format!("unused literal `\"{i}\"`"))
            .collect();
        unused_test(source, &expected_warnings);
    }

    #[test]
    fn unused_object() {
        let source = indoc! {r#"
            .o = { "key": 1 }
            { "array": [{"a": "b"}], "b": 2}
            "program result"
        "#};
        unused_test(
            source,
            &[r#"unused object `{ "array": [{ "a": "b" }], "b": 2 }`"#.to_string()],
        );
    }

    #[test]
    fn unused_variables() {
        let source = indoc! {r#"
            a = "1"
            b = {
                c = "2"
                "3"
            }
            d = random_bool()
            . = d
        "#};

        let expected_warnings: Vec<String> = ('a'..'d')
            .map(|ident| format!("unused variable `{ident}`"))
            .collect();
        unused_test(source, &expected_warnings);
    }

    #[test]
    fn unused_function_result() {
        let source = indoc! {r#"
            .r = random_int(0,1)
            random_bool()
            "program result"
        "#};
        unused_test(
            source,
            &["unused result for function call `random_bool()`".to_string()],
        );
    }

    #[test]
    fn unused_ident_with_path() {
        let source = indoc! {"
            x = {}
            .f1 = x
            y = {}
            y.a = 1
        "};
        unused_test(source, &["unused variable `y`".to_string()]);
    }

    #[test]
    fn used_queries() {
        let source = indoc! {r#"
            _i_am_ignored = 42
            x = {}
            x.foo = 1
            x.bar = 2
            .bar = remove!(x, ["foo"]).bar

            y = {"foo": 3}.foo
        "#};
        unused_test(source, &["unused variable `y`".to_string()]);
    }

    #[test]
    fn used_in_if_condition() {
        let source = indoc! {r#"
            if starts_with!(.a, "foo") {
                .a = "foo"
            } else if starts_with!(.a, "bar") {
                .a = "bar"
            }

            x = 1
            .b = if (x < 1) { 0 } else { 1 }

            y = 2
            z = 3
            if (y < 2 && random_int(0, 4) < 3 ) { 0 } else { .c = z }

            x = {}
            x.a = 1
            .d = if (x.a < 1) { 0 } else { 1 }
        "#};
        unused_test(source, &[]);
    }

    #[test]
    fn used_in_function_arguments() {
        let source = indoc! {"
            x = {}
            x.foo = 1
            .r = random_int!({x.foo}, x.foo + 1)

            x.bar = 2
            exists(field: x.bar)
            del(x.bar, compact: false)
        "};
        unused_test(
            source,
            &["unused result for function call `exists(field: xbar)`".to_string()],
        );
    }

    #[test]
    fn closure_shadows_unused_variable() {
        let source = indoc! {r#"
            count = 0;
            value = 42
            for_each({ "a": 1, "b": 2 }) -> |_key, value| { count = count + value };
            count
        "#};
        // Note that the `value` outside of the closure block is unused but not detected.
        unused_test(source, &[]);
    }

    #[test]
    fn used_closure_result() {
        let source = indoc! {"
            patterns = [r'foo', r'bar']
            matched = false
            for_each(patterns) -> |_, pattern| {
              if !matched && match!(.message, pattern) {
                matched = true
              }
            }
            matched
        "};
        // `matched` is assigned inside the closure and read as the program result,
        // so no warning is expected.
        unused_test(source, &[]);
    }

    #[test]
    fn used_function_result_in_fallible_block() {
        let source = indoc! {r#"
            {
              parse_json("invalid")
              2
            } ?? 1
        "#};
        unused_test(source, &[]);
    }

    #[test]
    fn unused_shadow_variable_not_detected() {
        // TODO: Support variable shadowing. A potential solution is to introduce the following type:
        // type IdentState = HashMap<usize, (bool, Span)>;
        let source = indoc! {"
            x = 1
            x = 2
            {
                x = {
                    x = {
                        x = 3
                        4
                    }
                    x
                }
                x
            }
        "};
        unused_test(source, &[]);
    }

    #[test]
    fn undetected_merge_assignment() {
        // `x` is not used after the merging operation. This case is not detected.
        let source = indoc! {r#"
            x = {}
            x |= { "a" : 1}
            .
        "#};
        unused_test(source, &[]);
    }

    #[test]
    fn false_closure_variable_unused_warning() {
        let source = indoc! {r"
           done = false
            for_each([1]) -> |_i, _v| {
                if !done {
                    done = true
                }
            }
        "};
        unused_test(source, &[]);
    }
}