// vrl/parser/lex.rs

1use std::{fmt, iter::Peekable, str::CharIndices};
2
3use crate::diagnostic::{DiagnosticMessage, Label, Span};
4use ordered_float::NotNan;
5
6use super::template_string::{StringSegment, TemplateString};
7
/// A token whose string payloads borrow from the lexer input.
pub(crate) type Tok<'input> = Token<&'input str>;
/// Result of lexing one token: a spanned token on success, a lexer [`Error`] otherwise.
pub(crate) type SpannedResult<'input, Loc> = Result<Spanned<'input, Loc>, Error>;
/// A token with its surrounding locations, laid out as `(start, token, end)`.
pub(crate) type Spanned<'input, Loc> = (Loc, Tok<'input>, Loc);
11
/// Lexing and parsing errors.
///
/// Each variant is turned into a diagnostic (code and labels) by the
/// `DiagnosticMessage` implementation for this type.
#[derive(thiserror::Error, Clone, Debug, PartialEq, Eq)]
pub enum Error {
    /// An error reported by the LALRPOP-generated parser (diagnostic codes
    /// 200-204, depending on the `source` variant).
    #[error("syntax error")]
    ParseError {
        /// Span associated with the error.
        span: Span,
        /// The underlying LALRPOP error.
        source: lalrpop_util::ParseError<usize, Token<String>, String>,
        /// NOTE(review): presumably the tokens discarded during error
        /// recovery, as `(start, token, end)` — confirm against the parser.
        dropped_tokens: Vec<(usize, Token<String>, usize)>,
    },

    /// An identifier that is reserved for future use in the language
    /// (diagnostic code 205).
    #[error("reserved keyword")]
    ReservedKeyword {
        /// Start position of the keyword.
        start: usize,
        /// The reserved keyword that was used.
        keyword: String,
        /// End position of the keyword.
        end: usize,
    },

    /// A numeric literal that could not be accepted (diagnostic code 206).
    #[error("invalid numeric literal")]
    NumericLiteral {
        /// Start position of the literal.
        start: usize,
        /// Description of what is wrong with the literal.
        error: String,
        /// End position of the literal.
        end: usize,
    },

    /// An invalid string literal (diagnostic code 207); the label covers
    /// `start..start + 1`.
    #[error("invalid string literal")]
    StringLiteral { start: usize },

    /// An invalid literal (diagnostic code 208); the label covers
    /// `start..start + 1`.
    #[error("invalid literal")]
    Literal { start: usize },

    /// An invalid escape sequence (diagnostic code 209). `ch` is the
    /// character following the backslash, if any.
    #[error("invalid escape character: \\{}", .ch.unwrap_or_default())]
    EscapeChar { start: usize, ch: Option<char> },

    /// An unexpected error described only by a message (diagnostic code 210);
    /// it is labelled with the default span.
    #[error("unexpected parse error")]
    UnexpectedParseError(String),
}
47
48impl Error {
49    /// Offset all position fields in the error by the given amount.
50    /// This is needed when errors are returned from nested lexers that
51    /// operate on a slice of the original input.
52    fn offset_by(self, offset: usize) -> Self {
53        match self {
54            Error::ParseError {
55                span,
56                source,
57                dropped_tokens,
58            } => Error::ParseError {
59                span: Span::new(span.start() + offset, span.end() + offset),
60                source,
61                dropped_tokens,
62            },
63            Error::ReservedKeyword {
64                start,
65                keyword,
66                end,
67            } => Error::ReservedKeyword {
68                start: start + offset,
69                keyword,
70                end: end + offset,
71            },
72            Error::NumericLiteral { start, error, end } => Error::NumericLiteral {
73                start: start + offset,
74                error,
75                end: end + offset,
76            },
77            Error::StringLiteral { start } => Error::StringLiteral {
78                start: start + offset,
79            },
80            Error::Literal { start } => Error::Literal {
81                start: start + offset,
82            },
83            Error::EscapeChar { start, ch } => Error::EscapeChar {
84                start: start + offset,
85                ch,
86            },
87            Error::UnexpectedParseError(s) => Error::UnexpectedParseError(s),
88        }
89    }
90}
91
92impl DiagnosticMessage for Error {
93    fn code(&self) -> usize {
94        use Error::{
95            EscapeChar, Literal, NumericLiteral, ParseError, ReservedKeyword, StringLiteral,
96            UnexpectedParseError,
97        };
98
99        match self {
100            ParseError { source, .. } => match source {
101                lalrpop_util::ParseError::InvalidToken { .. } => 200,
102                lalrpop_util::ParseError::ExtraToken { .. } => 201,
103                lalrpop_util::ParseError::User { .. } => 202,
104                lalrpop_util::ParseError::UnrecognizedToken { .. } => 203,
105                lalrpop_util::ParseError::UnrecognizedEof { .. } => 204,
106            },
107            ReservedKeyword { .. } => 205,
108            NumericLiteral { .. } => 206,
109            StringLiteral { .. } => 207,
110            Literal { .. } => 208,
111            EscapeChar { .. } => 209,
112            UnexpectedParseError(..) => 210,
113        }
114    }
115
116    fn labels(&self) -> Vec<Label> {
117        use Error::{
118            EscapeChar, Literal, NumericLiteral, ParseError, ReservedKeyword, StringLiteral,
119            UnexpectedParseError,
120        };
121
122        fn update_expected(expected: Vec<String>) -> Vec<String> {
123            expected
124                .into_iter()
125                .map(|expect| match expect.as_str() {
126                    "LQuery" => r#""path literal""#.to_owned(),
127                    _ => expect,
128                })
129                .collect::<Vec<_>>()
130        }
131
132        match self {
133            ParseError { span, source, .. } => match source {
134                lalrpop_util::ParseError::InvalidToken { location } => vec![Label::primary(
135                    "invalid token",
136                    Span::new(*location, *location + 1),
137                )],
138                lalrpop_util::ParseError::ExtraToken { token } => {
139                    let (start, token, end) = token;
140                    vec![Label::primary(
141                        format!("unexpected extra token: {token}"),
142                        Span::new(*start, *end),
143                    )]
144                }
145                lalrpop_util::ParseError::User { error } => {
146                    vec![Label::primary(format!("unexpected error: {error}"), span)]
147                }
148                lalrpop_util::ParseError::UnrecognizedToken { token, expected } => {
149                    let (start, token, end) = token;
150                    let span = Span::new(*start, *end);
151                    let got = token.to_string();
152                    let mut expected = update_expected(expected.clone());
153
154                    // Temporary hack to improve error messages for `AnyIdent`
155                    // parser rule.
156                    let any_ident = [
157                        r#""reserved identifier""#,
158                        r#""else""#,
159                        r#""false""#,
160                        r#""null""#,
161                        r#""true""#,
162                        r#""if""#,
163                    ];
164                    let is_any_ident = any_ident
165                        .iter()
166                        .all(|i| expected.contains(&(*i).to_string()));
167                    if is_any_ident {
168                        expected = expected
169                            .into_iter()
170                            .filter(|e| !any_ident.contains(&e.as_str()))
171                            .collect::<Vec<_>>();
172                    }
173
174                    if token == &Token::RQuery {
175                        return vec![
176                            Label::primary("unexpected end of query path", span),
177                            Label::context(
178                                format!("expected one of: {}", expected.join(", ")),
179                                span,
180                            ),
181                        ];
182                    }
183
184                    vec![
185                        Label::primary(format!(r#"unexpected syntax token: "{got}""#), span),
186                        Label::context(format!("expected one of: {}", expected.join(", ")), span),
187                    ]
188                }
189                lalrpop_util::ParseError::UnrecognizedEof { location, expected } => {
190                    let span = Span::new(*location, *location);
191                    let expected = update_expected(expected.clone());
192
193                    vec![
194                        Label::primary("unexpected end of program", span),
195                        Label::context(format!("expected one of: {}", expected.join(", ")), span),
196                    ]
197                }
198            },
199
200            ReservedKeyword { start, end, .. } => {
201                let span = Span::new(*start, *end);
202
203                vec![
204                    Label::primary(
205                        "this identifier name is reserved for future use in the language",
206                        span,
207                    ),
208                    Label::context("use a different name instead", span),
209                ]
210            }
211
212            NumericLiteral { start, error, end } => vec![Label::primary(
213                format!("invalid numeric literal: {error}"),
214                Span::new(*start, *end),
215            )],
216
217            StringLiteral { start } => vec![Label::primary(
218                "invalid string literal",
219                Span::new(*start, *start + 1),
220            )],
221
222            Literal { start } => vec![Label::primary(
223                "invalid literal",
224                Span::new(*start, *start + 1),
225            )],
226
227            EscapeChar { start, ch } => vec![Label::primary(
228                format!(
229                    "invalid escape character: {}",
230                    ch.map(|ch| ch.to_string())
231                        .unwrap_or_else(|| "none".to_string())
232                ),
233                Span::new(*start, *start + 1),
234            )],
235
236            UnexpectedParseError(string) => vec![Label::primary(string, Span::default())],
237        }
238    }
239}
240
241// -----------------------------------------------------------------------------
242// lexer
243// -----------------------------------------------------------------------------
244
/// Lexer state for a single input string; tokens are pulled from it through
/// its `Iterator` implementation.
#[derive(Debug)]
pub(crate) struct Lexer<'input> {
    /// The complete input being lexed.
    input: &'input str,
    /// Peekable iterator over the characters of the input and their indices.
    chars: Peekable<CharIndices<'input>>,

    // state
    // Counts of currently unclosed `[`, `{` and `(`; they are incremented when
    // an opening token is lexed and decremented (saturating at zero) on the
    // matching closing token.
    open_brackets: usize,
    open_braces: usize,
    open_parens: usize,
    // Position at which the most recent `LQuery` token was emitted, if any.
    query_start: Option<usize>,

    /// Keep track of when the lexer is supposed to emit an `RQuery` token.
    ///
    /// For example:
    ///
    ///   [.foo].bar
    ///
    /// In this example, if `[` is at index `0`, then this value will contain:
    ///
    ///   [10, 5]
    ///
    /// Or:
    ///
    ///   [.foo].bar
    ///   ~~~~~~~~~~  0..10
    ///    ~~~~       1..5
    rquery_indices: Vec<usize>,
}
273
impl<'input> Lexer<'input> {
    /// Produces the next token together with its start and end positions.
    ///
    /// Besides tokens for physical characters, this also emits the synthetic
    /// `LQuery`/`RQuery` boundary tokens. Comments (`#` up to the end of the
    /// line) and whitespace other than `\n` are skipped. Returns `None` once
    /// the input is exhausted and no pending `RQuery` remains to be emitted.
    ///
    /// NOTE: the order of the guarded match arms below is significant; earlier
    /// arms take precedence over the generic identifier, numeric and operator
    /// arms.
    fn next_token(&mut self) -> Option<SpannedResult<'input, usize>> {
        use Token::{
            Ampersand, Arrow, Bang, Colon, Comma, Dot, Escape, InvalidToken, LBrace, LBracket,
            LParen, LQuery, Newline, Percent, RBrace, RBracket, RParen, RQuery, SemiColon,
            Underscore,
        };

        loop {
            let start = self.next_index();

            // Check if we need to emit a `LQuery` token.
            //
            // We don't advance the internal iterator, because this token does not
            // represent a physical character, instead it is a boundary marker.
            let query_start_result = self.query_start(start);
            match query_start_result {
                Err(err) => return Some(Err(err)),
                Ok(true) => {
                    self.query_start = Some(start);
                    // dbg!("LQuery"); // NOTE: uncomment this for debugging
                    return Some(Ok((start, LQuery, start + 1)));
                }
                Ok(false) => {}
            }

            // Check if we need to emit a `RQuery` token.
            //
            // We don't advance the internal iterator, because this token does not
            // represent a physical character, instead it is a boundary marker.
            if let Some(pos) = self.query_end(start) {
                // dbg!("RQuery"); // NOTE: uncomment this for debugging
                return Some(Ok((pos, RQuery, pos + 1)));
            }

            // Advance the internal iterator and emit the next token, or loop
            // again if we encounter a token we want to ignore (e.g. whitespace).
            if let Some((start, ch)) = self.bump() {
                let result = match ch {
                    '"' => Some(self.string_literal(start)),

                    ';' => Some(Ok(self.token(start, SemiColon))),
                    '\n' => Some(Ok(self.token(start, Newline))),
                    '\\' => Some(Ok(self.token(start, Escape))),

                    '(' => Some(Ok(self.open(start, LParen))),
                    '[' => Some(Ok(self.open(start, LBracket))),
                    '{' => Some(Ok(self.open(start, LBrace))),
                    '}' => Some(Ok(self.close(start, RBrace))),
                    ']' => Some(Ok(self.close(start, RBracket))),
                    ')' => Some(Ok(self.close(start, RParen))),
                    '.' => Some(Ok(self.token(start, Dot))),
                    '%' => Some(Ok(self.token(start, Percent))),
                    // A single `&`; when followed by another `&` this arm is
                    // skipped and a later arm handles it.
                    '&' if !matches!(self.peek(), Some((_, '&'))) => {
                        Some(Ok(self.token(start, Ampersand)))
                    }
                    ':' => Some(Ok(self.token(start, Colon))),
                    ',' => Some(Ok(self.token(start, Comma))),

                    // A lone `_`; an underscore followed by an identifier
                    // character falls through to a later arm.
                    '_' if !self.test_peek(is_ident_continue) => {
                        Some(Ok(self.token(start, Underscore)))
                    }

                    '!' if self.test_peek(|ch| ch == '!' || !is_operator(ch)) => {
                        Some(Ok(self.token(start, Bang)))
                    }

                    // `->`: consume the `>` as well.
                    '-' if self.test_peek(|ch| ch == '>') => {
                        self.bump();
                        Some(Ok(self.token(start, Arrow)))
                    }

                    // Comment: skip up to the end of the line.
                    '#' => {
                        self.take_until(start, |ch| ch == '\n');
                        continue;
                    }

                    // Prefixed literals: r'…' (regex), s'…' (raw string),
                    // t'…' (timestamp).
                    'r' if self.test_peek(|ch| ch == '\'') => Some(self.regex_literal(start)),
                    's' if self.test_peek(|ch| ch == '\'') => Some(self.raw_string_literal(start)),
                    't' if self.test_peek(|ch| ch == '\'') => Some(self.timestamp_literal(start)),

                    ch if is_ident_start(ch) => Some(Ok(self.identifier_or_function_call(start))),
                    ch if is_digit(ch) || (ch == '-' && self.test_peek(is_digit)) => {
                        Some(self.numeric_literal_or_identifier(start))
                    }
                    ch if is_operator(ch) => Some(Ok(self.operator(start))),
                    ch if ch.is_whitespace() => continue,

                    ch => Some(Ok(self.token(start, InvalidToken(ch)))),
                };

                // dbg!(&result); // NOTE: uncomment this for debugging

                return result;

                // If we've parsed the final character, and there are still open
                // queries, we need to keep the iterator going and close those
                // queries.
            } else if let Some(end) = self.rquery_indices.pop() {
                // dbg!("RQuery"); // NOTE: uncomment this for debugging
                return Some(Ok((end, RQuery, end + 1)));
            }

            return None;
        }
    }
}
381
/// A lexical token, generic over the type `S` used for string payloads.
#[derive(Clone, Eq, PartialEq, Hash, Debug)]
pub enum Token<S> {
    Identifier(S),
    PathField(S),
    FunctionCall(S),
    Operator(S),

    // literals
    StringLiteral(StringLiteralToken<S>),
    RawStringLiteral(RawStringLiteralToken<S>),
    IntegerLiteral(i64),
    FloatLiteral(NotNan<f64>),
    RegexLiteral(S),
    TimestampLiteral(S),

    // Reserved for future use.
    ReservedIdentifier(S),

    // A character that no lexer rule recognises.
    InvalidToken(char),

    // keywords
    If,
    Else,
    Null,
    False,
    True,
    Abort,
    Return,

    // tokens
    Colon,
    Comma,
    Dot,
    LBrace,
    LBracket,
    LParen,
    Newline,
    RBrace,
    RBracket,
    RParen,
    SemiColon,
    Underscore,
    Escape,
    Arrow,
    Ampersand,
    Percent,

    Equals,
    MergeEquals,
    Bang,
    Question,

    /// The {L,R}Query token is an "instruction" token. It does not represent
    /// any character in the source, instead it represents the start or end of a
    /// sequence of tokens that together form a "query".
    ///
    /// Some examples:
    ///
    /// ```text
    /// .          => LQuery, Dot, RQuery
    /// .foo       => LQuery, Dot, Ident, RQuery
    /// foo.bar[2] => LQuery, Ident, Dot, Ident, LBracket, Integer, RBracket, RQuery
    /// foo().bar  => LQuery, FunctionCall, LParen, RParen, Dot, Ident, RQuery
    /// [1].foo    => LQuery, LBracket, Integer, RBracket, Dot, Ident, RQuery
    /// { .. }[0]  => LQuery, LBrace, ..., RBrace, LBracket, ... RBracket, RQuery
    /// ```
    ///
    /// The final example shows how the lexer does not care about the semantic
    /// validity of a query (as in, getting the index from an object does not
    /// work), it only signals that one exists.
    ///
    /// Some non-matching examples:
    ///
    /// ```text
    /// . foo      => Dot, Identifier
    /// foo() .a   => FunctionCall, LParen, RParen, LQuery, Dot, Ident, RQuery
    /// [1] [2]    => LBracket, Integer, RBracket, LBracket, Integer, RBracket
    /// ```
    ///
    /// The reason these tokens exist is to allow the parser to remain
    /// whitespace-agnostic, while still being able to distinguish between the
    /// above two groups of examples.
    LQuery,
    RQuery,
}
467
468impl<S> Token<S> {
469    pub(crate) fn map<R>(self, f: impl Fn(S) -> R) -> Token<R> {
470        use self::Token::{
471            Abort, Ampersand, Arrow, Bang, Colon, Comma, Dot, Else, Equals, Escape, False,
472            FloatLiteral, FunctionCall, Identifier, If, IntegerLiteral, InvalidToken, LBrace,
473            LBracket, LParen, LQuery, MergeEquals, Newline, Null, Operator, PathField, Percent,
474            Question, RBrace, RBracket, RParen, RQuery, RawStringLiteral, RegexLiteral,
475            ReservedIdentifier, Return, SemiColon, StringLiteral, TimestampLiteral, True,
476            Underscore,
477        };
478
479        match self {
480            Identifier(s) => Identifier(f(s)),
481            PathField(s) => PathField(f(s)),
482            FunctionCall(s) => FunctionCall(f(s)),
483            Operator(s) => Operator(f(s)),
484
485            StringLiteral(StringLiteralToken(s)) => StringLiteral(StringLiteralToken(f(s))),
486            RawStringLiteral(RawStringLiteralToken(s)) => {
487                RawStringLiteral(RawStringLiteralToken(f(s)))
488            }
489
490            IntegerLiteral(s) => IntegerLiteral(s),
491            FloatLiteral(s) => FloatLiteral(s),
492            RegexLiteral(s) => RegexLiteral(f(s)),
493            TimestampLiteral(s) => TimestampLiteral(f(s)),
494
495            ReservedIdentifier(s) => ReservedIdentifier(f(s)),
496
497            InvalidToken(s) => InvalidToken(s),
498
499            Else => Else,
500            False => False,
501            If => If,
502            Null => Null,
503            True => True,
504            Abort => Abort,
505            Return => Return,
506
507            // tokens
508            Colon => Colon,
509            Comma => Comma,
510            Dot => Dot,
511            LBrace => LBrace,
512            LBracket => LBracket,
513            LParen => LParen,
514            Newline => Newline,
515            RBrace => RBrace,
516            RBracket => RBracket,
517            RParen => RParen,
518            SemiColon => SemiColon,
519            Underscore => Underscore,
520            Escape => Escape,
521            Arrow => Arrow,
522            Ampersand => Ampersand,
523            Percent => Percent,
524
525            Equals => Equals,
526            MergeEquals => MergeEquals,
527            Bang => Bang,
528            Question => Question,
529
530            LQuery => LQuery,
531            RQuery => RQuery,
532        }
533    }
534}
535
536impl<S> fmt::Display for Token<S>
537where
538    S: fmt::Display,
539{
540    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
541        use self::Token::{
542            Abort, Ampersand, Arrow, Bang, Colon, Comma, Dot, Else, Equals, Escape, False,
543            FloatLiteral, FunctionCall, Identifier, If, IntegerLiteral, InvalidToken, LBrace,
544            LBracket, LParen, LQuery, MergeEquals, Newline, Null, Operator, PathField, Percent,
545            Question, RBrace, RBracket, RParen, RQuery, RawStringLiteral, RegexLiteral,
546            ReservedIdentifier, Return, SemiColon, StringLiteral, TimestampLiteral, True,
547            Underscore,
548        };
549
550        let s = match *self {
551            Identifier(_) => "Identifier",
552            PathField(_) => "PathField",
553            FunctionCall(_) => "FunctionCall",
554            Operator(_) => "Operator",
555            StringLiteral(_) => "StringLiteral",
556            RawStringLiteral(_) => "RawStringLiteral",
557            IntegerLiteral(_) => "IntegerLiteral",
558            FloatLiteral(_) => "FloatLiteral",
559            RegexLiteral(_) => "RegexLiteral",
560            TimestampLiteral(_) => "TimestampLiteral",
561            ReservedIdentifier(_) => "ReservedIdentifier",
562            InvalidToken(_) => "InvalidToken",
563
564            Else => "Else",
565            False => "False",
566            If => "If",
567            Null => "Null",
568            True => "True",
569            Abort => "Abort",
570            Return => "Return",
571
572            // tokens
573            Colon => "Colon",
574            Comma => "Comma",
575            Dot => "Dot",
576            LBrace => "LBrace",
577            LBracket => "LBracket",
578            LParen => "LParen",
579            Newline => "Newline",
580            RBrace => "RBrace",
581            RBracket => "RBracket",
582            RParen => "RParen",
583            SemiColon => "SemiColon",
584            Underscore => "Underscore",
585            Escape => "Escape",
586            Arrow => "Arrow",
587            Ampersand => "Ampersand",
588            Percent => "Percent",
589
590            Equals => "Equals",
591            MergeEquals => "MergeEquals",
592            Bang => "Bang",
593            Question => "Question",
594
595            LQuery => "LQuery",
596            RQuery => "RQuery",
597        };
598
599        s.fmt(f)
600    }
601}
602
603impl<'input> Token<&'input str> {
604    /// Returns either a literal, reserved, or generic identifier.
605    fn ident(s: &'input str) -> Self {
606        use Token::{
607            Abort, Else, False, Identifier, If, Null, PathField, ReservedIdentifier, Return, True,
608        };
609
610        match s {
611            "if" => If,
612            "else" => Else,
613            "true" => True,
614            "false" => False,
615            "null" => Null,
616            "abort" => Abort,
617            "return" => Return,
618
619            // reserved identifiers
620            "array" | "bool" | "boolean" | "break" | "continue" | "do" | "emit" | "float"
621            | "for" | "forall" | "foreach" | "all" | "each" | "any" | "try" | "undefined"
622            | "int" | "integer" | "iter" | "object" | "regex" | "string" | "traverse"
623            | "timestamp" | "duration" | "unless" | "walk" | "while" | "loop" => {
624                ReservedIdentifier(s)
625            }
626
627            _ if s.contains('@') => PathField(s),
628
629            _ => Identifier(s),
630        }
631    }
632}
633
/// Payload of a `Token::StringLiteral`: the string's text as lexed; escape
/// sequences and `{{ .. }}` templates are processed by `unescape` and
/// `template` respectively.
#[derive(Clone, PartialEq, Eq, Debug, Hash)]
pub struct StringLiteralToken<S>(pub S);
636
/// Payload of a `Token::RawStringLiteral`: the string's text, which
/// `unescape` returns verbatim.
#[derive(Clone, PartialEq, Eq, Debug, Hash)]
pub struct RawStringLiteralToken<S>(pub S);
639
640impl StringLiteralToken<&str> {
641    /// Takes the string and splits it into segments of literals and templates.
642    /// A templated section is delimited by `{{..}}`. `{{` can be escaped using
643    /// `\{{...\}}`.
644    pub fn template(&self, span: Span) -> TemplateString {
645        let mut segments = Vec::new();
646
647        let chars = self.0.chars().collect::<Vec<_>>();
648        let mut template = false;
649        let mut current = String::new();
650
651        let mut pos = 0;
652        while pos < chars.len() {
653            match chars[pos] {
654                '}' if template && chars.get(pos + 1) == Some(&'}') => {
655                    // Handle closing template `}}`.
656                    if !current.is_empty() {
657                        let seg = std::mem::take(&mut current);
658                        segments.push(StringSegment::Template(
659                            seg.trim().to_string(),
660                            Span::new(pos - seg.chars().count() - 1, pos + 3) + span.start(),
661                        ));
662                    }
663                    template = false;
664                    pos += 2;
665                }
666                '\\' if !template
667                    && chars.get(pos + 1) == Some(&'{')
668                    && chars.get(pos + 2) == Some(&'{') =>
669                {
670                    // Handle open escape `/{{`.
671                    current.push_str("{{");
672                    pos += 3;
673                }
674                '\\' if !template
675                    && chars.get(pos + 1) == Some(&'}')
676                    && chars.get(pos + 2) == Some(&'}') =>
677                {
678                    // Handle close escape `\}}`
679                    current.push_str("}}");
680                    pos += 3;
681                }
682                '{' if !template && chars.get(pos + 1) == Some(&'{') => {
683                    // Handle start of template.
684                    if !current.is_empty() {
685                        let seg = std::mem::take(&mut current);
686                        segments.push(StringSegment::Literal(
687                            unescape_string_literal(&seg),
688                            Span::new(pos - seg.chars().count() + 1, pos + 1) + span.start(),
689                        ));
690                    }
691                    template = true;
692                    pos += 2;
693                }
694                chr => {
695                    current.push(chr);
696                    pos += 1;
697                }
698            }
699        }
700
701        if !template && !current.is_empty() {
702            segments.push(StringSegment::Literal(
703                unescape_string_literal(&current),
704                Span::new(pos - current.chars().count() + 1, pos + 1) + span.start(),
705            ));
706        }
707
708        TemplateString(segments)
709    }
710
711    pub fn unescape(&self) -> String {
712        unescape_string_literal(self.0)
713    }
714}
715
716impl RawStringLiteralToken<&str> {
717    pub fn unescape(&self) -> String {
718        self.0.to_string()
719    }
720}
721
722// -----------------------------------------------------------------------------
723// lexing iterator
724// -----------------------------------------------------------------------------
725
726impl<'input> Iterator for Lexer<'input> {
727    type Item = SpannedResult<'input, usize>;
728
729    fn next(&mut self) -> Option<Self::Item> {
730        self.next_token()
731    }
732}
733
734// -----------------------------------------------------------------------------
735// lexing logic
736// -----------------------------------------------------------------------------
737
738impl<'input> Lexer<'input> {
739    fn open(&mut self, start: usize, token: Token<&'input str>) -> Spanned<'input, usize> {
740        match &token {
741            Token::LParen => self.open_parens += 1,
742            Token::LBracket => self.open_brackets += 1,
743            Token::LBrace => self.open_braces += 1,
744            _ => {}
745        }
746
747        self.token(start, token)
748    }
749
750    fn close(&mut self, start: usize, token: Token<&'input str>) -> Spanned<'input, usize> {
751        match &token {
752            Token::RParen => self.open_parens = self.open_parens.saturating_sub(1),
753            Token::RBracket => self.open_brackets = self.open_brackets.saturating_sub(1),
754            Token::RBrace => self.open_braces = self.open_braces.saturating_sub(1),
755            _ => {}
756        }
757
758        self.token(start, token)
759    }
760
761    fn token(&mut self, start: usize, token: Token<&'input str>) -> Spanned<'input, usize> {
762        (start, token, self.next_index())
763    }
764
765    fn query_end(&mut self, start: usize) -> Option<usize> {
766        match self.rquery_indices.last() {
767            Some(end) if start > 0 && start.saturating_sub(1) == *end => self.rquery_indices.pop(),
768            _ => None,
769        }
770    }
771
772    fn query_start(&mut self, start: usize) -> Result<bool, Error> {
773        // If we already opened a query for the current position, we don't want
774        // to open another one.
775        if self.rquery_indices.last() == Some(&start) {
776            return Ok(false);
777        }
778
779        // If the iterator is at the end, we don't want to open another one
780        if self.peek().is_none() {
781            return Ok(false);
782        }
783
784        // Take a clone of the existing chars iterator, to allow us to look
785        // ahead without advancing the lexer's iterator. This is cheap, since
786        // the original iterator only holds references.
787        let mut chars = self.chars.clone();
788        debug_assert!(chars.peek().is_some());
789
790        // Only continue if the current character is a valid query start
791        // character. We know there's at least one more char, given the above
792        // assertion.
793
794        let query_start_char = chars.peek().unwrap().1;
795        if !is_query_start(query_start_char) {
796            return Ok(false);
797        }
798
799        // Track if the current chain is a valid one.
800        //
801        // A valid chain consists of a target, and a path to query that target.
802        //
803        // Valid examples:
804        //
805        //   .foo         (target = external, path = .foo)
806        //   foo.bar      (target = internal, path = .bar)
807        //   { .. }.bar   (target = object, path = .bar)
808        //   [1][2]       (target = array, path = [2])
809        //
810        // Invalid examples:
811        //
812        //   foo          (target = internal, no path)
813        //   { .. }       (target = object, no path)
814        //   [1]          (target = array, no path)
815        let mut valid = false;
816
817        // Track the last char, so that we know if the next one is valid or not.
818        let mut last_char = None;
819
820        // We need to manually track for even open/close characters, to
821        // determine when the span will end.
822        let mut braces = 0;
823        let mut brackets = 0;
824        let mut parens = 0;
825
826        let mut end = 0;
827        while let Some((pos, ch)) = chars.next() {
828            let take_until_end =
829                |result: SpannedResult<'input, usize>,
830                 last_char: &mut Option<char>,
831                 end: &mut usize,
832                 chars: &mut Peekable<CharIndices<'input>>| {
833                    result.map(|(_, _, new)| {
834                        for (i, ch) in chars {
835                            *last_char = Some(ch);
836                            if i == new + pos {
837                                break;
838                            }
839                        }
840
841                        *end = pos + new;
842                    })
843                };
844
845            match ch {
846                // containers
847                '{' => braces += 1,
848                '(' => parens += 1,
849                '[' if braces == 0 && parens == 0 && brackets == 0 => {
850                    brackets += 1;
851
852                    if last_char == Some(']') {
853                        valid = true
854                    }
855
856                    if last_char == Some('}') {
857                        valid = true
858                    }
859
860                    if last_char == Some(')') {
861                        valid = true
862                    }
863
864                    if last_char.is_some_and(is_ident_continue) {
865                        valid = true
866                    }
867                }
868                '[' => brackets += 1,
869
870                // literals
871                '"' => {
872                    let result = Lexer::new(&self.input[pos + 1..]).string_literal(0);
873                    match take_until_end(result, &mut last_char, &mut end, &mut chars) {
874                        Ok(()) => continue,
875                        Err(_) => break,
876                    }
877                }
878                's' if chars.peek().map(|(_, ch)| ch) == Some(&'\'') => {
879                    let result = Lexer::new(&self.input[pos + 1..]).raw_string_literal(0);
880                    match take_until_end(result, &mut last_char, &mut end, &mut chars) {
881                        Ok(()) => continue,
882                        Err(_) => break,
883                    }
884                }
885                'r' if chars.peek().map(|(_, ch)| ch) == Some(&'\'') => {
886                    let result = Lexer::new(&self.input[pos + 1..]).regex_literal(0);
887                    match take_until_end(result, &mut last_char, &mut end, &mut chars) {
888                        Ok(()) => continue,
889                        Err(_) => break,
890                    }
891                }
892                't' if chars.peek().map(|(_, ch)| ch) == Some(&'\'') => {
893                    let result = Lexer::new(&self.input[pos + 1..]).timestamp_literal(0);
894                    match take_until_end(result, &mut last_char, &mut end, &mut chars) {
895                        Ok(()) => continue,
896                        Err(_) => break,
897                    }
898                }
899
900                '}' if braces == 0 => break,
901                '}' => braces -= 1,
902
903                ')' if parens == 0 => break,
904                ')' => parens -= 1,
905
906                ']' if brackets == 0 => break,
907                ']' => brackets -= 1,
908
909                // the lexer doesn't care about the semantic validity inside
910                // delimited regions in a query.
911                _ if braces > 0 || brackets > 0 || parens > 0 => {
912                    let (start_delim, end_delim) = if braces > 0 {
913                        ('{', '}')
914                    } else if brackets > 0 {
915                        ('[', ']')
916                    } else {
917                        ('(', ')')
918                    };
919
920                    let mut skip_delim = 0;
921                    while let Some((pos, ch)) = chars.peek() {
922                        let pos = *pos;
923
924                        let literal_check = |result: Spanned<'input, usize>, chars: &mut Peekable<CharIndices<'input>>| {
925                            let (_, _, new) = result;
926
927                            #[allow(clippy::while_let_on_iterator)]
928                            while let Some((i, _)) = chars.next() {
929                                if i == new + pos {
930                                    break;
931                                }
932                            }
933                            match chars.peek().map(|(_, ch)| ch) {
934                                Some(ch) => Ok(*ch),
935                                None => Err(()),
936                            }
937                        };
938
939                        let ch = match &self.input[pos..] {
940                            s if s.starts_with('#') => {
941                                for (_, chr) in chars.by_ref() {
942                                    if chr == '\n' {
943                                        break;
944                                    }
945                                }
946                                match chars.peek().map(|(_, ch)| ch) {
947                                    Some(ch) => *ch,
948                                    None => {
949                                        return Err(Error::UnexpectedParseError(
950                                            "Expected characters at end of comment.".to_string(),
951                                        ));
952                                    }
953                                }
954                            }
955                            s if s.starts_with('"') => {
956                                let r = Lexer::new(&self.input[pos + 1..])
957                                    .string_literal(0)
958                                    .map_err(|e| e.offset_by(pos + 1))?;
959                                match literal_check(r, &mut chars) {
960                                    Ok(ch) => ch,
961                                    Err(()) => {
962                                        // The call to lexer above should have raised an appropriate error by now,
963                                        // so these errors should only occur if there is a bug somewhere previously.
964                                        return Err(Error::UnexpectedParseError(
965                                            "Expected characters at end of string literal."
966                                                .to_string(),
967                                        ));
968                                    }
969                                }
970                            }
971                            s if s.starts_with("s'") => {
972                                let r = Lexer::new(&self.input[pos + 1..])
973                                    .raw_string_literal(0)
974                                    .map_err(|e| e.offset_by(pos + 1))?;
975                                match literal_check(r, &mut chars) {
976                                    Ok(ch) => ch,
977                                    Err(()) => {
978                                        return Err(Error::UnexpectedParseError(
979                                            "Expected characters at end of raw string literal."
980                                                .to_string(),
981                                        ));
982                                    }
983                                }
984                            }
985                            s if s.starts_with("r'") => {
986                                let r = Lexer::new(&self.input[pos + 1..])
987                                    .regex_literal(0)
988                                    .map_err(|e| e.offset_by(pos + 1))?;
989                                match literal_check(r, &mut chars) {
990                                    Ok(ch) => ch,
991                                    Err(()) => {
992                                        return Err(Error::UnexpectedParseError(
993                                            "Expected characters at end of regex literal."
994                                                .to_string(),
995                                        ));
996                                    }
997                                }
998                            }
999                            s if s.starts_with("t'") => {
1000                                let r = Lexer::new(&self.input[pos + 1..])
1001                                    .timestamp_literal(0)
1002                                    .map_err(|e| e.offset_by(pos + 1))?;
1003                                match literal_check(r, &mut chars) {
1004                                    Ok(ch) => ch,
1005                                    Err(()) => {
1006                                        return Err(Error::UnexpectedParseError(
1007                                            "Expected characters at end of timestamp literal."
1008                                                .to_string(),
1009                                        ));
1010                                    }
1011                                }
1012                            }
1013                            _ => *ch,
1014                        };
1015
1016                        if skip_delim == 0 && ch == end_delim {
1017                            break;
1018                        }
1019                        if let Some((_, c)) = chars.next() {
1020                            if c == start_delim {
1021                                skip_delim += 1;
1022                            }
1023                            if c == end_delim {
1024                                skip_delim -= 1;
1025                            }
1026                        }
1027                    }
1028                }
1029                '.' | '%' if last_char.is_none() => valid = true,
1030                '.' if last_char == Some(')') => valid = true,
1031                '.' if last_char == Some('}') => valid = true,
1032                '.' if last_char == Some(']') => valid = true,
1033                '.' if last_char == Some('"') => valid = true,
1034                '.' if last_char.is_some_and(is_ident_continue) => {
1035                    // we need to make sure we're not dealing with a float here
1036                    let digits = self.input[..pos]
1037                        .chars()
1038                        .rev()
1039                        .take_while(|ch| !ch.is_whitespace())
1040                        .all(|ch| is_digit(ch) || ch == '_');
1041
1042                    if !digits {
1043                        valid = true
1044                    }
1045                }
1046
1047                // function-call-abort
1048                '!' => {}
1049
1050                // comments
1051                '#' => {
1052                    #[allow(clippy::while_let_on_iterator)]
1053                    while let Some((pos, ch)) = chars.next() {
1054                        if ch == '\n' {
1055                            break;
1056                        }
1057                        end = pos;
1058                    }
1059                    continue;
1060                }
1061
1062                ch if is_ident_continue(ch) => {}
1063
1064                // Any other character breaks the query chain.
1065                _ => break,
1066            }
1067
1068            last_char = Some(ch);
1069            end = pos;
1070        }
1071
1072        // Skip invalid query chains
1073        if !valid {
1074            return Ok(false);
1075        }
1076
1077        // If we already tracked the current chain, we want to ignore another one.
1078        if self.rquery_indices.contains(&end) {
1079            return Ok(false);
1080        }
1081
1082        self.rquery_indices.push(end);
1083        Ok(true)
1084    }
1085
1086    fn string_literal(&mut self, start: usize) -> SpannedResult<'input, usize> {
1087        let content_start = self.next_index();
1088
1089        loop {
1090            let scan_start = self.next_index();
1091            self.take_until(scan_start, |c| c == '"' || c == '\\');
1092
1093            match self.bump() {
1094                Some((escape_start, '\\')) => self.escape_code(escape_start)?,
1095                Some((content_end, '"')) => {
1096                    let end = self.next_index();
1097                    let slice = self.slice(content_start, content_end);
1098                    let token = Token::StringLiteral(StringLiteralToken(slice));
1099                    return Ok((start, token, end));
1100                }
1101                _ => break,
1102            }
1103        }
1104
1105        Err(Error::StringLiteral { start })
1106    }
1107
1108    fn regex_literal(&mut self, start: usize) -> SpannedResult<'input, usize> {
1109        self.quoted_literal(start, Token::RegexLiteral)
1110    }
1111
1112    fn raw_string_literal(&mut self, start: usize) -> SpannedResult<'input, usize> {
1113        self.quoted_literal(start, |c| Token::RawStringLiteral(RawStringLiteralToken(c)))
1114    }
1115
1116    fn timestamp_literal(&mut self, start: usize) -> SpannedResult<'input, usize> {
1117        self.quoted_literal(start, Token::TimestampLiteral)
1118    }
1119
1120    fn numeric_literal_or_identifier(&mut self, start: usize) -> SpannedResult<'input, usize> {
1121        let (end, int) = self.take_while(start, |ch| is_digit(ch) || ch == '_');
1122
1123        let negative = self.input.get(start..=start) == Some("-");
1124        match self.peek() {
1125            Some((_, ch)) if is_ident_continue(ch) && !negative => {
1126                self.bump();
1127                let (end, ident) = self.take_while(start, is_ident_continue);
1128                Ok((start, Token::ident(ident), end))
1129            }
1130            Some((_, '.')) => {
1131                self.bump();
1132                let (end, float) = self.take_while(start, |ch| is_digit(ch) || ch == '_');
1133
1134                match float.replace('_', "").parse() {
1135                    Ok(float) => {
1136                        let float = NotNan::new(float).unwrap();
1137                        Ok((start, Token::FloatLiteral(float), end))
1138                    }
1139                    Err(err) => Err(Error::NumericLiteral {
1140                        start,
1141                        end,
1142                        error: err.to_string(),
1143                    }),
1144                }
1145            }
1146            None | Some(_) => match int.replace('_', "").parse() {
1147                Ok(int) => Ok((start, Token::IntegerLiteral(int), end)),
1148                Err(err) => Err(Error::NumericLiteral {
1149                    start,
1150                    end,
1151                    error: err.to_string(),
1152                }),
1153            },
1154        }
1155    }
1156
1157    fn identifier_or_function_call(&mut self, start: usize) -> Spanned<'input, usize> {
1158        let (end, ident) = self.take_while(start, is_ident_continue);
1159
1160        let token = if self.test_peek(|ch| ch == '(' || ch == '!') {
1161            Token::FunctionCall(ident)
1162        } else {
1163            Token::ident(ident)
1164        };
1165
1166        (start, token, end)
1167    }
1168
1169    fn operator(&mut self, start: usize) -> Spanned<'input, usize> {
1170        let (end, op) = self.take_while(start, is_operator);
1171
1172        let token = match op {
1173            "=" => Token::Equals,
1174            "|=" => Token::MergeEquals,
1175            "?" => Token::Question,
1176            op => Token::Operator(op),
1177        };
1178
1179        (start, token, end)
1180    }
1181
1182    fn quoted_literal(
1183        &mut self,
1184        start: usize,
1185        tok: impl Fn(&'input str) -> Tok<'input>,
1186    ) -> SpannedResult<'input, usize> {
1187        self.bump();
1188        let content_start = self.next_index();
1189
1190        loop {
1191            let scan_start = self.next_index();
1192            self.take_until(scan_start, |c| c == '\'' || c == '\\');
1193
1194            match self.bump() {
1195                Some((_, '\\')) => self.bump(),
1196                Some((end, '\'')) => {
1197                    let content = self.slice(content_start, end);
1198                    let token = tok(content);
1199                    let end = self.next_index();
1200
1201                    return Ok((start, token, end));
1202                }
1203                _ => break,
1204            };
1205        }
1206
1207        Err(Error::Literal { start })
1208    }
1209}
1210
1211// -----------------------------------------------------------------------------
1212// lexing helpers
1213// -----------------------------------------------------------------------------
1214
1215impl<'input> Lexer<'input> {
1216    pub(crate) fn new(input: &'input str) -> Lexer<'input> {
1217        Self {
1218            input,
1219            chars: input.char_indices().peekable(),
1220            open_braces: 0,
1221            open_brackets: 0,
1222            open_parens: 0,
1223            rquery_indices: vec![],
1224            query_start: None,
1225        }
1226    }
1227
1228    fn bump(&mut self) -> Option<(usize, char)> {
1229        self.chars.next()
1230    }
1231
1232    fn peek(&mut self) -> Option<(usize, char)> {
1233        self.chars.peek().copied()
1234    }
1235
1236    fn take_while<F>(&mut self, start: usize, mut keep_going: F) -> (usize, &'input str)
1237    where
1238        F: FnMut(char) -> bool,
1239    {
1240        self.take_until(start, |c| !keep_going(c))
1241    }
1242
1243    fn take_until<F>(&mut self, start: usize, mut terminate: F) -> (usize, &'input str)
1244    where
1245        F: FnMut(char) -> bool,
1246    {
1247        while let Some((end, ch)) = self.peek() {
1248            if terminate(ch) {
1249                return (end, self.slice(start, end));
1250            }
1251
1252            self.bump();
1253        }
1254
1255        let loc = self.next_index();
1256
1257        (loc, self.slice(start, loc))
1258    }
1259
1260    fn test_peek<F>(&mut self, mut test: F) -> bool
1261    where
1262        F: FnMut(char) -> bool,
1263    {
1264        self.peek().is_some_and(|(_, ch)| test(ch))
1265    }
1266
1267    fn slice(&self, start: usize, end: usize) -> &'input str {
1268        &self.input[start..end]
1269    }
1270
1271    fn next_index(&mut self) -> usize {
1272        self.peek().as_ref().map_or(self.input.len(), |l| l.0)
1273    }
1274
1275    /// Returns Ok if the next char is a valid escape code.
1276    fn escape_code(&mut self, start: usize) -> Result<(), Error> {
1277        match self.bump() {
1278            Some((_, '\n' | '\'' | '"' | '\\' | 'n' | 'r' | 't' | '{' | '}' | '0')) => Ok(()),
1279            Some((start, ch)) => Err(Error::EscapeChar {
1280                start,
1281                ch: Some(ch),
1282            }),
1283            None => Err(Error::EscapeChar { start, ch: None }),
1284        }
1285    }
1286}
1287
1288// -----------------------------------------------------------------------------
1289// generic helpers
1290// -----------------------------------------------------------------------------
1291
/// Whether `ch` may begin an identifier: `@`, `_` or an ASCII letter.
fn is_ident_start(ch: char) -> bool {
    ch == '@' || ch == '_' || ch.is_ascii_alphabetic()
}
1295
1296fn is_ident_continue(ch: char) -> bool {
1297    match ch {
1298        '0'..='9' => true,
1299        ch => is_ident_start(ch),
1300    }
1301}
1302
1303fn is_query_start(ch: char) -> bool {
1304    match ch {
1305        '%' | '.' | '{' | '[' => true,
1306        ch => is_ident_start(ch),
1307    }
1308}
1309
/// Whether `ch` is one of the ASCII digits `0` through `9`.
fn is_digit(ch: char) -> bool {
    matches!(ch, '0'..='9')
}
1313
/// Whether `ch` is one of the characters an operator token is built from.
pub(crate) fn is_operator(ch: char) -> bool {
    const OPERATOR_CHARS: &str = "!&*+-/<=>?|";
    OPERATOR_CHARS.contains(ch)
}
1320
/// Resolves the backslash escape sequences in the body of a string literal.
///
/// `s` is the literal's content without the surrounding quotes. Supported
/// escapes are `\'`, `\"`, `\\`, `\n`, `\r`, `\t`, `\0`, `\{` and `\}`.
/// A backslash directly followed by a newline is a line continuation: the
/// backslash, the newline and all whitespace that follows are dropped.
///
/// A lone backslash at the very end of `s` is kept verbatim.
///
/// # Panics
///
/// Panics on any other escape sequence. The lexer's `escape_code` check is
/// expected to have rejected those before this function is reached.
fn unescape_string_literal(mut s: &str) -> String {
    let mut string = String::with_capacity(s.len());

    while let Some(i) = s.find('\\') {
        // Everything before the backslash is copied through untouched.
        string.push_str(&s[..i]);

        let Some(&next) = s.as_bytes().get(i + 1) else {
            // Nothing follows the backslash: keep it as-is instead of
            // indexing past the end of the input.
            s = &s[i..];
            break;
        };

        if next == b'\n' {
            // Line continuation: drop the newline and any whitespace that
            // follows it.
            s = s[i + 2..].trim_start();
            continue;
        }

        let c = match next {
            b'\'' => '\'',
            b'"' => '"',
            b'\\' => '\\',
            b'n' => '\n',
            b'r' => '\r',
            b't' => '\t',
            b'0' => '\0',
            b'{' => '{',
            // `escape_code` accepts `\}`, so it has to be resolvable here too.
            b'}' => '}',
            _ => unimplemented!("invalid escape"),
        };

        string.push(c);
        s = &s[i + 2..];
    }

    string.push_str(s);
    string
}
1357
1358#[cfg(test)]
1359mod test {
1360    #![allow(clippy::print_stdout)] // tests
1361
1362    use super::super::lex::Token::{
1363        Arrow, Bang, Colon, Comma, Dot, Else, Equals, FloatLiteral, FunctionCall, Identifier, If,
1364        IntegerLiteral, LBrace, LBracket, LParen, LQuery, Newline, Operator, PathField, Percent,
1365        RBrace, RBracket, RParen, RQuery, RawStringLiteral, RegexLiteral, StringLiteral,
1366        TimestampLiteral, True,
1367    };
1368    use super::*;
1369
1370    fn lexer(input: &str) -> impl Iterator<Item = SpannedResult<'_, usize>> + '_ {
1371        let mut lexer = Lexer::new(input);
1372        Box::new(std::iter::from_fn(move || lexer.next()))
1373    }
1374
    /// Identity function: returns `source` unchanged.
    ///
    /// It only exists to visually align assertions with inputs in tests, so
    /// the `~` span markers line up under the characters they refer to.
    fn data(source: &str) -> &str {
        source
    }
1379
1380    fn test(input: &str, expected: Vec<(&str, Tok<'_>)>) {
1381        let mut lexer = lexer(input);
1382        let mut count = 0;
1383        let length = expected.len();
1384        for (token, (expected_span, expected_tok)) in lexer.by_ref().zip(expected.into_iter()) {
1385            count += 1;
1386            println!("{token:?}");
1387            let start = expected_span.find('~').unwrap_or_default();
1388            let end = expected_span.rfind('~').map(|i| i + 1).unwrap_or_default();
1389
1390            let expect = (start, expected_tok, end);
1391            assert_eq!(Ok(expect), token);
1392        }
1393
1394        assert_eq!(count, length);
1395        assert!(count > 0);
1396        assert!(lexer.next().is_none());
1397    }
1398
    /// `%foo`: a `%`-rooted path with one identifier segment is wrapped in
    /// an `LQuery`/`RQuery` pair.
    #[test]
    fn test_1() {
        test(
            data("%foo"),
            vec![
                ("~   ", LQuery),
                ("~   ", Percent),
                (" ~~~", Identifier("foo")),
                ("   ~", RQuery),
            ],
        );
    }
1411
    /// `%@foo`: an `@`-prefixed segment after `%` lexes as a `PathField`
    /// rather than an `Identifier`.
    #[test]
    fn test_2() {
        test(
            data("%@foo"),
            vec![
                ("~    ", LQuery),
                ("~    ", Percent),
                (" ~~~~", PathField("@foo")),
                ("    ~", RQuery),
            ],
        );
    }
1424
    /// `%foo[%bar]`: a query nested inside brackets gets its own
    /// `LQuery`/`RQuery` pair within the outer query.
    #[test]
    fn test_3() {
        test(
            data("%foo[%bar]"),
            vec![
                ("~    ", LQuery),
                ("~    ", Percent),
                (" ~~~ ", Identifier("foo")),
                ("    ~ ", LBracket),
                ("     ~ ", LQuery),
                ("     ~ ", Percent),
                ("      ~~~ ", Identifier("bar")),
                ("        ~", RQuery),
                ("         ~ ", RBracket),
                ("         ~", RQuery),
            ],
        );
    }
1443
    /// `%foo.@bar`: an `@`-prefixed segment after a dot lexes as a
    /// `PathField`.
    #[test]
    fn test_4() {
        test(
            data("%foo.@bar"),
            vec![
                ("~    ", LQuery),
                ("~    ", Percent),
                (" ~~~ ", Identifier("foo")),
                ("    ~ ", Dot),
                ("     ~~~~ ", PathField("@bar")),
                ("        ~", RQuery),
            ],
        );
    }
1458
    /// `.(a|b)`: a parenthesised group after the leading dot stays inside
    /// the query, and `|` is lexed as an `Operator`.
    #[test]
    fn test_5() {
        test(
            data(".(a|b)"),
            vec![
                ("~    ", LQuery),
                ("~    ", Dot),
                (" ~ ", LParen),
                ("  ~ ", Identifier("a")),
                ("   ~ ", Operator("|")),
                ("    ~ ", Identifier("b")),
                ("     ~ ", RParen),
                ("     ~ ", RQuery),
            ],
        );
    }
1475
    /// `.(@a|b)`: inside a parenthesised group, `@a` lexes as a
    /// `PathField`.
    #[test]
    fn test_6() {
        test(
            data(".(@a|b)"),
            vec![
                ("~    ", LQuery),
                ("~    ", Dot),
                (" ~ ", LParen),
                ("  ~~ ", PathField("@a")),
                ("    ~ ", Operator("|")),
                ("     ~ ", Identifier("b")),
                ("      ~ ", RParen),
                ("      ~ ", RQuery),
            ],
        );
    }
1492
    /// An unterminated `r'` literal inside call arguments is reported by
    /// the very first `next()` call, as `Error::Literal` with `start: 6`.
    #[test]
    fn unterminated_literal_errors() {
        let mut lexer = Lexer::new("a(m, r')");
        assert_eq!(Some(Err(Error::Literal { start: 6 })), lexer.next());
    }
1498
    /// An invalid escape inside a string argument is reported by the first
    /// `next()` call as `Error::EscapeChar`, carrying the offending
    /// character and its position in the full input.
    #[test]
    fn invalid_grok_pattern() {
        // Grok pattern has an invalid escape char -> `\]`
        let mut lexer = Lexer::new(
            r#"parse_grok!("1.2.3.4 - - [23/Mar/2021:06:46:35 +0000]", "%{IPORHOST:remote_ip} %{USER:ident} %{USER:user_name} \[%{HTTPDATE:timestamp}\]""#,
        );
        assert_eq!(
            Some(Err(Error::EscapeChar {
                start: 112,
                ch: Some('[')
            })),
            lexer.next()
        );
    }
1513
    /// String literal tokens keep their escape sequences unresolved (`\n`
    /// stays a backslash followed by `n`), and their spans include the
    /// surrounding quotes. The final assertion checks that `template`
    /// resolves `\"` into a plain quote.
    #[test]
    #[rustfmt::skip]
    fn string_literals() {
        use StringLiteralToken as S;
        use StringLiteral as L;

        test(
            data(r#"foo "bar\"\n" baz "" "\t" "\"\"" "null \0""#),
            vec![
                ("~~~                                       ", Identifier("foo")),
                ("    ~~~~~~~~~                             ", L(S("bar\\\"\\n"))),
                ("              ~~~                         ", Identifier("baz")),
                ("                  ~~                      ", L(S(""))),
                ("                     ~~~~                 ", L(S("\\t"))),
                ("                          ~~~~~~          ", L(S(r#"\"\""#))),
                ("                                 ~~~~~~~~~", L(S("null \\0"))),
            ],
        );
        assert_eq!(TemplateString(vec![StringSegment::Literal(r#""""#.to_string(), Span::new(1, 5))]), StringLiteralToken(r#"\"\""#).template(Span::new(0, 6)));
    }
1534
    /// A backslash directly followed by a newline continues the literal on
    /// the next line: `template` drops the newline and the indentation that
    /// follows it.
    #[test]
    fn multiline_string_literals() {
        let mut lexer = lexer(
            r#""foo \
                bar""#,
        );

        match lexer.next() {
            Some(Ok((_, StringLiteral(s), _))) => assert_eq!(
                TemplateString(vec![StringSegment::Literal(
                    "foo bar".to_string(),
                    Span::new(1, 26)
                )]),
                s.template(Span::new(0, 26))
            ),
            _ => panic!("Not a string literal"),
        }
    }
1553
    /// For `"\X"` the *last* item the lexer yields is
    /// `Error::StringLiteral { start: 3 }`.
    ///
    /// NOTE(review): presumably the invalid-escape error is yielded first
    /// and the trailing quote is then lexed as an unterminated string;
    /// confirm against `Lexer::next`.
    #[test]
    fn string_literal_unexpected_escape_code() {
        assert_eq!(
            lexer(r#""\X""#).last(),
            Some(Err(Error::StringLiteral { start: 3 }))
        );
    }
1561
    /// A string whose only quote after the opening one is escaped (`\"`) is
    /// unterminated; the error points at the opening quote (index 4).
    #[test]
    fn string_literal_unterminated() {
        assert_eq!(
            lexer(r#"foo "bar\"\n baz"#).last(),
            Some(Err(Error::StringLiteral { start: 4 }))
        );
    }
1569
    /// Regex literal tokens carry the text between the quotes verbatim
    /// (backslashes included), their spans cover the whole `r'…'` form, and
    /// an empty `r''` is accepted.
    #[test]
    #[rustfmt::skip]
    fn regex_literals() {
        test(
            data(r"r'[fb]oo+' r'a/b\[rz\]' r''"),
            vec![
                ("~~~~~~~~~~                 ", RegexLiteral("[fb]oo+")),
                ("           ~~~~~~~~~~~~    ", RegexLiteral("a/b\\[rz\\]")),
                ("                        ~~~", RegexLiteral("")),
            ],
        );
    }
1582
    /// A regex literal without a closing quote ends in `Error::Literal`
    /// pointing at the start of the literal.
    #[test]
    fn regex_literal_unterminated() {
        assert_eq!(
            lexer("r'foo bar").last(),
            Some(Err(Error::Literal { start: 0 }))
        );
    }
1590
    /// An escaped quote (`\'`) does not terminate a timestamp literal, and
    /// the token keeps the backslash.
    #[test]
    #[rustfmt::skip]
    fn timestamp_literals() {
        test(
            data(r"t'foo \' bar'"),
            vec![
                ("~~~~~~~~~~~~~", TimestampLiteral("foo \\' bar")),
            ],
        );
    }
1601
    /// A timestamp literal without a closing quote ends in `Error::Literal`
    /// pointing at the start of the literal.
    #[test]
    fn timestamp_literal_unterminated() {
        assert_eq!(
            lexer("t'foo").last(),
            Some(Err(Error::Literal { start: 0 }))
        );
    }
1609
    /// Raw string tokens keep their content verbatim: double quotes, `\n`
    /// and the escaped quote `\'` all appear unchanged in the token.
    #[test]
    #[rustfmt::skip]
    fn raw_string_literals() {
        use RawStringLiteralToken as S;
        use RawStringLiteral as R;

        test(
            data(r#"s'a "bc" \n \'d'"#),
            vec![
                ("~~~~~~~~~~~~~~~~", R(S(r#"a "bc" \n \'d"#))),
            ],
        );
    }
1623
    /// A raw string literal without a closing quote ends in
    /// `Error::Literal` pointing at the start of the literal.
    #[test]
    fn raw_string_literal_unterminated() {
        assert_eq!(
            lexer("s'foo").last(),
            Some(Err(Error::Literal { start: 0 }))
        );
    }
1631
    /// Integers and floats: leading zeros are accepted (`012` is 12) and a
    /// trailing dot yields a float (`12.` is 12.0).
    #[test]
    #[rustfmt::skip]
    fn number_literals() {
        test(
            data("12 012 12.43 12. 0 902.0001"),
            vec![
                ("~~                         ", IntegerLiteral(12)),
                ("   ~~~                     ", IntegerLiteral(12)),
                ("       ~~~~~               ", FloatLiteral(NotNan::new(12.43).unwrap())),
                ("             ~~~           ", FloatLiteral(NotNan::new(12.0).unwrap())),
                ("                 ~         ", IntegerLiteral(0)),
                ("                   ~~~~~~~~", FloatLiteral(NotNan::new(902.0001).unwrap())),
            ],
        );
    }
1647
    /// `_` separators are ignored when computing the value, including a
    /// leading or trailing `_` in the fractional part.
    #[test]
    #[rustfmt::skip]
    fn number_literals_underscore() {
        test(
            data("1_000 1_2_3._4_0_"),
            vec![
                ("~~~~~            ", IntegerLiteral(1000)),
                ("      ~~~~~~~~~~~", FloatLiteral(NotNan::new(123.40).unwrap())),
            ],
        );
    }
1659
    /// Identifiers may contain digits and underscores; `if` and `else`
    /// lex as their own keyword tokens.
    #[test]
    fn identifiers() {
        test(
            data("foo bar1 if baz_12_qux else "),
            vec![
                ("~~~                         ", Identifier("foo")),
                ("    ~~~~                    ", Identifier("bar1")),
                ("         ~~                 ", If),
                ("            ~~~~~~~~~~      ", Identifier("baz_12_qux")),
                ("                       ~~~~ ", Else),
            ],
        );
    }
1673
    /// A name immediately followed by `(` lexes as `FunctionCall`, even
    /// when the name is the keyword `if`.
    #[test]
    fn function_calls() {
        test(
            data("foo() bar_1() if() "),
            vec![
                ("~~~                ", FunctionCall("foo")),
                ("   ~               ", LParen),
                ("    ~              ", RParen),
                ("      ~~~~~        ", FunctionCall("bar_1")),
                ("           ~       ", LParen),
                ("            ~      ", RParen),
                ("              ~~   ", FunctionCall("if")),
                ("                ~  ", LParen),
                ("                 ~ ", RParen),
            ],
        );
    }
1691
    /// A lone `.` is a complete query: `LQuery`, `Dot` and `RQuery` all
    /// span the same single character.
    #[test]
    fn single_query() {
        test(
            data("."),
            vec![
                //
                ("~", LQuery),
                ("~", Dot),
                ("~", RQuery),
            ],
        );
    }
1704
1705    #[test]
1706    fn root_query() {
1707        test(
1708            data(". .foo . .bar ."),
1709            vec![
1710                ("~              ", LQuery),
1711                ("~              ", Dot),
1712                ("~              ", RQuery),
1713                ("  ~            ", LQuery),
1714                ("  ~            ", Dot),
1715                ("   ~~~         ", Identifier("foo")),
1716                ("     ~         ", RQuery),
1717                ("       ~       ", LQuery),
1718                ("       ~       ", Dot),
1719                ("       ~       ", RQuery),
1720                ("         ~     ", LQuery),
1721                ("         ~     ", Dot),
1722                ("          ~~~  ", Identifier("bar")),
1723                ("            ~  ", RQuery),
1724                ("              ~", LQuery),
1725                ("              ~", Dot),
1726                ("              ~", RQuery),
1727            ],
1728        );
1729    }
1730
1731    #[test]
1732    fn at_sign_in_query() {
1733        test(
1734            data(".@foo .bar.@ook"),
1735            vec![
1736                ("~              ", LQuery),
1737                ("~              ", Dot),
1738                (" ~~~~          ", PathField("@foo")),
1739                ("    ~          ", RQuery),
1740                ("      ~        ", LQuery),
1741                ("      ~        ", Dot),
1742                ("       ~~~     ", Identifier("bar")),
1743                ("          ~    ", Dot),
1744                ("           ~~~~", PathField("@ook")),
1745                ("              ~", RQuery),
1746            ],
1747        );
1748    }
1749
1750    #[test]
1751    fn queries() {
1752        test(
1753            data(".foo bar.baz .baz.qux"),
1754            vec![
1755                ("~                    ", LQuery),
1756                ("~                    ", Dot),
1757                (" ~~~                 ", Identifier("foo")),
1758                ("   ~                 ", RQuery),
1759                ("     ~               ", LQuery),
1760                ("     ~~~             ", Identifier("bar")),
1761                ("        ~            ", Dot),
1762                ("         ~~~         ", Identifier("baz")),
1763                ("           ~         ", RQuery),
1764                ("             ~       ", LQuery),
1765                ("             ~       ", Dot),
1766                ("              ~~~    ", Identifier("baz")),
1767                ("                 ~   ", Dot),
1768                ("                  ~~~", Identifier("qux")),
1769                ("                    ~", RQuery),
1770            ],
1771        );
1772    }
1773
1774    #[test]
1775    #[rustfmt::skip]
1776    fn nested_queries() {
1777        use StringLiteralToken as S;
1778        use StringLiteral as L;
1779
1780        test(
1781            data(r#"[.foo].bar { "foo": [2][0] }"#),
1782            vec![
1783                ("~                           ", LQuery),
1784                ("~                           ", LBracket),
1785                (" ~                          ", LQuery),
1786                (" ~                          ", Dot),
1787                ("  ~~~                       ", Identifier("foo")),
1788                ("    ~                       ", RQuery),
1789                ("     ~                      ", RBracket),
1790                ("      ~                     ", Dot),
1791                ("       ~~~                  ", Identifier("bar")),
1792                ("         ~                  ", RQuery),
1793                ("           ~                ", LBrace),
1794                ("             ~~~~~          ", L(S("foo"))),
1795                ("                  ~         ", Colon),
1796                ("                    ~       ", LQuery),
1797                ("                    ~       ", LBracket),
1798                ("                     ~      ", IntegerLiteral(2)),
1799                ("                      ~     ", RBracket),
1800                ("                       ~    ", LBracket),
1801                ("                        ~   ", IntegerLiteral(0)),
1802                ("                         ~  ", RBracket),
1803                ("                         ~  ", RQuery),
1804                ("                           ~", RBrace),
1805            ],
1806        );
1807    }
1808
1809    #[test]
1810    fn complex_query_1() {
1811        use StringLiteral as L;
1812        use StringLiteralToken as S;
1813
1814        test(
1815            data(r#".a.(b | c  )."d\"e"[2 ][ 1]"#),
1816            vec![
1817                ("~                          ", LQuery),
1818                ("~                          ", Dot),
1819                (" ~                         ", Identifier("a")),
1820                ("  ~                        ", Dot),
1821                ("   ~                       ", LParen),
1822                ("    ~                      ", Identifier("b")),
1823                ("      ~                    ", Operator("|")),
1824                ("        ~                  ", Identifier("c")),
1825                ("           ~               ", RParen),
1826                ("            ~              ", Dot),
1827                ("             ~~~~~~        ", L(S("d\\\"e"))),
1828                ("                   ~       ", LBracket),
1829                ("                    ~      ", IntegerLiteral(2)),
1830                ("                      ~    ", RBracket),
1831                ("                       ~   ", LBracket),
1832                ("                         ~ ", IntegerLiteral(1)),
1833                ("                          ~", RBracket),
1834            ],
1835        );
1836    }
1837
1838    #[test]
1839    #[rustfmt::skip]
1840    fn complex_query_2() {
1841        use StringLiteralToken as S;
1842        use StringLiteral as L;
1843
1844        test(
1845            data(r#"{ "a": parse_json!("{ \"b\": 0 }").c }"#),
1846            vec![
1847                ("~                                     ", LBrace),
1848                ("  ~~~                                 ", L(S("a"))),
1849                ("     ~                                ", Colon),
1850                ("       ~                              ", LQuery),
1851                ("       ~~~~~~~~~~                     ", FunctionCall("parse_json")),
1852                ("                 ~                    ", Bang),
1853                ("                  ~                   ", LParen),
1854                ("                   ~~~~~~~~~~~~~~     ", L(S("{ \\\"b\\\": 0 }"))),
1855                ("                                 ~    ", RParen),
1856                ("                                  ~   ", Dot),
1857                ("                                   ~  ", Identifier("c")),
1858                ("                                   ~  ", RQuery),
1859                ("                                     ~", RBrace),
1860            ],
1861        );
1862    }
1863
1864    #[test]
1865    #[rustfmt::skip]
1866    fn query_with_literals() {
1867        use StringLiteralToken as S;
1868        use RawStringLiteralToken as RS;
1869        use StringLiteral as L;
1870        use RawStringLiteral as R;
1871
1872        test(
1873            data(r#"{ "a": r'b?c', "d": s'"e"\'f', "g": t'1.0T0' }.h"#),
1874            vec![
1875                ("~                                               ", LQuery),
1876                ("~                                               ", LBrace),
1877                ("  ~~~                                           ", L(S("a"))),
1878                ("     ~                                          ", Colon),
1879                ("       ~~~~~~                                   ", RegexLiteral("b?c")),
1880                ("             ~                                  ", Comma),
1881                ("               ~~~                              ", L(S("d"))),
1882                ("                  ~                             ", Colon),
1883                ("                    ~~~~~~~~~                   ", R(RS("\"e\"\\\'f"))),
1884                ("                             ~                  ", Comma),
1885                ("                               ~~~              ", L(S("g"))),
1886                ("                                  ~             ", Colon),
1887                ("                                    ~~~~~~~~    ", TimestampLiteral("1.0T0")),
1888                ("                                             ~  ", RBrace),
1889                ("                                              ~ ", Dot),
1890                ("                                               ~", Identifier("h")),
1891                ("                                               ~", RQuery),
1892            ],
1893        );
1894    }
1895
1896    #[test]
1897    fn variable_queries() {
1898        test(
1899            data("foo.bar foo[2]"),
1900            vec![
1901                ("~             ", LQuery),
1902                ("~~~           ", Identifier("foo")),
1903                ("   ~          ", Dot),
1904                ("    ~~~       ", Identifier("bar")),
1905                ("      ~       ", RQuery),
1906                ("        ~     ", LQuery),
1907                ("        ~~~   ", Identifier("foo")),
1908                ("           ~  ", LBracket),
1909                ("            ~ ", IntegerLiteral(2)),
1910                ("             ~", RBracket),
1911                ("             ~", RQuery),
1912            ],
1913        );
1914    }
1915
1916    #[test]
1917    fn object_queries() {
1918        use StringLiteral as L;
1919        use StringLiteralToken as S;
1920
1921        test(
1922            data(r#"{ "foo": "bar" }.foo"#),
1923            vec![
1924                ("~                   ", LQuery),
1925                ("~                   ", LBrace),
1926                ("  ~~~~~             ", L(S("foo"))),
1927                ("       ~            ", Colon),
1928                ("         ~~~~~      ", L(S("bar"))),
1929                ("               ~    ", RBrace),
1930                ("                ~   ", Dot),
1931                ("                 ~~~", Identifier("foo")),
1932                ("                   ~", RQuery),
1933            ],
1934        );
1935    }
1936
1937    #[test]
1938    fn array_queries() {
1939        test(
1940            data("[ 1, 2 , 3].foo"),
1941            vec![
1942                ("~              ", LQuery),
1943                ("~              ", LBracket),
1944                ("  ~            ", IntegerLiteral(1)),
1945                ("   ~           ", Comma),
1946                ("     ~         ", IntegerLiteral(2)),
1947                ("       ~       ", Comma),
1948                ("         ~     ", IntegerLiteral(3)),
1949                ("          ~    ", RBracket),
1950                ("           ~   ", Dot),
1951                ("            ~~~", Identifier("foo")),
1952                ("              ~", RQuery),
1953            ],
1954        );
1955    }
1956
1957    #[test]
1958    fn function_call_queries() {
1959        use StringLiteral as L;
1960        use StringLiteralToken as S;
1961
1962        test(
1963            data(r#"foo(ab: "c")[2].d"#),
1964            vec![
1965                ("~                ", LQuery),
1966                ("~~~              ", FunctionCall("foo")),
1967                ("   ~             ", LParen),
1968                ("    ~~           ", Identifier("ab")),
1969                ("      ~          ", Colon),
1970                ("        ~~~      ", L(S("c"))),
1971                ("           ~     ", RParen),
1972                ("            ~    ", LBracket),
1973                ("             ~   ", IntegerLiteral(2)),
1974                ("              ~  ", RBracket),
1975                ("               ~ ", Dot),
1976                ("                ~", Identifier("d")),
1977                ("                ~", RQuery),
1978            ],
1979        );
1980    }
1981
1982    #[test]
1983    fn queries_in_array() {
1984        test(
1985            data("[foo[0]]"),
1986            vec![
1987                ("~       ", LBracket),
1988                (" ~      ", LQuery),
1989                (" ~~~    ", Identifier("foo")),
1990                ("    ~   ", LBracket),
1991                ("     ~  ", IntegerLiteral(0)),
1992                ("      ~ ", RBracket),
1993                ("      ~ ", RQuery),
1994                ("       ~", RBracket),
1995            ],
1996        );
1997    }
1998
1999    #[test]
2000    fn queries_op() {
2001        test(
2002            data(".a + 3 .b == true"),
2003            vec![
2004                ("~                ", LQuery),
2005                ("~                ", Dot),
2006                (" ~               ", Identifier("a")),
2007                (" ~               ", RQuery),
2008                ("   ~             ", Operator("+")),
2009                ("     ~           ", IntegerLiteral(3)),
2010                ("       ~         ", LQuery),
2011                ("       ~         ", Dot),
2012                ("        ~        ", Identifier("b")),
2013                ("        ~        ", RQuery),
2014                ("          ~~     ", Operator("==")),
2015                ("             ~~~~", True),
2016            ],
2017        );
2018    }
2019
2020    #[test]
2021    fn invalid_queries() {
2022        test(
2023            data(".foo.\n"),
2024            vec![
2025                ("~      ", LQuery),
2026                ("~      ", Dot),
2027                (" ~~~   ", Identifier("foo")),
2028                ("    ~  ", Dot),
2029                ("    ~  ", RQuery),
2030                ("     ~ ", Newline),
2031            ],
2032        );
2033    }
2034
2035    #[test]
2036    fn queries_in_multiline() {
2037        test(
2038            data(".foo\n.bar = true"),
2039            vec![
2040                ("~               ", LQuery),
2041                ("~               ", Dot),
2042                (" ~~~            ", Identifier("foo")),
2043                ("   ~            ", RQuery),
2044                ("    ~           ", Newline),
2045                ("     ~          ", LQuery),
2046                ("     ~          ", Dot),
2047                ("      ~~~       ", Identifier("bar")),
2048                ("        ~       ", RQuery),
2049                ("          ~     ", Equals),
2050                ("            ~~~~", True),
2051            ],
2052        );
2053    }
2054
2055    #[test]
2056    #[rustfmt::skip]
2057    fn quoted_path_queries() {
2058        use StringLiteralToken as S;
2059        use StringLiteral as L;
2060
2061        test(
2062            data(r#"."parent.key.with.special characters".child"#),
2063            vec![
2064                ("~                                          ", LQuery),
2065                ("~                                          ", Dot),
2066                (" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~      ", L(S("parent.key.with.special characters"))),
2067                ("                                     ~     ", Dot),
2068                ("                                      ~~~~~", Identifier("child")),
2069                ("                                          ~", RQuery),
2070            ],
2071        );
2072    }
2073
2074    #[test]
2075    fn queries_digit_path() {
2076        test(
2077            data(".0foo foo.00_7bar.tar"),
2078            vec![
2079                ("~                    ", LQuery),
2080                ("~                    ", Dot),
2081                (" ~~~~                ", Identifier("0foo")),
2082                ("    ~                ", RQuery),
2083                ("      ~              ", LQuery),
2084                ("      ~~~            ", Identifier("foo")),
2085                ("         ~           ", Dot),
2086                ("          ~~~~~~~    ", Identifier("00_7bar")),
2087                ("                 ~   ", Dot),
2088                ("                  ~~~", Identifier("tar")),
2089                ("                    ~", RQuery),
2090            ],
2091        );
2092    }
2093
2094    #[test]
2095    fn queries_nested_delims() {
2096        use StringLiteral as L;
2097        use StringLiteralToken as S;
2098
2099        test(
2100            data(r#"{ "foo": [true] }.foo[0]"#),
2101            vec![
2102                ("~                       ", LQuery),
2103                ("~                       ", LBrace),
2104                ("  ~~~~~                 ", L(S("foo"))),
2105                ("       ~                ", Colon),
2106                ("         ~              ", LBracket),
2107                ("          ~~~~          ", True),
2108                ("              ~         ", RBracket),
2109                ("                ~       ", RBrace),
2110                ("                 ~      ", Dot),
2111                ("                  ~~~   ", Identifier("foo")),
2112                ("                     ~  ", LBracket),
2113                ("                      ~ ", IntegerLiteral(0)),
2114                ("                       ~", RBracket),
2115                ("                       ~", RQuery),
2116            ],
2117        );
2118    }
2119
2120    #[test]
2121    fn queries_negative_index() {
2122        test(
2123            data("v[-1] = 2"),
2124            vec![
2125                ("~        ", LQuery),
2126                ("~        ", Identifier("v")),
2127                (" ~       ", LBracket),
2128                ("  ~~     ", IntegerLiteral(-1)),
2129                ("    ~    ", RBracket),
2130                ("    ~    ", RQuery),
2131                ("      ~  ", Equals),
2132                ("        ~", IntegerLiteral(2)),
2133            ],
2134        );
2135    }
2136
2137    #[test]
2138    fn multi_byte_character_1() {
2139        use RawStringLiteral as R;
2140        use RawStringLiteralToken as RS;
2141
2142        test(
2143            data("a * s'漢字' * a"),
2144            vec![
2145                ("~                ", Identifier("a")),
2146                ("  ~              ", Operator("*")),
2147                ("    ~~~~~~~~~    ", R(RS("漢字"))),
2148                ("              ~  ", Operator("*")),
2149                ("                ~", Identifier("a")),
2150            ],
2151        );
2152    }
2153
2154    #[test]
2155    fn multi_byte_character_2() {
2156        use RawStringLiteral as R;
2157        use RawStringLiteralToken as RS;
2158
2159        test(
2160            data("a * s'¡' * a"),
2161            vec![
2162                ("~            ", Identifier("a")),
2163                ("  ~          ", Operator("*")),
2164                ("    ~~~~~    ", R(RS("¡"))),
2165                ("          ~  ", Operator("*")),
2166                ("            ~", Identifier("a")),
2167            ],
2168        );
2169    }
2170
2171    #[test]
2172    fn comment_in_block() {
2173        test(
2174            data("if x {\n   # It's an apostrophe.\n   3\n}"),
2175            vec![
2176                ("~~                                    ", If),
2177                ("   ~                                  ", Identifier("x")),
2178                ("     ~                                ", LBrace),
2179                ("      ~                               ", Newline),
2180                ("                               ~      ", Newline),
2181                ("                                   ~  ", IntegerLiteral(3)),
2182                ("                                    ~ ", Newline),
2183                ("                                     ~", RBrace),
2184            ],
2185        );
2186    }
2187
2188    #[test]
2189    fn unescape_string_literal() {
2190        let string = StringLiteralToken("zork {{ zonk }} zoog");
2191        assert_eq!(
2192            TemplateString(vec![
2193                StringSegment::Literal("zork ".to_string(), Span::new(1, 6)),
2194                StringSegment::Template("zonk".to_string(), Span::new(6, 16)),
2195                StringSegment::Literal(" zoog".to_string(), Span::new(16, 21)),
2196            ]),
2197            string.template(Span::new(0, 22))
2198        );
2199    }
2200
2201    #[test]
2202    fn function_closure_no_arg() {
2203        test(
2204            data("foo() -> || {}"),
2205            vec![
2206                ("~~~           ", FunctionCall("foo")),
2207                ("   ~          ", LParen),
2208                ("    ~         ", RParen),
2209                ("      ~~      ", Arrow),
2210                ("         ~~   ", Operator("||")),
2211                ("            ~ ", LBrace),
2212                ("             ~", RBrace),
2213            ],
2214        );
2215    }
2216
2217    #[test]
2218    fn function_closure_single_arg() {
2219        test(
2220            data("foo() -> |idx| { idx }"),
2221            vec![
2222                ("~~~                   ", FunctionCall("foo")),
2223                ("   ~                  ", LParen),
2224                ("    ~                 ", RParen),
2225                ("      ~~              ", Arrow),
2226                ("         ~            ", Operator("|")),
2227                ("          ~~~         ", Identifier("idx")),
2228                ("             ~        ", Operator("|")),
2229                ("               ~      ", LBrace),
2230                ("                 ~~~  ", Identifier("idx")),
2231                ("                     ~", RBrace),
2232            ],
2233        );
2234    }
2235
2236    #[test]
2237    fn function_closure_args() {
2238        test(
2239            data("foo() -> |i, v| { v }"),
2240            vec![
2241                ("~~~                  ", FunctionCall("foo")),
2242                ("   ~                 ", LParen),
2243                ("    ~                ", RParen),
2244                ("      ~~             ", Arrow),
2245                ("         ~           ", Operator("|")),
2246                ("          ~          ", Identifier("i")),
2247                ("           ~         ", Comma),
2248                ("             ~       ", Identifier("v")),
2249                ("              ~      ", Operator("|")),
2250                ("                ~    ", LBrace),
2251                ("                  ~  ", Identifier("v")),
2252                ("                    ~", RBrace),
2253            ],
2254        );
2255    }
2256}