1use std::{fmt, iter::Peekable, str::CharIndices};
2
3use crate::diagnostic::{DiagnosticMessage, Label, Span};
4use ordered_float::NotNan;
5
6use super::template_string::{StringSegment, TemplateString};
7
/// A token whose textual payload borrows from the lexer input.
8pub(crate) type Tok<'input> = Token<&'input str>;
/// Outcome of lexing one token: a spanned token on success, a lexer [`Error`] otherwise.
9pub(crate) type SpannedResult<'input, Loc> = Result<Spanned<'input, Loc>, Error>;
/// A token together with its locations, laid out as `(start, token, end)`.
10pub(crate) type Spanned<'input, Loc> = (Loc, Tok<'input>, Loc);
11
/// Errors reported while lexing or parsing a program.
///
/// Positions stored in the variants are offsets into the source text; `labels()` turns them
/// into `Span`s.
12#[derive(thiserror::Error, Clone, Debug, PartialEq, Eq)]
13pub enum Error {
 /// An error reported by the generated parser, kept together with the tokens it dropped.
14 #[error("syntax error")]
15 ParseError {
16 span: Span,
17 source: lalrpop_util::ParseError<usize, Token<String>, String>,
18 dropped_tokens: Vec<(usize, Token<String>, usize)>,
19 },
20
 /// A reserved keyword was used where it is not allowed; `start..end` covers the keyword.
21 #[error("reserved keyword")]
22 ReservedKeyword {
23 start: usize,
24 keyword: String,
25 end: usize,
26 },
27
 /// An integer or float literal could not be parsed; `error` holds the parser's message.
28 #[error("invalid numeric literal")]
29 NumericLiteral {
30 start: usize,
31 error: String,
32 end: usize,
33 },
34
 /// A `"…"` string literal that was still open when the input ended.
35 #[error("invalid string literal")]
36 StringLiteral { start: usize },
37
 /// A single-quoted literal (`r'…'`, `s'…'`, `t'…'`) that was still open when the input ended.
38 #[error("invalid literal")]
39 Literal { start: usize },
40
 /// A backslash followed by an unsupported character, or by nothing at all (`ch` is `None`).
41 #[error("invalid escape character: \\{}", .ch.unwrap_or_default())]
42 EscapeChar { start: usize, ch: Option<char> },
43
 /// The query look-ahead ran out of input right after a comment or literal.
44 #[error("unexpected parse error")]
45 UnexpectedParseError(String),
46}
47
48impl Error {
49 fn offset_by(self, offset: usize) -> Self {
53 match self {
54 Error::ParseError {
55 span,
56 source,
57 dropped_tokens,
58 } => Error::ParseError {
59 span: Span::new(span.start() + offset, span.end() + offset),
60 source,
61 dropped_tokens,
62 },
63 Error::ReservedKeyword {
64 start,
65 keyword,
66 end,
67 } => Error::ReservedKeyword {
68 start: start + offset,
69 keyword,
70 end: end + offset,
71 },
72 Error::NumericLiteral { start, error, end } => Error::NumericLiteral {
73 start: start + offset,
74 error,
75 end: end + offset,
76 },
77 Error::StringLiteral { start } => Error::StringLiteral {
78 start: start + offset,
79 },
80 Error::Literal { start } => Error::Literal {
81 start: start + offset,
82 },
83 Error::EscapeChar { start, ch } => Error::EscapeChar {
84 start: start + offset,
85 ch,
86 },
87 Error::UnexpectedParseError(s) => Error::UnexpectedParseError(s),
88 }
89 }
90}
91
92impl DiagnosticMessage for Error {
93 fn code(&self) -> usize {
94 use Error::{
95 EscapeChar, Literal, NumericLiteral, ParseError, ReservedKeyword, StringLiteral,
96 UnexpectedParseError,
97 };
98
99 match self {
100 ParseError { source, .. } => match source {
101 lalrpop_util::ParseError::InvalidToken { .. } => 200,
102 lalrpop_util::ParseError::ExtraToken { .. } => 201,
103 lalrpop_util::ParseError::User { .. } => 202,
104 lalrpop_util::ParseError::UnrecognizedToken { .. } => 203,
105 lalrpop_util::ParseError::UnrecognizedEof { .. } => 204,
106 },
107 ReservedKeyword { .. } => 205,
108 NumericLiteral { .. } => 206,
109 StringLiteral { .. } => 207,
110 Literal { .. } => 208,
111 EscapeChar { .. } => 209,
112 UnexpectedParseError(..) => 210,
113 }
114 }
115
116 fn labels(&self) -> Vec<Label> {
117 use Error::{
118 EscapeChar, Literal, NumericLiteral, ParseError, ReservedKeyword, StringLiteral,
119 UnexpectedParseError,
120 };
121
122 fn update_expected(expected: Vec<String>) -> Vec<String> {
123 expected
124 .into_iter()
125 .map(|expect| match expect.as_str() {
126 "LQuery" => r#""path literal""#.to_owned(),
127 _ => expect,
128 })
129 .collect::<Vec<_>>()
130 }
131
132 match self {
133 ParseError { span, source, .. } => match source {
134 lalrpop_util::ParseError::InvalidToken { location } => vec![Label::primary(
135 "invalid token",
136 Span::new(*location, *location + 1),
137 )],
138 lalrpop_util::ParseError::ExtraToken { token } => {
139 let (start, token, end) = token;
140 vec![Label::primary(
141 format!("unexpected extra token: {token}"),
142 Span::new(*start, *end),
143 )]
144 }
145 lalrpop_util::ParseError::User { error } => {
146 vec![Label::primary(format!("unexpected error: {error}"), span)]
147 }
148 lalrpop_util::ParseError::UnrecognizedToken { token, expected } => {
149 let (start, token, end) = token;
150 let span = Span::new(*start, *end);
151 let got = token.to_string();
152 let mut expected = update_expected(expected.clone());
153
154 let any_ident = [
157 r#""reserved identifier""#,
158 r#""else""#,
159 r#""false""#,
160 r#""null""#,
161 r#""true""#,
162 r#""if""#,
163 ];
164 let is_any_ident = any_ident
165 .iter()
166 .all(|i| expected.contains(&(*i).to_string()));
167 if is_any_ident {
168 expected = expected
169 .into_iter()
170 .filter(|e| !any_ident.contains(&e.as_str()))
171 .collect::<Vec<_>>();
172 }
173
174 if token == &Token::RQuery {
175 return vec![
176 Label::primary("unexpected end of query path", span),
177 Label::context(
178 format!("expected one of: {}", expected.join(", ")),
179 span,
180 ),
181 ];
182 }
183
184 vec![
185 Label::primary(format!(r#"unexpected syntax token: "{got}""#), span),
186 Label::context(format!("expected one of: {}", expected.join(", ")), span),
187 ]
188 }
189 lalrpop_util::ParseError::UnrecognizedEof { location, expected } => {
190 let span = Span::new(*location, *location);
191 let expected = update_expected(expected.clone());
192
193 vec![
194 Label::primary("unexpected end of program", span),
195 Label::context(format!("expected one of: {}", expected.join(", ")), span),
196 ]
197 }
198 },
199
200 ReservedKeyword { start, end, .. } => {
201 let span = Span::new(*start, *end);
202
203 vec![
204 Label::primary(
205 "this identifier name is reserved for future use in the language",
206 span,
207 ),
208 Label::context("use a different name instead", span),
209 ]
210 }
211
212 NumericLiteral { start, error, end } => vec![Label::primary(
213 format!("invalid numeric literal: {error}"),
214 Span::new(*start, *end),
215 )],
216
217 StringLiteral { start } => vec![Label::primary(
218 "invalid string literal",
219 Span::new(*start, *start + 1),
220 )],
221
222 Literal { start } => vec![Label::primary(
223 "invalid literal",
224 Span::new(*start, *start + 1),
225 )],
226
227 EscapeChar { start, ch } => vec![Label::primary(
228 format!(
229 "invalid escape character: {}",
230 ch.map(|ch| ch.to_string())
231 .unwrap_or_else(|| "none".to_string())
232 ),
233 Span::new(*start, *start + 1),
234 )],
235
236 UnexpectedParseError(string) => vec![Label::primary(string, Span::default())],
237 }
238 }
239}
240
/// Hand-written lexer that turns program text into spanned [`Token`]s.
241#[derive(Debug)]
246pub(crate) struct Lexer<'input> {
 // The complete source text; token payloads are slices of it.
247 input: &'input str,
 // Cursor over `input`, yielding `(byte index, char)` pairs.
248 chars: Peekable<CharIndices<'input>>,
249
 // Number of currently unclosed `[`, `{` and `(` delimiters (see `open` / `close`).
250 open_brackets: usize,
252 open_braces: usize,
253 open_parens: usize,
 // Index at which the most recently emitted `LQuery` started.
254 query_start: Option<usize>,
255
 // Stack of indices at which an `RQuery` still has to be emitted; each entry is the index
 // of the last character of an open query.
256 rquery_indices: Vec<usize>,
272}
273
274impl<'input> Lexer<'input> {
275 fn next_token(&mut self) -> Option<SpannedResult<'input, usize>> {
276 use Token::{
277 Ampersand, Arrow, Bang, Colon, Comma, Dot, Escape, InvalidToken, LBrace, LBracket,
278 LParen, LQuery, Newline, Percent, RBrace, RBracket, RParen, RQuery, SemiColon,
279 Underscore,
280 };
281
282 loop {
283 let start = self.next_index();
284
285 let query_start_result = self.query_start(start);
290 match query_start_result {
291 Err(err) => return Some(Err(err)),
292 Ok(true) => {
293 self.query_start = Some(start);
294 return Some(Ok((start, LQuery, start + 1)));
296 }
297 Ok(false) => {}
298 }
299
300 if let Some(pos) = self.query_end(start) {
305 return Some(Ok((pos, RQuery, pos + 1)));
307 }
308
309 if let Some((start, ch)) = self.bump() {
312 let result = match ch {
313 '"' => Some(self.string_literal(start)),
314
315 ';' => Some(Ok(self.token(start, SemiColon))),
316 '\n' => Some(Ok(self.token(start, Newline))),
317 '\\' => Some(Ok(self.token(start, Escape))),
318
319 '(' => Some(Ok(self.open(start, LParen))),
320 '[' => Some(Ok(self.open(start, LBracket))),
321 '{' => Some(Ok(self.open(start, LBrace))),
322 '}' => Some(Ok(self.close(start, RBrace))),
323 ']' => Some(Ok(self.close(start, RBracket))),
324 ')' => Some(Ok(self.close(start, RParen))),
325 '.' => Some(Ok(self.token(start, Dot))),
326 '%' => Some(Ok(self.token(start, Percent))),
327 '&' if !matches!(self.peek(), Some((_, '&'))) => {
328 Some(Ok(self.token(start, Ampersand)))
329 }
330 ':' => Some(Ok(self.token(start, Colon))),
331 ',' => Some(Ok(self.token(start, Comma))),
332
333 '_' if !self.test_peek(is_ident_continue) => {
334 Some(Ok(self.token(start, Underscore)))
335 }
336
337 '!' if self.test_peek(|ch| ch == '!' || !is_operator(ch)) => {
338 Some(Ok(self.token(start, Bang)))
339 }
340
341 '-' if self.test_peek(|ch| ch == '>') => {
342 self.bump();
343 Some(Ok(self.token(start, Arrow)))
344 }
345
346 '#' => {
347 self.take_until(start, |ch| ch == '\n');
348 continue;
349 }
350
351 'r' if self.test_peek(|ch| ch == '\'') => Some(self.regex_literal(start)),
352 's' if self.test_peek(|ch| ch == '\'') => Some(self.raw_string_literal(start)),
353 't' if self.test_peek(|ch| ch == '\'') => Some(self.timestamp_literal(start)),
354
355 ch if is_ident_start(ch) => Some(Ok(self.identifier_or_function_call(start))),
356 ch if is_digit(ch) || (ch == '-' && self.test_peek(is_digit)) => {
357 Some(self.numeric_literal_or_identifier(start))
358 }
359 ch if is_operator(ch) => Some(Ok(self.operator(start))),
360 ch if ch.is_whitespace() => continue,
361
362 ch => Some(Ok(self.token(start, InvalidToken(ch)))),
363 };
364
365 return result;
368
369 } else if let Some(end) = self.rquery_indices.pop() {
373 return Some(Ok((end, RQuery, end + 1)));
375 }
376
377 return None;
378 }
379 }
380}
381
/// A lexical token. `S` is the type used for textual payloads (`&str` while lexing,
/// `String` inside errors).
382#[derive(Clone, Eq, PartialEq, Hash, Debug)]
383pub enum Token<S> {
 // Identifier-like lexemes. `PathField` is an identifier containing `@`; `FunctionCall`
 // is an identifier immediately followed by `(` or `!`.
384 Identifier(S),
385 PathField(S),
386 FunctionCall(S),
387 Operator(S),
388
 // Literals. String-like literals carry their content without the surrounding quotes.
389 StringLiteral(StringLiteralToken<S>),
391 RawStringLiteral(RawStringLiteralToken<S>),
392 IntegerLiteral(i64),
393 FloatLiteral(NotNan<f64>),
394 RegexLiteral(S),
395 TimestampLiteral(S),
396
 // A word from the reserved list in `Token::ident`.
397 ReservedIdentifier(S),
399
 // A character the lexer does not recognise.
400 InvalidToken(char),
401
 // Keywords.
402 If,
404 Else,
405 Null,
406 False,
407 True,
408 Abort,
409 Return,
410
 // Punctuation and delimiters.
411 Colon,
413 Comma,
414 Dot,
415 LBrace,
416 LBracket,
417 LParen,
418 Newline,
419 RBrace,
420 RBracket,
421 RParen,
422 SemiColon,
423 Underscore,
424 Escape,
425 Arrow,
426 Ampersand,
427 Percent,
428
 // Operators that get a dedicated token instead of `Operator(..)`.
429 Equals,
430 MergeEquals,
431 Bang,
432 Question,
433
 // Markers emitted around a query path; the lexer emits them without consuming input.
434 LQuery,
465 RQuery,
466}
467
468impl<S> Token<S> {
469 pub(crate) fn map<R>(self, f: impl Fn(S) -> R) -> Token<R> {
470 use self::Token::{
471 Abort, Ampersand, Arrow, Bang, Colon, Comma, Dot, Else, Equals, Escape, False,
472 FloatLiteral, FunctionCall, Identifier, If, IntegerLiteral, InvalidToken, LBrace,
473 LBracket, LParen, LQuery, MergeEquals, Newline, Null, Operator, PathField, Percent,
474 Question, RBrace, RBracket, RParen, RQuery, RawStringLiteral, RegexLiteral,
475 ReservedIdentifier, Return, SemiColon, StringLiteral, TimestampLiteral, True,
476 Underscore,
477 };
478
479 match self {
480 Identifier(s) => Identifier(f(s)),
481 PathField(s) => PathField(f(s)),
482 FunctionCall(s) => FunctionCall(f(s)),
483 Operator(s) => Operator(f(s)),
484
485 StringLiteral(StringLiteralToken(s)) => StringLiteral(StringLiteralToken(f(s))),
486 RawStringLiteral(RawStringLiteralToken(s)) => {
487 RawStringLiteral(RawStringLiteralToken(f(s)))
488 }
489
490 IntegerLiteral(s) => IntegerLiteral(s),
491 FloatLiteral(s) => FloatLiteral(s),
492 RegexLiteral(s) => RegexLiteral(f(s)),
493 TimestampLiteral(s) => TimestampLiteral(f(s)),
494
495 ReservedIdentifier(s) => ReservedIdentifier(f(s)),
496
497 InvalidToken(s) => InvalidToken(s),
498
499 Else => Else,
500 False => False,
501 If => If,
502 Null => Null,
503 True => True,
504 Abort => Abort,
505 Return => Return,
506
507 Colon => Colon,
509 Comma => Comma,
510 Dot => Dot,
511 LBrace => LBrace,
512 LBracket => LBracket,
513 LParen => LParen,
514 Newline => Newline,
515 RBrace => RBrace,
516 RBracket => RBracket,
517 RParen => RParen,
518 SemiColon => SemiColon,
519 Underscore => Underscore,
520 Escape => Escape,
521 Arrow => Arrow,
522 Ampersand => Ampersand,
523 Percent => Percent,
524
525 Equals => Equals,
526 MergeEquals => MergeEquals,
527 Bang => Bang,
528 Question => Question,
529
530 LQuery => LQuery,
531 RQuery => RQuery,
532 }
533 }
534}
535
536impl<S> fmt::Display for Token<S>
537where
538 S: fmt::Display,
539{
540 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
541 use self::Token::{
542 Abort, Ampersand, Arrow, Bang, Colon, Comma, Dot, Else, Equals, Escape, False,
543 FloatLiteral, FunctionCall, Identifier, If, IntegerLiteral, InvalidToken, LBrace,
544 LBracket, LParen, LQuery, MergeEquals, Newline, Null, Operator, PathField, Percent,
545 Question, RBrace, RBracket, RParen, RQuery, RawStringLiteral, RegexLiteral,
546 ReservedIdentifier, Return, SemiColon, StringLiteral, TimestampLiteral, True,
547 Underscore,
548 };
549
550 let s = match *self {
551 Identifier(_) => "Identifier",
552 PathField(_) => "PathField",
553 FunctionCall(_) => "FunctionCall",
554 Operator(_) => "Operator",
555 StringLiteral(_) => "StringLiteral",
556 RawStringLiteral(_) => "RawStringLiteral",
557 IntegerLiteral(_) => "IntegerLiteral",
558 FloatLiteral(_) => "FloatLiteral",
559 RegexLiteral(_) => "RegexLiteral",
560 TimestampLiteral(_) => "TimestampLiteral",
561 ReservedIdentifier(_) => "ReservedIdentifier",
562 InvalidToken(_) => "InvalidToken",
563
564 Else => "Else",
565 False => "False",
566 If => "If",
567 Null => "Null",
568 True => "True",
569 Abort => "Abort",
570 Return => "Return",
571
572 Colon => "Colon",
574 Comma => "Comma",
575 Dot => "Dot",
576 LBrace => "LBrace",
577 LBracket => "LBracket",
578 LParen => "LParen",
579 Newline => "Newline",
580 RBrace => "RBrace",
581 RBracket => "RBracket",
582 RParen => "RParen",
583 SemiColon => "SemiColon",
584 Underscore => "Underscore",
585 Escape => "Escape",
586 Arrow => "Arrow",
587 Ampersand => "Ampersand",
588 Percent => "Percent",
589
590 Equals => "Equals",
591 MergeEquals => "MergeEquals",
592 Bang => "Bang",
593 Question => "Question",
594
595 LQuery => "LQuery",
596 RQuery => "RQuery",
597 };
598
599 s.fmt(f)
600 }
601}
602
603impl<'input> Token<&'input str> {
604 fn ident(s: &'input str) -> Self {
606 use Token::{
607 Abort, Else, False, Identifier, If, Null, PathField, ReservedIdentifier, Return, True,
608 };
609
610 match s {
611 "if" => If,
612 "else" => Else,
613 "true" => True,
614 "false" => False,
615 "null" => Null,
616 "abort" => Abort,
617 "return" => Return,
618
619 "array" | "bool" | "boolean" | "break" | "continue" | "do" | "emit" | "float"
621 | "for" | "forall" | "foreach" | "all" | "each" | "any" | "try" | "undefined"
622 | "int" | "integer" | "iter" | "object" | "regex" | "string" | "traverse"
623 | "timestamp" | "duration" | "unless" | "walk" | "while" | "loop" => {
624 ReservedIdentifier(s)
625 }
626
627 _ if s.contains('@') => PathField(s),
628
629 _ => Identifier(s),
630 }
631 }
632}
633
/// Content of a `"…"` string literal, without the surrounding quotes and with escape
/// sequences still in their source form (see `unescape` / `template`).
634#[derive(Clone, PartialEq, Eq, Debug, Hash)]
635pub struct StringLiteralToken<S>(pub S);
636
/// Content of an `s'…'` raw string literal, without the prefix and quotes.
637#[derive(Clone, PartialEq, Eq, Debug, Hash)]
638pub struct RawStringLiteralToken<S>(pub S);
639
640impl StringLiteralToken<&str> {
641 pub fn template(&self, span: Span) -> TemplateString {
645 let mut segments = Vec::new();
646
647 let chars = self.0.chars().collect::<Vec<_>>();
648 let mut template = false;
649 let mut current = String::new();
650
651 let mut pos = 0;
652 while pos < chars.len() {
653 match chars[pos] {
654 '}' if template && chars.get(pos + 1) == Some(&'}') => {
655 if !current.is_empty() {
657 let seg = std::mem::take(&mut current);
658 segments.push(StringSegment::Template(
659 seg.trim().to_string(),
660 Span::new(pos - seg.chars().count() - 1, pos + 3) + span.start(),
661 ));
662 }
663 template = false;
664 pos += 2;
665 }
666 '\\' if !template
667 && chars.get(pos + 1) == Some(&'{')
668 && chars.get(pos + 2) == Some(&'{') =>
669 {
670 current.push_str("{{");
672 pos += 3;
673 }
674 '\\' if !template
675 && chars.get(pos + 1) == Some(&'}')
676 && chars.get(pos + 2) == Some(&'}') =>
677 {
678 current.push_str("}}");
680 pos += 3;
681 }
682 '{' if !template && chars.get(pos + 1) == Some(&'{') => {
683 if !current.is_empty() {
685 let seg = std::mem::take(&mut current);
686 segments.push(StringSegment::Literal(
687 unescape_string_literal(&seg),
688 Span::new(pos - seg.chars().count() + 1, pos + 1) + span.start(),
689 ));
690 }
691 template = true;
692 pos += 2;
693 }
694 chr => {
695 current.push(chr);
696 pos += 1;
697 }
698 }
699 }
700
701 if !template && !current.is_empty() {
702 segments.push(StringSegment::Literal(
703 unescape_string_literal(¤t),
704 Span::new(pos - current.chars().count() + 1, pos + 1) + span.start(),
705 ));
706 }
707
708 TemplateString(segments)
709 }
710
711 pub fn unescape(&self) -> String {
712 unescape_string_literal(self.0)
713 }
714}
715
716impl RawStringLiteralToken<&str> {
717 pub fn unescape(&self) -> String {
718 self.0.to_string()
719 }
720}
721
/// Lets the lexer be consumed as an iterator of spanned tokens.
722impl<'input> Iterator for Lexer<'input> {
727 type Item = SpannedResult<'input, usize>;
728
 /// Delegates to `next_token`.
729 fn next(&mut self) -> Option<Self::Item> {
730 self.next_token()
731 }
732}
733
734impl<'input> Lexer<'input> {
739 fn open(&mut self, start: usize, token: Token<&'input str>) -> Spanned<'input, usize> {
740 match &token {
741 Token::LParen => self.open_parens += 1,
742 Token::LBracket => self.open_brackets += 1,
743 Token::LBrace => self.open_braces += 1,
744 _ => {}
745 }
746
747 self.token(start, token)
748 }
749
750 fn close(&mut self, start: usize, token: Token<&'input str>) -> Spanned<'input, usize> {
751 match &token {
752 Token::RParen => self.open_parens = self.open_parens.saturating_sub(1),
753 Token::RBracket => self.open_brackets = self.open_brackets.saturating_sub(1),
754 Token::RBrace => self.open_braces = self.open_braces.saturating_sub(1),
755 _ => {}
756 }
757
758 self.token(start, token)
759 }
760
761 fn token(&mut self, start: usize, token: Token<&'input str>) -> Spanned<'input, usize> {
762 (start, token, self.next_index())
763 }
764
765 fn query_end(&mut self, start: usize) -> Option<usize> {
766 match self.rquery_indices.last() {
767 Some(end) if start > 0 && start.saturating_sub(1) == *end => self.rquery_indices.pop(),
768 _ => None,
769 }
770 }
771
    /// Decides whether a query (path expression) begins at `start`.
    ///
    /// The check is a pure look-ahead on a clone of the character cursor; the lexer's own
    /// position is not moved. When a query is found, the index of its last character is
    /// pushed onto `rquery_indices` (so that `query_end` can emit the matching `RQuery`) and
    /// `Ok(true)` is returned. `Ok(false)` means "no new query here".
    ///
    /// # Errors
    ///
    /// Returns an error when a literal nested inside delimiters fails to lex, or when the
    /// input ends right after a nested comment or literal.
772 fn query_start(&mut self, start: usize) -> Result<bool, Error> {
 // The innermost pending query already ends at this index; do not open another one.
773 if self.rquery_indices.last() == Some(&start) {
776 return Ok(false);
777 }
778
 // Nothing left to lex.
779 if self.peek().is_none() {
781 return Ok(false);
782 }
783
 // Work on a copy of the cursor so the scan below consumes nothing from the real one.
784 let mut chars = self.chars.clone();
788 debug_assert!(chars.peek().is_some());
789
790 let query_start_char = chars.peek().unwrap().1;
795 if !is_query_start(query_start_char) {
796 return Ok(false);
797 }
798
 // Set once the scanned text has been recognised as a path (see the `.`, `%` and `[`
 // arms below).
799 let mut valid = false;
816
 // Last character accepted by the scan; `None` while still at the first character.
817 let mut last_char = None;
819
 // Nesting depth of each delimiter kind within the scanned text.
820 let mut braces = 0;
823 let mut brackets = 0;
824 let mut parens = 0;
825
 // Index of the last character belonging to the candidate query.
826 let mut end = 0;
827 while let Some((pos, ch)) = chars.next() {
 // Given the result of lexing a literal with a temporary lexer over `input[pos + 1..]`,
 // advances the look-ahead cursor up to index `pos + new` (recording each skipped char
 // as `last_char`) and sets `end` to that index. Lexing errors are passed through.
828 let take_until_end =
829 |result: SpannedResult<'input, usize>,
830 last_char: &mut Option<char>,
831 end: &mut usize,
832 chars: &mut Peekable<CharIndices<'input>>| {
833 result.map(|(_, _, new)| {
834 for (i, ch) in chars {
835 *last_char = Some(ch);
836 if i == new + pos {
837 break;
838 }
839 }
840
841 *end = pos + new;
842 })
843 };
844
 // NOTE: arm order is significant; guarded arms shadow the more general ones below.
845 match ch {
846 '{' => braces += 1,
848 '(' => parens += 1,
 // A top-level `[` directly after a closing delimiter or an identifier character
 // marks the scanned text as a path (index access).
849 '[' if braces == 0 && parens == 0 && brackets == 0 => {
850 brackets += 1;
851
852 if last_char == Some(']') {
853 valid = true
854 }
855
856 if last_char == Some('}') {
857 valid = true
858 }
859
860 if last_char == Some(')') {
861 valid = true
862 }
863
864 if last_char.is_some_and(is_ident_continue) {
865 valid = true
866 }
867 }
868 '[' => brackets += 1,
869
 // Literals are lexed by a throw-away lexer and skipped as a whole; a literal that
 // fails to lex ends the scan.
870 '"' => {
872 let result = Lexer::new(&self.input[pos + 1..]).string_literal(0);
873 match take_until_end(result, &mut last_char, &mut end, &mut chars) {
874 Ok(()) => continue,
875 Err(_) => break,
876 }
877 }
878 's' if chars.peek().map(|(_, ch)| ch) == Some(&'\'') => {
879 let result = Lexer::new(&self.input[pos + 1..]).raw_string_literal(0);
880 match take_until_end(result, &mut last_char, &mut end, &mut chars) {
881 Ok(()) => continue,
882 Err(_) => break,
883 }
884 }
885 'r' if chars.peek().map(|(_, ch)| ch) == Some(&'\'') => {
886 let result = Lexer::new(&self.input[pos + 1..]).regex_literal(0);
887 match take_until_end(result, &mut last_char, &mut end, &mut chars) {
888 Ok(()) => continue,
889 Err(_) => break,
890 }
891 }
892 't' if chars.peek().map(|(_, ch)| ch) == Some(&'\'') => {
893 let result = Lexer::new(&self.input[pos + 1..]).timestamp_literal(0);
894 match take_until_end(result, &mut last_char, &mut end, &mut chars) {
895 Ok(()) => continue,
896 Err(_) => break,
897 }
898 }
899
 // A closing delimiter without a matching opener inside the scanned text ends the scan.
900 '}' if braces == 0 => break,
901 '}' => braces -= 1,
902
903 ')' if parens == 0 => break,
904 ')' => parens -= 1,
905
906 ']' if brackets == 0 => break,
907 ']' => brackets -= 1,
908
 // Inside a delimiter pair: skip ahead to the closing delimiter of the pair
 // (not consumed here), stepping over comments and literals on the way.
909 _ if braces > 0 || brackets > 0 || parens > 0 => {
912 let (start_delim, end_delim) = if braces > 0 {
913 ('{', '}')
914 } else if brackets > 0 {
915 ('[', ']')
916 } else {
917 ('(', ')')
918 };
919
 // Count of additional openers of the same kind seen while skipping.
920 let mut skip_delim = 0;
921 while let Some((pos, ch)) = chars.peek() {
922 let pos = *pos;
923
 // Advances the cursor up to index `pos + new` (the literal's end as reported by the
 // temporary lexer) and yields the character that follows, or `Err(())` at end of input.
924 let literal_check = |result: Spanned<'input, usize>, chars: &mut Peekable<CharIndices<'input>>| {
925 let (_, _, new) = result;
926
927 #[allow(clippy::while_let_on_iterator)]
928 while let Some((i, _)) = chars.next() {
929 if i == new + pos {
930 break;
931 }
932 }
933 match chars.peek().map(|(_, ch)| ch) {
934 Some(ch) => Ok(*ch),
935 None => Err(()),
936 }
937 };
938
 // `ch` becomes the first character after any comment or literal starting here.
939 let ch = match &self.input[pos..] {
940 s if s.starts_with('#') => {
941 for (_, chr) in chars.by_ref() {
942 if chr == '\n' {
943 break;
944 }
945 }
946 match chars.peek().map(|(_, ch)| ch) {
947 Some(ch) => *ch,
948 None => {
949 return Err(Error::UnexpectedParseError(
950 "Expected characters at end of comment.".to_string(),
951 ));
952 }
953 }
954 }
955 s if s.starts_with('"') => {
 // Errors from the temporary lexer are relative to its sub-slice; shift them back.
956 let r = Lexer::new(&self.input[pos + 1..])
957 .string_literal(0)
958 .map_err(|e| e.offset_by(pos + 1))?;
959 match literal_check(r, &mut chars) {
960 Ok(ch) => ch,
961 Err(()) => {
962 return Err(Error::UnexpectedParseError(
965 "Expected characters at end of string literal."
966 .to_string(),
967 ));
968 }
969 }
970 }
971 s if s.starts_with("s'") => {
972 let r = Lexer::new(&self.input[pos + 1..])
973 .raw_string_literal(0)
974 .map_err(|e| e.offset_by(pos + 1))?;
975 match literal_check(r, &mut chars) {
976 Ok(ch) => ch,
977 Err(()) => {
978 return Err(Error::UnexpectedParseError(
979 "Expected characters at end of raw string literal."
980 .to_string(),
981 ));
982 }
983 }
984 }
985 s if s.starts_with("r'") => {
986 let r = Lexer::new(&self.input[pos + 1..])
987 .regex_literal(0)
988 .map_err(|e| e.offset_by(pos + 1))?;
989 match literal_check(r, &mut chars) {
990 Ok(ch) => ch,
991 Err(()) => {
992 return Err(Error::UnexpectedParseError(
993 "Expected characters at end of regex literal."
994 .to_string(),
995 ));
996 }
997 }
998 }
999 s if s.starts_with("t'") => {
1000 let r = Lexer::new(&self.input[pos + 1..])
1001 .timestamp_literal(0)
1002 .map_err(|e| e.offset_by(pos + 1))?;
1003 match literal_check(r, &mut chars) {
1004 Ok(ch) => ch,
1005 Err(()) => {
1006 return Err(Error::UnexpectedParseError(
1007 "Expected characters at end of timestamp literal."
1008 .to_string(),
1009 ));
1010 }
1011 }
1012 }
1013 _ => *ch,
1014 };
1015
 // Stop in front of the closing delimiter that matches the outermost opener.
1016 if skip_delim == 0 && ch == end_delim {
1017 break;
1018 }
1019 if let Some((_, c)) = chars.next() {
1020 if c == start_delim {
1021 skip_delim += 1;
1022 }
1023 if c == end_delim {
1024 skip_delim -= 1;
1025 }
1026 }
1027 }
1028 }
 // A leading `.` or `%`, or a `.` after a closing delimiter / string quote, makes this
 // a path.
1029 '.' | '%' if last_char.is_none() => valid = true,
1030 '.' if last_char == Some(')') => valid = true,
1031 '.' if last_char == Some('}') => valid = true,
1032 '.' if last_char == Some(']') => valid = true,
1033 '.' if last_char == Some('"') => valid = true,
1034 '.' if last_char.is_some_and(is_ident_continue) => {
 // A `.` preceded only by digits / underscores (back to the previous whitespace) is
 // not treated as a path separator — presumably the decimal point of a float.
1035 let digits = self.input[..pos]
1037 .chars()
1038 .rev()
1039 .take_while(|ch| !ch.is_whitespace())
1040 .all(|ch| is_digit(ch) || ch == '_');
1041
1042 if !digits {
1043 valid = true
1044 }
1045 }
1046
 // `!` is accepted as part of the scanned text without affecting validity.
1047 '!' => {}
1049
 // Comment: skip to the end of the line; `end` follows the skipped characters.
1050 '#' => {
1052 #[allow(clippy::while_let_on_iterator)]
1053 while let Some((pos, ch)) = chars.next() {
1054 if ch == '\n' {
1055 break;
1056 }
1057 end = pos;
1058 }
1059 continue;
1060 }
1061
1062 ch if is_ident_continue(ch) => {}
1063
 // Any other character ends the candidate query.
1064 _ => break,
1066 }
1067
1068 last_char = Some(ch);
1069 end = pos;
1070 }
1071
1072 if !valid {
1074 return Ok(false);
1075 }
1076
 // A query ending at the same index is already open; do not open it twice.
1077 if self.rquery_indices.contains(&end) {
1079 return Ok(false);
1080 }
1081
1082 self.rquery_indices.push(end);
1083 Ok(true)
1084 }
1085
1086 fn string_literal(&mut self, start: usize) -> SpannedResult<'input, usize> {
1087 let content_start = self.next_index();
1088
1089 loop {
1090 let scan_start = self.next_index();
1091 self.take_until(scan_start, |c| c == '"' || c == '\\');
1092
1093 match self.bump() {
1094 Some((escape_start, '\\')) => self.escape_code(escape_start)?,
1095 Some((content_end, '"')) => {
1096 let end = self.next_index();
1097 let slice = self.slice(content_start, content_end);
1098 let token = Token::StringLiteral(StringLiteralToken(slice));
1099 return Ok((start, token, end));
1100 }
1101 _ => break,
1102 }
1103 }
1104
1105 Err(Error::StringLiteral { start })
1106 }
1107
    /// Lexes an `r'…'` literal into `Token::RegexLiteral`; `start` is the index of the prefix.
1108 fn regex_literal(&mut self, start: usize) -> SpannedResult<'input, usize> {
1109 self.quoted_literal(start, Token::RegexLiteral)
1110 }
1111
    /// Lexes an `s'…'` literal into `Token::RawStringLiteral`; `start` is the index of the prefix.
1112 fn raw_string_literal(&mut self, start: usize) -> SpannedResult<'input, usize> {
1113 self.quoted_literal(start, |c| Token::RawStringLiteral(RawStringLiteralToken(c)))
1114 }
1115
    /// Lexes a `t'…'` literal into `Token::TimestampLiteral`; `start` is the index of the prefix.
1116 fn timestamp_literal(&mut self, start: usize) -> SpannedResult<'input, usize> {
1117 self.quoted_literal(start, Token::TimestampLiteral)
1118 }
1119
1120 fn numeric_literal_or_identifier(&mut self, start: usize) -> SpannedResult<'input, usize> {
1121 let (end, int) = self.take_while(start, |ch| is_digit(ch) || ch == '_');
1122
1123 let negative = self.input.get(start..=start) == Some("-");
1124 match self.peek() {
1125 Some((_, ch)) if is_ident_continue(ch) && !negative => {
1126 self.bump();
1127 let (end, ident) = self.take_while(start, is_ident_continue);
1128 Ok((start, Token::ident(ident), end))
1129 }
1130 Some((_, '.')) => {
1131 self.bump();
1132 let (end, float) = self.take_while(start, |ch| is_digit(ch) || ch == '_');
1133
1134 match float.replace('_', "").parse() {
1135 Ok(float) => {
1136 let float = NotNan::new(float).unwrap();
1137 Ok((start, Token::FloatLiteral(float), end))
1138 }
1139 Err(err) => Err(Error::NumericLiteral {
1140 start,
1141 end,
1142 error: err.to_string(),
1143 }),
1144 }
1145 }
1146 None | Some(_) => match int.replace('_', "").parse() {
1147 Ok(int) => Ok((start, Token::IntegerLiteral(int), end)),
1148 Err(err) => Err(Error::NumericLiteral {
1149 start,
1150 end,
1151 error: err.to_string(),
1152 }),
1153 },
1154 }
1155 }
1156
1157 fn identifier_or_function_call(&mut self, start: usize) -> Spanned<'input, usize> {
1158 let (end, ident) = self.take_while(start, is_ident_continue);
1159
1160 let token = if self.test_peek(|ch| ch == '(' || ch == '!') {
1161 Token::FunctionCall(ident)
1162 } else {
1163 Token::ident(ident)
1164 };
1165
1166 (start, token, end)
1167 }
1168
1169 fn operator(&mut self, start: usize) -> Spanned<'input, usize> {
1170 let (end, op) = self.take_while(start, is_operator);
1171
1172 let token = match op {
1173 "=" => Token::Equals,
1174 "|=" => Token::MergeEquals,
1175 "?" => Token::Question,
1176 op => Token::Operator(op),
1177 };
1178
1179 (start, token, end)
1180 }
1181
1182 fn quoted_literal(
1183 &mut self,
1184 start: usize,
1185 tok: impl Fn(&'input str) -> Tok<'input>,
1186 ) -> SpannedResult<'input, usize> {
1187 self.bump();
1188 let content_start = self.next_index();
1189
1190 loop {
1191 let scan_start = self.next_index();
1192 self.take_until(scan_start, |c| c == '\'' || c == '\\');
1193
1194 match self.bump() {
1195 Some((_, '\\')) => self.bump(),
1196 Some((end, '\'')) => {
1197 let content = self.slice(content_start, end);
1198 let token = tok(content);
1199 let end = self.next_index();
1200
1201 return Ok((start, token, end));
1202 }
1203 _ => break,
1204 };
1205 }
1206
1207 Err(Error::Literal { start })
1208 }
1209}
1210
1211impl<'input> Lexer<'input> {
1216 pub(crate) fn new(input: &'input str) -> Lexer<'input> {
1217 Self {
1218 input,
1219 chars: input.char_indices().peekable(),
1220 open_braces: 0,
1221 open_brackets: 0,
1222 open_parens: 0,
1223 rquery_indices: vec![],
1224 query_start: None,
1225 }
1226 }
1227
1228 fn bump(&mut self) -> Option<(usize, char)> {
1229 self.chars.next()
1230 }
1231
1232 fn peek(&mut self) -> Option<(usize, char)> {
1233 self.chars.peek().copied()
1234 }
1235
1236 fn take_while<F>(&mut self, start: usize, mut keep_going: F) -> (usize, &'input str)
1237 where
1238 F: FnMut(char) -> bool,
1239 {
1240 self.take_until(start, |c| !keep_going(c))
1241 }
1242
1243 fn take_until<F>(&mut self, start: usize, mut terminate: F) -> (usize, &'input str)
1244 where
1245 F: FnMut(char) -> bool,
1246 {
1247 while let Some((end, ch)) = self.peek() {
1248 if terminate(ch) {
1249 return (end, self.slice(start, end));
1250 }
1251
1252 self.bump();
1253 }
1254
1255 let loc = self.next_index();
1256
1257 (loc, self.slice(start, loc))
1258 }
1259
1260 fn test_peek<F>(&mut self, mut test: F) -> bool
1261 where
1262 F: FnMut(char) -> bool,
1263 {
1264 self.peek().is_some_and(|(_, ch)| test(ch))
1265 }
1266
1267 fn slice(&self, start: usize, end: usize) -> &'input str {
1268 &self.input[start..end]
1269 }
1270
1271 fn next_index(&mut self) -> usize {
1272 self.peek().as_ref().map_or(self.input.len(), |l| l.0)
1273 }
1274
1275 fn escape_code(&mut self, start: usize) -> Result<(), Error> {
1277 match self.bump() {
1278 Some((_, '\n' | '\'' | '"' | '\\' | 'n' | 'r' | 't' | '{' | '}' | '0')) => Ok(()),
1279 Some((start, ch)) => Err(Error::EscapeChar {
1280 start,
1281 ch: Some(ch),
1282 }),
1283 None => Err(Error::EscapeChar { start, ch: None }),
1284 }
1285 }
1286}
1287
/// Reports whether `ch` may begin an identifier: an ASCII letter, `_` or `@`.
fn is_ident_start(ch: char) -> bool {
    ch == '@' || ch == '_' || ch.is_ascii_alphabetic()
}
1295
1296fn is_ident_continue(ch: char) -> bool {
1297 match ch {
1298 '0'..='9' => true,
1299 ch => is_ident_start(ch),
1300 }
1301}
1302
1303fn is_query_start(ch: char) -> bool {
1304 match ch {
1305 '%' | '.' | '{' | '[' => true,
1306 ch => is_ident_start(ch),
1307 }
1308}
1309
/// Reports whether `ch` is an ASCII decimal digit.
fn is_digit(ch: char) -> bool {
    matches!(ch, '0'..='9')
}
1313
/// Reports whether `ch` is one of the characters operators are made of.
pub(crate) fn is_operator(ch: char) -> bool {
    const OPERATOR_CHARS: &str = "!&*+-/<=>?|";

    OPERATOR_CHARS.contains(ch)
}
1320
/// Resolves the escape sequences of a string literal's raw content.
///
/// Supported escapes are `\'`, `\"`, `\\`, `\n`, `\r`, `\t`, `\0`, `\{` and `\}`. A backslash
/// directly followed by a newline is a line continuation: the backslash, the newline and
/// all whitespace that follows are dropped. A backslash at the very end of the input has
/// nothing to escape and is kept as it is.
///
/// # Panics
///
/// Panics on any other escape sequence. The lexer rejects those before this function is
/// reached, so hitting the panic means the input did not come from a lexed literal.
fn unescape_string_literal(mut s: &str) -> String {
    let mut string = String::with_capacity(s.len());

    while let Some(i) = s.bytes().position(|b| b == b'\\') {
        let next = match s.as_bytes().get(i + 1) {
            Some(&byte) => byte,
            // Trailing backslash: leave it in `s` so it is copied verbatim below.
            None => break,
        };

        string.push_str(&s[..i]);

        if next == b'\n' {
            // Line continuation: also swallow the indentation of the next line.
            s = s[i + 2..].trim_start();
            continue;
        }

        let unescaped = match next {
            b'\'' => '\'',
            b'"' => '"',
            b'\\' => '\\',
            b'n' => '\n',
            b'r' => '\r',
            b't' => '\t',
            b'0' => '\0',
            b'{' => '{',
            // Accepted by the lexer's escape validation, so it must be handled here too.
            b'}' => '}',
            _ => unimplemented!("invalid escape"),
        };
        string.push(unescaped);
        s = &s[i + 2..];
    }

    string.push_str(s);
    string
}
1357
1358#[cfg(test)]
1359mod test {
1360 #![allow(clippy::print_stdout)] use super::super::lex::Token::{
1363 Arrow, Bang, Colon, Comma, Dot, Else, Equals, FloatLiteral, FunctionCall, Identifier, If,
1364 IntegerLiteral, LBrace, LBracket, LParen, LQuery, Newline, Operator, PathField, Percent,
1365 RBrace, RBracket, RParen, RQuery, RawStringLiteral, RegexLiteral, StringLiteral,
1366 TimestampLiteral, True,
1367 };
1368 use super::*;
1369
1370 fn lexer(input: &str) -> impl Iterator<Item = SpannedResult<'_, usize>> + '_ {
1371 let mut lexer = Lexer::new(input);
1372 Box::new(std::iter::from_fn(move || lexer.next()))
1373 }
1374
    // Identity helper: returns `source` unchanged. The tests wrap their input in it before
    // handing the input to `test`.
    fn data(source: &str) -> &str {
        source
    }
1379
1380 fn test(input: &str, expected: Vec<(&str, Tok<'_>)>) {
1381 let mut lexer = lexer(input);
1382 let mut count = 0;
1383 let length = expected.len();
1384 for (token, (expected_span, expected_tok)) in lexer.by_ref().zip(expected.into_iter()) {
1385 count += 1;
1386 println!("{token:?}");
1387 let start = expected_span.find('~').unwrap_or_default();
1388 let end = expected_span.rfind('~').map(|i| i + 1).unwrap_or_default();
1389
1390 let expect = (start, expected_tok, end);
1391 assert_eq!(Ok(expect), token);
1392 }
1393
1394 assert_eq!(count, length);
1395 assert!(count > 0);
1396 assert!(lexer.next().is_none());
1397 }
1398
    // `%foo` is expected to lex as a query (`LQuery` .. `RQuery`) made of `Percent` and the
    // identifier `foo`.
    #[test]
    fn test_1() {
        test(
            data("%foo"),
            vec![
                ("~ ", LQuery),
                ("~ ", Percent),
                (" ~~~", Identifier("foo")),
                (" ~", RQuery),
            ],
        );
    }
1411
    // `%@foo`: the `@`-prefixed segment is expected as a single `PathField("@foo")` token
    // inside the query.
    #[test]
    fn test_2() {
        test(
            data("%@foo"),
            vec![
                ("~ ", LQuery),
                ("~ ", Percent),
                (" ~~~~", PathField("@foo")),
                (" ~", RQuery),
            ],
        );
    }
1424
    // `%foo[%bar]`: a `%` query used as an index is expected to produce its own nested
    // `LQuery`/`RQuery` pair inside the brackets of the outer query.
    #[test]
    fn test_3() {
        test(
            data("%foo[%bar]"),
            vec![
                ("~ ", LQuery),
                ("~ ", Percent),
                (" ~~~ ", Identifier("foo")),
                (" ~ ", LBracket),
                (" ~ ", LQuery),
                (" ~ ", Percent),
                (" ~~~ ", Identifier("bar")),
                (" ~", RQuery),
                (" ~ ", RBracket),
                (" ~", RQuery),
            ],
        );
    }
1443
    // `%foo.@bar`: an identifier segment followed by a `Dot` and an `@`-prefixed `PathField`
    // segment, all within one query.
    #[test]
    fn test_4() {
        test(
            data("%foo.@bar"),
            vec![
                ("~ ", LQuery),
                ("~ ", Percent),
                (" ~~~ ", Identifier("foo")),
                (" ~ ", Dot),
                (" ~~~~ ", PathField("@bar")),
                (" ~", RQuery),
            ],
        );
    }
1458
    // `.(a|b)`: a parenthesised alternative after the leading dot; the `|` is expected as
    // `Operator("|")` and the whole expression as a single query.
    #[test]
    fn test_5() {
        test(
            data(".(a|b)"),
            vec![
                ("~ ", LQuery),
                ("~ ", Dot),
                (" ~ ", LParen),
                (" ~ ", Identifier("a")),
                (" ~ ", Operator("|")),
                (" ~ ", Identifier("b")),
                (" ~ ", RParen),
                (" ~ ", RQuery),
            ],
        );
    }
1475
    // `.(@a|b)`: same shape as the `.(a|b)` case, but the first alternative is `@`-prefixed and
    // is expected as `PathField("@a")`.
    #[test]
    fn test_6() {
        test(
            data(".(@a|b)"),
            vec![
                ("~ ", LQuery),
                ("~ ", Dot),
                (" ~ ", LParen),
                (" ~~ ", PathField("@a")),
                (" ~ ", Operator("|")),
                (" ~ ", Identifier("b")),
                (" ~ ", RParen),
                (" ~ ", RQuery),
            ],
        );
    }
1492
1493 #[test]
1494 fn unterminated_literal_errors() {
1495 let mut lexer = Lexer::new("a(m, r')");
1496 assert_eq!(Some(Err(Error::Literal { start: 6 })), lexer.next());
1497 }
1498
1499 #[test]
1500 fn invalid_grok_pattern() {
1501 let mut lexer = Lexer::new(
1503 r#"parse_grok!("1.2.3.4 - - [23/Mar/2021:06:46:35 +0000]", "%{IPORHOST:remote_ip} %{USER:ident} %{USER:user_name} \[%{HTTPDATE:timestamp}\]""#,
1504 );
1505 assert_eq!(
1506 Some(Err(Error::EscapeChar {
1507 start: 112,
1508 ch: Some('[')
1509 })),
1510 lexer.next()
1511 );
1512 }
1513
    // Double-quoted string literals: the expected tokens carry the raw text between the
    // quotes with escape sequences left untouched (e.g. `\"`, `\n`, `\t`, `\0`). The trailing
    // assertion checks that `template` on the raw `\"\"` yields one literal segment holding two
    // unescaped quotes.
    #[test]
    #[rustfmt::skip]
    fn string_literals() {
        use StringLiteralToken as S;
        use StringLiteral as L;

        test(
            data(r#"foo "bar\"\n" baz "" "\t" "\"\"" "null \0""#),
            vec![
                ("~~~ ", Identifier("foo")),
                (" ~~~~~~~~~ ", L(S("bar\\\"\\n"))),
                (" ~~~ ", Identifier("baz")),
                (" ~~ ", L(S(""))),
                (" ~~~~ ", L(S("\\t"))),
                (" ~~~~~~ ", L(S(r#"\"\""#))),
                (" ~~~~~~~~~", L(S("null \\0"))),
            ],
        );
        assert_eq!(TemplateString(vec![StringSegment::Literal(r#""""#.to_string(), Span::new(1, 5))]), StringLiteralToken(r#"\"\""#).template(Span::new(0, 6)));
    }
1534
    // A string literal containing a backslash directly followed by a line break: the first
    // token must be a string literal whose `template` is the single literal segment "foo bar",
    // i.e. the backslash, the line break and the whitespace after it are removed.
    //
    // NOTE(review): the expected span end (26) presumably depends on the byte length of the raw
    // literal, including the whitespace at the start of the continuation line — confirm that
    // the whitespace inside the raw string has not been altered by reformatting.
    #[test]
    fn multiline_string_literals() {
        let mut lexer = lexer(
            r#""foo \
 bar""#,
        );

        match lexer.next() {
            Some(Ok((_, StringLiteral(s), _))) => assert_eq!(
                TemplateString(vec![StringSegment::Literal(
                    "foo bar".to_string(),
                    Span::new(1, 26)
                )]),
                s.template(Span::new(0, 26))
            ),
            _ => panic!("Not a string literal"),
        }
    }
1553
1554 #[test]
1555 fn string_literal_unexpected_escape_code() {
1556 assert_eq!(
1557 lexer(r#""\X""#).last(),
1558 Some(Err(Error::StringLiteral { start: 3 }))
1559 );
1560 }
1561
1562 #[test]
1563 fn string_literal_unterminated() {
1564 assert_eq!(
1565 lexer(r#"foo "bar\"\n baz"#).last(),
1566 Some(Err(Error::StringLiteral { start: 4 }))
1567 );
1568 }
1569
    // `r'...'` literals: the expected `RegexLiteral` tokens hold the text between the quotes
    // verbatim (backslashes included); an empty `r''` is accepted as well.
    #[test]
    #[rustfmt::skip]
    fn regex_literals() {
        test(
            data(r"r'[fb]oo+' r'a/b\[rz\]' r''"),
            vec![
                ("~~~~~~~~~~ ", RegexLiteral("[fb]oo+")),
                (" ~~~~~~~~~~~~ ", RegexLiteral("a/b\\[rz\\]")),
                (" ~~~", RegexLiteral("")),
            ],
        );
    }
1582
1583 #[test]
1584 fn regex_literal_unterminated() {
1585 assert_eq!(
1586 lexer("r'foo bar").last(),
1587 Some(Err(Error::Literal { start: 0 }))
1588 );
1589 }
1590
    // A `t'...'` literal containing an escaped quote: the expected `TimestampLiteral` token
    // keeps the `\'` sequence verbatim.
    #[test]
    #[rustfmt::skip]
    fn timestamp_literals() {
        test(
            data(r"t'foo \' bar'"),
            vec![
                ("~~~~~~~~~~~~~", TimestampLiteral("foo \\' bar")),
            ],
        );
    }
1601
1602 #[test]
1603 fn timestamp_literal_unterminated() {
1604 assert_eq!(
1605 lexer("t'foo").last(),
1606 Some(Err(Error::Literal { start: 0 }))
1607 );
1608 }
1609
    // An `s'...'` literal: the expected `RawStringLiteral` token holds the text between the
    // quotes verbatim, including double quotes, `\n` and the escaped `\'`.
    #[test]
    #[rustfmt::skip]
    fn raw_string_literals() {
        use RawStringLiteralToken as S;
        use RawStringLiteral as R;

        test(
            data(r#"s'a "bc" \n \'d'"#),
            vec![
                ("~~~~~~~~~~~~~~~~", R(S(r#"a "bc" \n \'d"#))),
            ],
        );
    }
1623
1624 #[test]
1625 fn raw_string_literal_unterminated() {
1626 assert_eq!(
1627 lexer("s'foo").last(),
1628 Some(Err(Error::Literal { start: 0 }))
1629 );
1630 }
1631
    // Integer and float literals. Per the expectations, a leading zero is ignored (`012` is
    // the integer 12) and a trailing dot still yields a float (`12.` is 12.0).
    #[test]
    #[rustfmt::skip]
    fn number_literals() {
        test(
            data("12 012 12.43 12. 0 902.0001"),
            vec![
                ("~~ ", IntegerLiteral(12)),
                (" ~~~ ", IntegerLiteral(12)),
                (" ~~~~~ ", FloatLiteral(NotNan::new(12.43).unwrap())),
                (" ~~~ ", FloatLiteral(NotNan::new(12.0).unwrap())),
                (" ~ ", IntegerLiteral(0)),
                (" ~~~~~~~~", FloatLiteral(NotNan::new(902.0001).unwrap())),
            ],
        );
    }
1647
    // Underscores inside numeric literals are expected to be ignored when computing the value
    // (`1_000` is 1000, `1_2_3._4_0_` is 123.40) while still being part of the token span.
    #[test]
    #[rustfmt::skip]
    fn number_literals_underscore() {
        test(
            data("1_000 1_2_3._4_0_"),
            vec![
                ("~~~~~ ", IntegerLiteral(1000)),
                (" ~~~~~~~~~~~", FloatLiteral(NotNan::new(123.40).unwrap())),
            ],
        );
    }
1659
    // Plain identifiers (letters, digits, underscores) versus keywords: `if` and `else` are
    // expected as the dedicated `If`/`Else` tokens rather than `Identifier`.
    #[test]
    fn identifiers() {
        test(
            data("foo bar1 if baz_12_qux else "),
            vec![
                ("~~~ ", Identifier("foo")),
                (" ~~~~ ", Identifier("bar1")),
                (" ~~ ", If),
                (" ~~~~~~~~~~ ", Identifier("baz_12_qux")),
                (" ~~~~ ", Else),
            ],
        );
    }
1673
    // A name directly followed by `(` is expected as `FunctionCall`; per the expectations this
    // also applies to `if`, which is otherwise a keyword.
    #[test]
    fn function_calls() {
        test(
            data("foo() bar_1() if() "),
            vec![
                ("~~~ ", FunctionCall("foo")),
                (" ~ ", LParen),
                (" ~ ", RParen),
                (" ~~~~~ ", FunctionCall("bar_1")),
                (" ~ ", LParen),
                (" ~ ", RParen),
                (" ~~ ", FunctionCall("if")),
                (" ~ ", LParen),
                (" ~ ", RParen),
            ],
        );
    }
1691
    // A lone `.` is expected to be a complete query: `LQuery`, `Dot` and `RQuery` all share the
    // same one-character span.
    #[test]
    fn single_query() {
        test(
            data("."),
            vec![
                ("~", LQuery),
                ("~", Dot),
                ("~", RQuery),
            ],
        );
    }
1704
    // A sequence of space-separated queries, alternating between a bare `.` and a `.field`
    // path; each one is expected to get its own `LQuery`/`RQuery` pair.
    #[test]
    fn root_query() {
        test(
            data(". .foo . .bar ."),
            vec![
                ("~ ", LQuery),
                ("~ ", Dot),
                ("~ ", RQuery),
                (" ~ ", LQuery),
                (" ~ ", Dot),
                (" ~~~ ", Identifier("foo")),
                (" ~ ", RQuery),
                (" ~ ", LQuery),
                (" ~ ", Dot),
                (" ~ ", RQuery),
                (" ~ ", LQuery),
                (" ~ ", Dot),
                (" ~~~ ", Identifier("bar")),
                (" ~ ", RQuery),
                (" ~", LQuery),
                (" ~", Dot),
                (" ~", RQuery),
            ],
        );
    }
1730
    // `@`-prefixed path segments, both directly after the leading dot and deeper in a path,
    // are expected as `PathField` tokens that include the `@`.
    #[test]
    fn at_sign_in_query() {
        test(
            data(".@foo .bar.@ook"),
            vec![
                ("~ ", LQuery),
                ("~ ", Dot),
                (" ~~~~ ", PathField("@foo")),
                (" ~ ", RQuery),
                (" ~ ", LQuery),
                (" ~ ", Dot),
                (" ~~~ ", Identifier("bar")),
                (" ~ ", Dot),
                (" ~~~~", PathField("@ook")),
                (" ~", RQuery),
            ],
        );
    }
1749
    // Three queries in a row: a dotted path, a path starting with a bare identifier
    // (`bar.baz`, so `LQuery` is followed directly by the identifier) and a two-segment dotted
    // path.
    #[test]
    fn queries() {
        test(
            data(".foo bar.baz .baz.qux"),
            vec![
                ("~ ", LQuery),
                ("~ ", Dot),
                (" ~~~ ", Identifier("foo")),
                (" ~ ", RQuery),
                (" ~ ", LQuery),
                (" ~~~ ", Identifier("bar")),
                (" ~ ", Dot),
                (" ~~~ ", Identifier("baz")),
                (" ~ ", RQuery),
                (" ~ ", LQuery),
                (" ~ ", Dot),
                (" ~~~ ", Identifier("baz")),
                (" ~ ", Dot),
                (" ~~~", Identifier("qux")),
                (" ~", RQuery),
            ],
        );
    }
1773
    // Queries nested inside other constructs: a query whose target is an array containing
    // another query (`[.foo].bar`), followed by an object whose value is an indexed array
    // (`[2][0]`), which is expected to be a query of its own.
    #[test]
    #[rustfmt::skip]
    fn nested_queries() {
        use StringLiteralToken as S;
        use StringLiteral as L;

        test(
            data(r#"[.foo].bar { "foo": [2][0] }"#),
            vec![
                ("~ ", LQuery),
                ("~ ", LBracket),
                (" ~ ", LQuery),
                (" ~ ", Dot),
                (" ~~~ ", Identifier("foo")),
                (" ~ ", RQuery),
                (" ~ ", RBracket),
                (" ~ ", Dot),
                (" ~~~ ", Identifier("bar")),
                (" ~ ", RQuery),
                (" ~ ", LBrace),
                (" ~~~~~ ", L(S("foo"))),
                (" ~ ", Colon),
                (" ~ ", LQuery),
                (" ~ ", LBracket),
                (" ~ ", IntegerLiteral(2)),
                (" ~ ", RBracket),
                (" ~ ", LBracket),
                (" ~ ", IntegerLiteral(0)),
                (" ~ ", RBracket),
                (" ~ ", RQuery),
                (" ~", RBrace),
            ],
        );
    }
1808
1809 #[test]
1810 fn complex_query_1() {
1811 use StringLiteral as L;
1812 use StringLiteralToken as S;
1813
1814 test(
1815 data(r#".a.(b | c )."d\"e"[2 ][ 1]"#),
1816 vec![
1817 ("~ ", LQuery),
1818 ("~ ", Dot),
1819 (" ~ ", Identifier("a")),
1820 (" ~ ", Dot),
1821 (" ~ ", LParen),
1822 (" ~ ", Identifier("b")),
1823 (" ~ ", Operator("|")),
1824 (" ~ ", Identifier("c")),
1825 (" ~ ", RParen),
1826 (" ~ ", Dot),
1827 (" ~~~~~~ ", L(S("d\\\"e"))),
1828 (" ~ ", LBracket),
1829 (" ~ ", IntegerLiteral(2)),
1830 (" ~ ", RBracket),
1831 (" ~ ", LBracket),
1832 (" ~ ", IntegerLiteral(1)),
1833 (" ~", RBracket),
1834 ],
1835 );
1836 }
1837
    // A query whose target is a fallible function call (`parse_json!(...)`) used as an object
    // value: the `LQuery` is expected right before the `FunctionCall` token and the `RQuery`
    // after the trailing `.c`.
    #[test]
    #[rustfmt::skip]
    fn complex_query_2() {
        use StringLiteralToken as S;
        use StringLiteral as L;

        test(
            data(r#"{ "a": parse_json!("{ \"b\": 0 }").c }"#),
            vec![
                ("~ ", LBrace),
                (" ~~~ ", L(S("a"))),
                (" ~ ", Colon),
                (" ~ ", LQuery),
                (" ~~~~~~~~~~ ", FunctionCall("parse_json")),
                (" ~ ", Bang),
                (" ~ ", LParen),
                (" ~~~~~~~~~~~~~~ ", L(S("{ \\\"b\\\": 0 }"))),
                (" ~ ", RParen),
                (" ~ ", Dot),
                (" ~ ", Identifier("c")),
                (" ~ ", RQuery),
                (" ~", RBrace),
            ],
        );
    }
1863
    // A query on an object literal whose values are a regex, a raw string (with an escaped
    // quote) and a timestamp literal; the whole `{ ... }.h` expression is expected to be
    // wrapped in a single `LQuery`/`RQuery` pair.
    #[test]
    #[rustfmt::skip]
    fn query_with_literals() {
        use StringLiteralToken as S;
        use RawStringLiteralToken as RS;
        use StringLiteral as L;
        use RawStringLiteral as R;

        test(
            data(r#"{ "a": r'b?c', "d": s'"e"\'f', "g": t'1.0T0' }.h"#),
            vec![
                ("~ ", LQuery),
                ("~ ", LBrace),
                (" ~~~ ", L(S("a"))),
                (" ~ ", Colon),
                (" ~~~~~~ ", RegexLiteral("b?c")),
                (" ~ ", Comma),
                (" ~~~ ", L(S("d"))),
                (" ~ ", Colon),
                (" ~~~~~~~~~ ", R(RS("\"e\"\\\'f"))),
                (" ~ ", Comma),
                (" ~~~ ", L(S("g"))),
                (" ~ ", Colon),
                (" ~~~~~~~~ ", TimestampLiteral("1.0T0")),
                (" ~ ", RBrace),
                (" ~ ", Dot),
                (" ~", Identifier("h")),
                (" ~", RQuery),
            ],
        );
    }
1895
    // Queries that start with a bare identifier: a field access (`foo.bar`) and an index
    // access (`foo[2]`) are each expected to form one query.
    #[test]
    fn variable_queries() {
        test(
            data("foo.bar foo[2]"),
            vec![
                ("~ ", LQuery),
                ("~~~ ", Identifier("foo")),
                (" ~ ", Dot),
                (" ~~~ ", Identifier("bar")),
                (" ~ ", RQuery),
                (" ~ ", LQuery),
                (" ~~~ ", Identifier("foo")),
                (" ~ ", LBracket),
                (" ~ ", IntegerLiteral(2)),
                (" ~", RBracket),
                (" ~", RQuery),
            ],
        );
    }
1915
    // A field access on an object literal: the query is expected to span from the opening
    // brace to the final identifier.
    #[test]
    fn object_queries() {
        use StringLiteral as L;
        use StringLiteralToken as S;

        test(
            data(r#"{ "foo": "bar" }.foo"#),
            vec![
                ("~ ", LQuery),
                ("~ ", LBrace),
                (" ~~~~~ ", L(S("foo"))),
                (" ~ ", Colon),
                (" ~~~~~ ", L(S("bar"))),
                (" ~ ", RBrace),
                (" ~ ", Dot),
                (" ~~~", Identifier("foo")),
                (" ~", RQuery),
            ],
        );
    }
1936
    // A field access on an array literal (with irregular whitespace between the elements): the
    // query is expected to span from the opening bracket to the final identifier.
    #[test]
    fn array_queries() {
        test(
            data("[ 1, 2 , 3].foo"),
            vec![
                ("~ ", LQuery),
                ("~ ", LBracket),
                (" ~ ", IntegerLiteral(1)),
                (" ~ ", Comma),
                (" ~ ", IntegerLiteral(2)),
                (" ~ ", Comma),
                (" ~ ", IntegerLiteral(3)),
                (" ~ ", RBracket),
                (" ~ ", Dot),
                (" ~~~", Identifier("foo")),
                (" ~", RQuery),
            ],
        );
    }
1956
    // An index and field access on the result of a function call with a named argument: the
    // query is expected to start at the `FunctionCall` token and end at the final identifier.
    #[test]
    fn function_call_queries() {
        use StringLiteral as L;
        use StringLiteralToken as S;

        test(
            data(r#"foo(ab: "c")[2].d"#),
            vec![
                ("~ ", LQuery),
                ("~~~ ", FunctionCall("foo")),
                (" ~ ", LParen),
                (" ~~ ", Identifier("ab")),
                (" ~ ", Colon),
                (" ~~~ ", L(S("c"))),
                (" ~ ", RParen),
                (" ~ ", LBracket),
                (" ~ ", IntegerLiteral(2)),
                (" ~ ", RBracket),
                (" ~ ", Dot),
                (" ~", Identifier("d")),
                (" ~", RQuery),
            ],
        );
    }
1981
    // A query used as an array element: only the inner `foo[0]` is expected to be wrapped in
    // `LQuery`/`RQuery`, not the surrounding array brackets.
    #[test]
    fn queries_in_array() {
        test(
            data("[foo[0]]"),
            vec![
                ("~ ", LBracket),
                (" ~ ", LQuery),
                (" ~~~ ", Identifier("foo")),
                (" ~ ", LBracket),
                (" ~ ", IntegerLiteral(0)),
                (" ~ ", RBracket),
                (" ~ ", RQuery),
                (" ~", RBracket),
            ],
        );
    }
1998
    // Queries mixed with operators: each `.x` path is expected to be closed by `RQuery` before
    // the operator that follows it, and `true` is expected as the `True` token.
    #[test]
    fn queries_op() {
        test(
            data(".a + 3 .b == true"),
            vec![
                ("~ ", LQuery),
                ("~ ", Dot),
                (" ~ ", Identifier("a")),
                (" ~ ", RQuery),
                (" ~ ", Operator("+")),
                (" ~ ", IntegerLiteral(3)),
                (" ~ ", LQuery),
                (" ~ ", Dot),
                (" ~ ", Identifier("b")),
                (" ~ ", RQuery),
                (" ~~ ", Operator("==")),
                (" ~~~~", True),
            ],
        );
    }
2019
    // A path ending in a dangling dot before a newline: the lexer is still expected to emit
    // the trailing `Dot`, close the query, and then emit `Newline` (no error at this stage).
    #[test]
    fn invalid_queries() {
        test(
            data(".foo.\n"),
            vec![
                ("~ ", LQuery),
                ("~ ", Dot),
                (" ~~~ ", Identifier("foo")),
                (" ~ ", Dot),
                (" ~ ", RQuery),
                (" ~ ", Newline),
            ],
        );
    }
2034
    // Two queries separated by a newline: the newline is expected to end the first query
    // (`RQuery` before `Newline`) and the second line starts a fresh one; `=` is expected as
    // `Equals`.
    #[test]
    fn queries_in_multiline() {
        test(
            data(".foo\n.bar = true"),
            vec![
                ("~ ", LQuery),
                ("~ ", Dot),
                (" ~~~ ", Identifier("foo")),
                (" ~ ", RQuery),
                (" ~ ", Newline),
                (" ~ ", LQuery),
                (" ~ ", Dot),
                (" ~~~ ", Identifier("bar")),
                (" ~ ", RQuery),
                (" ~ ", Equals),
                (" ~~~~", True),
            ],
        );
    }
2054
    // A quoted path segment containing dots and a space is expected as one string-literal
    // token inside the query, followed by a regular `.child` segment.
    #[test]
    #[rustfmt::skip]
    fn quoted_path_queries() {
        use StringLiteralToken as S;
        use StringLiteral as L;

        test(
            data(r#"."parent.key.with.special characters".child"#),
            vec![
                ("~ ", LQuery),
                ("~ ", Dot),
                (" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ", L(S("parent.key.with.special characters"))),
                (" ~ ", Dot),
                (" ~~~~~", Identifier("child")),
                (" ~", RQuery),
            ],
        );
    }
2073
    // Path segments that begin with digits (`0foo`, `00_7bar`) are expected as `Identifier`
    // tokens when they appear after a dot inside a query.
    #[test]
    fn queries_digit_path() {
        test(
            data(".0foo foo.00_7bar.tar"),
            vec![
                ("~ ", LQuery),
                ("~ ", Dot),
                (" ~~~~ ", Identifier("0foo")),
                (" ~ ", RQuery),
                (" ~ ", LQuery),
                (" ~~~ ", Identifier("foo")),
                (" ~ ", Dot),
                (" ~~~~~~~ ", Identifier("00_7bar")),
                (" ~ ", Dot),
                (" ~~~", Identifier("tar")),
                (" ~", RQuery),
            ],
        );
    }
2093
    // A query on an object literal that itself contains an array: per the expectations the
    // inner `[true]` does not start a query of its own, and the outer query covers the object
    // plus `.foo[0]`.
    #[test]
    fn queries_nested_delims() {
        use StringLiteral as L;
        use StringLiteralToken as S;

        test(
            data(r#"{ "foo": [true] }.foo[0]"#),
            vec![
                ("~ ", LQuery),
                ("~ ", LBrace),
                (" ~~~~~ ", L(S("foo"))),
                (" ~ ", Colon),
                (" ~ ", LBracket),
                (" ~~~~ ", True),
                (" ~ ", RBracket),
                (" ~ ", RBrace),
                (" ~ ", Dot),
                (" ~~~ ", Identifier("foo")),
                (" ~ ", LBracket),
                (" ~ ", IntegerLiteral(0)),
                (" ~", RBracket),
                (" ~", RQuery),
            ],
        );
    }
2119
    // A negative index inside a query: `-1` is expected as a single `IntegerLiteral(-1)` token
    // rather than an operator followed by a number.
    #[test]
    fn queries_negative_index() {
        test(
            data("v[-1] = 2"),
            vec![
                ("~ ", LQuery),
                ("~ ", Identifier("v")),
                (" ~ ", LBracket),
                (" ~~ ", IntegerLiteral(-1)),
                (" ~ ", RBracket),
                (" ~ ", RQuery),
                (" ~ ", Equals),
                (" ~", IntegerLiteral(2)),
            ],
        );
    }
2136
    // A raw string literal containing multi-byte characters between other tokens; the span
    // strings are matched by byte index, so the literal's span is wider than its character
    // count.
    #[test]
    fn multi_byte_character_1() {
        use RawStringLiteral as R;
        use RawStringLiteralToken as RS;

        test(
            data("a * s'漢字' * a"),
            vec![
                ("~ ", Identifier("a")),
                (" ~ ", Operator("*")),
                (" ~~~~~~~~~ ", R(RS("漢字"))),
                (" ~ ", Operator("*")),
                (" ~", Identifier("a")),
            ],
        );
    }
2153
    // Same shape as the previous multi-byte test, with a single two-byte character (`¡`)
    // inside the raw string literal.
    #[test]
    fn multi_byte_character_2() {
        use RawStringLiteral as R;
        use RawStringLiteralToken as RS;

        test(
            data("a * s'¡' * a"),
            vec![
                ("~ ", Identifier("a")),
                (" ~ ", Operator("*")),
                (" ~~~~~ ", R(RS("¡"))),
                (" ~ ", Operator("*")),
                (" ~", Identifier("a")),
            ],
        );
    }
2170
    // A `#` comment line containing an apostrophe inside a block: the expected token list has
    // no token for the comment text itself, only the `Newline` tokens around it, so the
    // apostrophe must not be treated as the start of a literal.
    #[test]
    fn comment_in_block() {
        test(
            data("if x {\n # It's an apostrophe.\n 3\n}"),
            vec![
                ("~~ ", If),
                (" ~ ", Identifier("x")),
                (" ~ ", LBrace),
                (" ~ ", Newline),
                (" ~ ", Newline),
                (" ~ ", IntegerLiteral(3)),
                (" ~ ", Newline),
                (" ~", RBrace),
            ],
        );
    }
2187
2188 #[test]
2189 fn unescape_string_literal() {
2190 let string = StringLiteralToken("zork {{ zonk }} zoog");
2191 assert_eq!(
2192 TemplateString(vec![
2193 StringSegment::Literal("zork ".to_string(), Span::new(1, 6)),
2194 StringSegment::Template("zonk".to_string(), Span::new(6, 16)),
2195 StringSegment::Literal(" zoog".to_string(), Span::new(16, 21)),
2196 ]),
2197 string.template(Span::new(0, 22))
2198 );
2199 }
2200
    // A function call followed by a closure without arguments: `->` is expected as `Arrow`
    // and the empty parameter list `||` as a single `Operator("||")` token.
    #[test]
    fn function_closure_no_arg() {
        test(
            data("foo() -> || {}"),
            vec![
                ("~~~ ", FunctionCall("foo")),
                (" ~ ", LParen),
                (" ~ ", RParen),
                (" ~~ ", Arrow),
                (" ~~ ", Operator("||")),
                (" ~ ", LBrace),
                (" ~", RBrace),
            ],
        );
    }
2216
    // A closure with one parameter: the two `|` delimiters are expected as separate
    // `Operator("|")` tokens around the identifier, and the identifier in the closure body is
    // expected as a plain `Identifier` (no query tokens).
    #[test]
    fn function_closure_single_arg() {
        test(
            data("foo() -> |idx| { idx }"),
            vec![
                ("~~~ ", FunctionCall("foo")),
                (" ~ ", LParen),
                (" ~ ", RParen),
                (" ~~ ", Arrow),
                (" ~ ", Operator("|")),
                (" ~~~ ", Identifier("idx")),
                (" ~ ", Operator("|")),
                (" ~ ", LBrace),
                (" ~~~ ", Identifier("idx")),
                (" ~", RBrace),
            ],
        );
    }
2235
    // A closure with two comma-separated parameters between `|` delimiters, followed by a
    // block that references one of them.
    #[test]
    fn function_closure_args() {
        test(
            data("foo() -> |i, v| { v }"),
            vec![
                ("~~~ ", FunctionCall("foo")),
                (" ~ ", LParen),
                (" ~ ", RParen),
                (" ~~ ", Arrow),
                (" ~ ", Operator("|")),
                (" ~ ", Identifier("i")),
                (" ~ ", Comma),
                (" ~ ", Identifier("v")),
                (" ~ ", Operator("|")),
                (" ~ ", LBrace),
                (" ~ ", Identifier("v")),
                (" ~", RBrace),
            ],
        );
    }
2256}