Browse Source

Add ^ to all matchers and avoid infinite loop if there's no match

main
Dylan Baker 4 years ago
parent
commit
336803fe81
1 changed file with 28 additions and 6 deletions
  1. 28
    6
      src/lexer.rs

+ 28
- 6
src/lexer.rs View File

22
 lazy_static! {
22
 lazy_static! {
23
     static ref WHITESPACE_REGEX: Regex = Regex::new(r"\s").unwrap();
23
     static ref WHITESPACE_REGEX: Regex = Regex::new(r"\s").unwrap();
24
     static ref MATCHERS: Vec<Matcher> = vec![
24
     static ref MATCHERS: Vec<Matcher> = vec![
25
-        Matcher::new(r#""(.*)""#, TokenType::String),
26
-        Matcher::new(r#"SELECT"#, TokenType::Select),
27
-        Matcher::new(r#"FROM"#, TokenType::From),
28
-        Matcher::new(r#"[a-z][a-zA-Z_\-]*"#, TokenType::Identfiier),
29
-        Matcher::new(r#"[0-9]+"#, TokenType::Number),
30
-        Matcher::new(r#"\*"#, TokenType::Star),
25
+        Matcher::new(r#"^"(.*)""#, TokenType::String),
26
+        Matcher::new(r#"^SELECT"#, TokenType::Select),
27
+        Matcher::new(r#"^FROM"#, TokenType::From),
28
+        Matcher::new(r#"^[a-z][a-zA-Z_\-]*"#, TokenType::Identfiier),
29
+        Matcher::new(r#"^[0-9]+"#, TokenType::Number),
30
+        Matcher::new(r#"^\*"#, TokenType::Star),
31
     ];
31
     ];
32
 }
32
 }
33
 
33
 
40
             position += 1;
40
             position += 1;
41
         }
41
         }
42
 
42
 
43
+        let mut matched = false;
44
+
43
         for matcher in MATCHERS.iter() {
45
         for matcher in MATCHERS.iter() {
44
             if matcher.regex.is_match(&input[position..]) {
46
             if matcher.regex.is_match(&input[position..]) {
45
                 if let Some(m) = matcher.regex.captures_iter(&input[position..]).next() {
47
                 if let Some(m) = matcher.regex.captures_iter(&input[position..]).next() {
46
                     let value = if m.len() > 1 { &m[1] } else { &m[0] };
48
                     let value = if m.len() > 1 { &m[1] } else { &m[0] };
47
                     position += value.len();
49
                     position += value.len();
50
+
48
                     if matcher.token_type == TokenType::String {
51
                     if matcher.token_type == TokenType::String {
49
                         position += 2
52
                         position += 2
50
                     };
53
                     };
54
+
51
                     tokens.push(Token::new(matcher.token_type, value));
55
                     tokens.push(Token::new(matcher.token_type, value));
52
                 }
56
                 }
53
 
57
 
58
+                matched = true;
54
                 break;
59
                 break;
55
             }
60
             }
56
         }
61
         }
62
+
63
+        if !matched {
64
+            return Err(LexerError::new("Unrecognized sequence"));
65
+        }
57
     }
66
     }
58
 
67
 
59
     Ok(tokens)
68
     Ok(tokens)
118
     fn it_scans_a_star() {
127
     fn it_scans_a_star() {
119
         assert_eq!(scan("*").unwrap(), vec![Token::new(TokenType::Star, "*")])
128
         assert_eq!(scan("*").unwrap(), vec![Token::new(TokenType::Star, "*")])
120
     }
129
     }
130
+
131
+    #[test]
132
+    fn it_scans_a_whole_expression() {
133
+        assert_eq!(
134
+            scan("SELECT * FROM index").unwrap(),
135
+            vec![
136
+                Token::new(TokenType::Select, "SELECT"),
137
+                Token::new(TokenType::Star, "*"),
138
+                Token::new(TokenType::From, "FROM"),
139
+                Token::new(TokenType::Identfiier, "index"),
140
+            ]
141
+        )
142
+    }
121
 }
143
 }

Loading…
Cancel
Save