Browse Source

Add ^ to all matchers and avoid infinite loop if there's no match

main
Dylan Baker 3 years ago
parent
commit
336803fe81
1 changed file with 28 additions and 6 deletions
  1. 28
    6
      src/lexer.rs

+ 28
- 6
src/lexer.rs View File

@@ -22,12 +22,12 @@ impl Matcher {
22 22
 lazy_static! {
23 23
     static ref WHITESPACE_REGEX: Regex = Regex::new(r"\s").unwrap();
24 24
     static ref MATCHERS: Vec<Matcher> = vec![
25
-        Matcher::new(r#""(.*)""#, TokenType::String),
26
-        Matcher::new(r#"SELECT"#, TokenType::Select),
27
-        Matcher::new(r#"FROM"#, TokenType::From),
28
-        Matcher::new(r#"[a-z][a-zA-Z_\-]*"#, TokenType::Identfiier),
29
-        Matcher::new(r#"[0-9]+"#, TokenType::Number),
30
-        Matcher::new(r#"\*"#, TokenType::Star),
25
+        Matcher::new(r#"^"(.*)""#, TokenType::String),
26
+        Matcher::new(r#"^SELECT"#, TokenType::Select),
27
+        Matcher::new(r#"^FROM"#, TokenType::From),
28
+        Matcher::new(r#"^[a-z][a-zA-Z_\-]*"#, TokenType::Identfiier),
29
+        Matcher::new(r#"^[0-9]+"#, TokenType::Number),
30
+        Matcher::new(r#"^\*"#, TokenType::Star),
31 31
     ];
32 32
 }
33 33
 
@@ -40,20 +40,29 @@ pub fn scan(input: &str) -> Result<Vec<Token>, LexerError> {
40 40
             position += 1;
41 41
         }
42 42
 
43
+        let mut matched = false;
44
+
43 45
         for matcher in MATCHERS.iter() {
44 46
             if matcher.regex.is_match(&input[position..]) {
45 47
                 if let Some(m) = matcher.regex.captures_iter(&input[position..]).next() {
46 48
                     let value = if m.len() > 1 { &m[1] } else { &m[0] };
47 49
                     position += value.len();
50
+
48 51
                     if matcher.token_type == TokenType::String {
49 52
                         position += 2
50 53
                     };
54
+
51 55
                     tokens.push(Token::new(matcher.token_type, value));
52 56
                 }
53 57
 
58
+                matched = true;
54 59
                 break;
55 60
             }
56 61
         }
62
+
63
+        if !matched {
64
+            return Err(LexerError::new("Unrecognized sequence"));
65
+        }
57 66
     }
58 67
 
59 68
     Ok(tokens)
@@ -118,4 +127,17 @@ mod tests {
118 127
     fn it_scans_a_star() {
119 128
         assert_eq!(scan("*").unwrap(), vec![Token::new(TokenType::Star, "*")])
120 129
     }
130
+
131
+    #[test]
132
+    fn it_scans_a_whole_expression() {
133
+        assert_eq!(
134
+            scan("SELECT * FROM index").unwrap(),
135
+            vec![
136
+                Token::new(TokenType::Select, "SELECT"),
137
+                Token::new(TokenType::Star, "*"),
138
+                Token::new(TokenType::From, "FROM"),
139
+                Token::new(TokenType::Identfiier, "index"),
140
+            ]
141
+        )
142
+    }
121 143
 }

Loading…
Cancel
Save