Browse Source

Lexer

main
Dylan Baker 3 years ago
parent
commit
6d3559c04f
4 changed files with 166 additions and 0 deletions
  1. 47
    0
      Cargo.lock
  2. 2
    0
      Cargo.toml
  3. 116
    0
      src/lexer.rs
  4. 1
    0
      src/lib.rs

+ 47
- 0
Cargo.lock View File

@@ -0,0 +1,47 @@
1
+# This file is automatically @generated by Cargo.
2
+# It is not intended for manual editing.
3
+[[package]]
4
+name = "aho-corasick"
5
+version = "0.7.15"
6
+source = "registry+https://github.com/rust-lang/crates.io-index"
7
+checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5"
8
+dependencies = [
9
+ "memchr",
10
+]
11
+
12
+[[package]]
13
+name = "elastic"
14
+version = "0.1.0"
15
+dependencies = [
16
+ "lazy_static",
17
+ "regex",
18
+]
19
+
20
+[[package]]
21
+name = "lazy_static"
22
+version = "1.4.0"
23
+source = "registry+https://github.com/rust-lang/crates.io-index"
24
+checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
25
+
26
+[[package]]
27
+name = "memchr"
28
+version = "2.3.4"
29
+source = "registry+https://github.com/rust-lang/crates.io-index"
30
+checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525"
31
+
32
+[[package]]
33
+name = "regex"
34
+version = "1.4.5"
35
+source = "registry+https://github.com/rust-lang/crates.io-index"
36
+checksum = "957056ecddbeba1b26965114e191d2e8589ce74db242b6ea25fc4062427a5c19"
37
+dependencies = [
38
+ "aho-corasick",
39
+ "memchr",
40
+ "regex-syntax",
41
+]
42
+
43
+[[package]]
44
+name = "regex-syntax"
45
+version = "0.6.23"
46
+source = "registry+https://github.com/rust-lang/crates.io-index"
47
+checksum = "24d5f089152e60f62d28b835fbff2cd2e8dc0baf1ac13343bef92ab7eed84548"

+ 2
- 0
Cargo.toml View File

@@ -7,3 +7,5 @@ edition = "2018"
7 7
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
8 8
 
9 9
 [dependencies]
10
+lazy_static = "1.4.0"
11
+regex = "1.4.5"

+ 116
- 0
src/lexer.rs View File

@@ -0,0 +1,116 @@
1
+use lazy_static::lazy_static;
2
+use regex::Regex;
3
+
4
/// Error returned by `scan` when the lexer cannot make progress.
#[derive(Debug, PartialEq)]
pub struct LexerError {
    /// Human-readable description of what went wrong.
    message: String,
}
8
+
9
/// A single lexeme produced by `scan`: its classification plus the matched
/// text. For string tokens the value is the contents without the quotes
/// (see the capture group in `MATCHERS` and `it_scans_a_string`).
#[derive(Debug, PartialEq)]
pub struct Token {
    token_type: TokenType,
    value: String,
}
14
+
15
+impl Token {
16
+    pub fn new(token_type: TokenType, value: &str) -> Self {
17
+        Token {
18
+            token_type,
19
+            value: value.to_string(),
20
+        }
21
+    }
22
+}
23
+
24
/// The categories of token the lexer recognizes. `Copy` lets `scan` hand a
/// matcher's type to `Token::new` without cloning.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum TokenType {
    // NOTE(review): "Identfiier" is a typo for "Identifier". Left as-is here
    // because renaming the variant would have to touch every use site
    // (MATCHERS, the tests, and any external callers) in the same change.
    Identfiier,
    Keyword,
    Number,
    String,
}
31
+
32
/// Pairs a compiled regular expression with the token type to emit when the
/// expression matches.
#[derive(Debug)]
struct Matcher {
    regex: Regex,
    token_type: TokenType,
}
37
+
38
+impl Matcher {
39
+    pub fn new(regex: &str, token_type: TokenType) -> Self {
40
+        Self {
41
+            regex: Regex::new(regex).unwrap(),
42
+            token_type,
43
+        }
44
+    }
45
+}
46
+
47
+lazy_static! {
48
+    static ref MATCHERS: Vec<Matcher> = vec![
49
+        Matcher::new(r#""(.*)""#, TokenType::String),
50
+        Matcher::new(r#"SELECT"#, TokenType::Keyword),
51
+        Matcher::new(r#"[a-z][a-zA-Z_]*"#, TokenType::Identfiier),
52
+        Matcher::new(r#"[0-9]+"#, TokenType::Number)
53
+    ];
54
+}
55
+
56
+pub fn scan(input: &str) -> Result<Vec<Token>, LexerError> {
57
+    let mut tokens: Vec<Token> = vec![];
58
+    let mut position = 0;
59
+
60
+    while position < input.len() {
61
+        for matcher in MATCHERS.iter() {
62
+            if matcher.regex.is_match(&input[position..]) {
63
+                dbg!(&input[position..]);
64
+                if let Some(m) = matcher.regex.captures_iter(&input[position..]).next() {
65
+                    let value = if m.len() > 1 { &m[1] } else { &m[0] };
66
+                    position += value.len();
67
+                    if matcher.token_type == TokenType::String {
68
+                        position += 2
69
+                    };
70
+                    tokens.push(Token::new(matcher.token_type, value));
71
+                }
72
+
73
+                break;
74
+            }
75
+        }
76
+    }
77
+
78
+    Ok(tokens)
79
+}
80
+
81
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `input` lexes to exactly one token of the given type
    /// and value.
    fn assert_single_token(input: &str, token_type: TokenType, value: &str) {
        assert_eq!(
            scan(input).unwrap(),
            vec![Token::new(token_type, value)]
        );
    }

    #[test]
    fn it_scans_a_number() {
        assert_single_token("123", TokenType::Number, "123");
    }

    #[test]
    fn it_scans_a_string() {
        // The token value is the string contents, without the quotes.
        assert_single_token("\"hello world\"", TokenType::String, "hello world");
    }

    #[test]
    fn it_scans_a_keyword() {
        assert_single_token("SELECT", TokenType::Keyword, "SELECT");
    }

    #[test]
    fn it_scans_an_identifier() {
        assert_single_token("abc", TokenType::Identfiier, "abc");
    }
}

+ 1
- 0
src/lib.rs View File

@@ -0,0 +1 @@
1
+mod lexer;

Loading…
Cancel
Save