123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116 |
- use lazy_static::lazy_static;
- use regex::Regex;
-
/// Error returned by [`scan`] when the input cannot be tokenized.
#[derive(Debug, PartialEq)]
pub struct LexerError {
    // Human-readable description of what went wrong.
    message: String,
}

impl std::fmt::Display for LexerError {
    /// Surface the underlying message so callers can report it
    /// without access to the private field.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.message)
    }
}

// Debug + Display are both implemented, so the standard Error trait
// comes for free; this lets LexerError flow through `Box<dyn Error>`.
impl std::error::Error for LexerError {}
-
- #[derive(Debug, PartialEq)]
- pub struct Token {
- token_type: TokenType,
- value: String,
- }
-
- impl Token {
- pub fn new(token_type: TokenType, value: &str) -> Self {
- Token {
- token_type,
- value: value.to_string(),
- }
- }
- }
-
/// Classification of a scanned lexeme.
// Eq and Hash are free on a unit-only enum and let TokenType be used
// as a set/map key.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum TokenType {
    // NOTE(review): misspelling of "Identifier" — kept as-is because the
    // variant is public and renaming it would break existing callers.
    Identfiier,
    Keyword,
    Number,
    String,
}
-
/// Pairs a compiled pattern with the token kind it produces.
/// Internal to the lexer; instances live in the static `MATCHERS` table.
#[derive(Debug)]
struct Matcher {
    // Compiled pattern tried against the remaining input.
    regex: Regex,
    // Kind of token emitted when `regex` matches.
    token_type: TokenType,
}
-
- impl Matcher {
- pub fn new(regex: &str, token_type: TokenType) -> Self {
- Self {
- regex: Regex::new(regex).unwrap(),
- token_type,
- }
- }
- }
-
- lazy_static! {
- static ref MATCHERS: Vec<Matcher> = vec![
- Matcher::new(r#""(.*)""#, TokenType::String),
- Matcher::new(r#"SELECT"#, TokenType::Keyword),
- Matcher::new(r#"[a-z][a-zA-Z_]*"#, TokenType::Identfiier),
- Matcher::new(r#"[0-9]+"#, TokenType::Number)
- ];
- }
-
- pub fn scan(input: &str) -> Result<Vec<Token>, LexerError> {
- let mut tokens: Vec<Token> = vec![];
- let mut position = 0;
-
- while position < input.len() {
- for matcher in MATCHERS.iter() {
- if matcher.regex.is_match(&input[position..]) {
- dbg!(&input[position..]);
- if let Some(m) = matcher.regex.captures_iter(&input[position..]).next() {
- let value = if m.len() > 1 { &m[1] } else { &m[0] };
- position += value.len();
- if matcher.token_type == TokenType::String {
- position += 2
- };
- tokens.push(Token::new(matcher.token_type, value));
- }
-
- break;
- }
- }
- }
-
- Ok(tokens)
- }
-
#[cfg(test)]
mod tests {
    use super::*;

    // Each case runs one lexeme through `scan` and checks that exactly
    // one token of the expected kind and value comes back.

    #[test]
    fn it_scans_a_number() {
        let tokens = scan("123").unwrap();
        assert_eq!(tokens, vec![Token::new(TokenType::Number, "123")]);
    }

    #[test]
    fn it_scans_a_string() {
        // The surrounding quotes are consumed; only the contents are kept.
        let tokens = scan("\"hello world\"").unwrap();
        assert_eq!(tokens, vec![Token::new(TokenType::String, "hello world")]);
    }

    #[test]
    fn it_scans_a_keyword() {
        let tokens = scan("SELECT").unwrap();
        assert_eq!(tokens, vec![Token::new(TokenType::Keyword, "SELECT")]);
    }

    #[test]
    fn it_scans_an_identifier() {
        let tokens = scan("abc").unwrap();
        assert_eq!(tokens, vec![Token::new(TokenType::Identfiier, "abc")]);
    }
}
|