use lazy_static::lazy_static;
use regex::Regex;

/// Error returned when the input cannot be tokenized.
#[derive(Debug, PartialEq)]
pub struct LexerError {
    message: String,
}

#[derive(Debug, PartialEq)]
pub struct Token {
    token_type: TokenType,
    value: String,
}

impl Token {
    pub fn new(token_type: TokenType, value: &str) -> Self {
        Token {
            token_type,
            value: value.to_string(),
        }
    }
}

#[derive(Clone, Copy, Debug, PartialEq)]
pub enum TokenType {
    Identifier,
    Keyword,
    Number,
    String,
}

/// Pairs a regular expression with the token type it produces.
#[derive(Debug)]
struct Matcher {
    regex: Regex,
    token_type: TokenType,
}

impl Matcher {
    pub fn new(regex: &str, token_type: TokenType) -> Self {
        Self {
            regex: Regex::new(regex).unwrap(),
            token_type,
        }
    }
}

lazy_static! {
    // Matchers are tried in order, so the more specific patterns (strings,
    // keywords) come before the general identifier and number patterns.
    // The string pattern uses [^"]* so it stops at the first closing quote.
    static ref MATCHERS: Vec<Matcher> = vec![
        Matcher::new(r#""([^"]*)""#, TokenType::String),
        Matcher::new(r#"SELECT"#, TokenType::Keyword),
        Matcher::new(r#"[a-z][a-zA-Z_]*"#, TokenType::Identifier),
        Matcher::new(r#"[0-9]+"#, TokenType::Number),
    ];
}

pub fn scan(input: &str) -> Result<Vec<Token>, LexerError> {
    let mut tokens: Vec<Token> = vec![];
    let mut position = 0;

    while position < input.len() {
        let mut matched = false;

        for matcher in MATCHERS.iter() {
            if let Some(captures) = matcher.regex.captures(&input[position..]) {
                let whole_match = captures.get(0).unwrap();
                // Only accept matches anchored at the current position;
                // otherwise the position bookkeeping below would be wrong.
                if whole_match.start() != 0 {
                    continue;
                }
                // Prefer the first capture group when present (e.g. the
                // string body without its surrounding quotes).
                let value = if captures.len() > 1 {
                    &captures[1]
                } else {
                    &captures[0]
                };
                // Advance past the full match, including any characters
                // (such as quotes) that are not part of the token value.
                position += whole_match.end();
                tokens.push(Token::new(matcher.token_type, value));
                matched = true;
                break;
            }
        }

        // If no matcher applies at the current position, report an error
        // instead of looping forever.
        if !matched {
            return Err(LexerError {
                message: format!("unexpected input at position {}", position),
            });
        }
    }

    Ok(tokens)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn it_scans_a_number() {
        assert_eq!(
            scan("123").unwrap(),
            vec![Token::new(TokenType::Number, "123")]
        )
    }

    #[test]
    fn it_scans_a_string() {
        assert_eq!(
            scan("\"hello world\"").unwrap(),
            vec![Token::new(TokenType::String, "hello world")]
        )
    }

    #[test]
    fn it_scans_a_keyword() {
        assert_eq!(
            scan("SELECT").unwrap(),
            vec![Token::new(TokenType::Keyword, "SELECT")]
        )
    }

    #[test]
    fn it_scans_an_identifier() {
        assert_eq!(
            scan("abc").unwrap(),
            vec![Token::new(TokenType::Identifier, "abc")]
        )
    }
}