123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121 |
- use lazy_static::lazy_static;
- use regex::Regex;
-
- use crate::error::LexerError;
- use crate::token::{Token, TokenType};
-
- #[derive(Debug)]
- struct Matcher {
- regex: Regex,
- token_type: TokenType,
- }
-
- impl Matcher {
- pub fn new(regex: &str, token_type: TokenType) -> Self {
- Self {
- regex: Regex::new(regex).unwrap(),
- token_type,
- }
- }
- }
-
- lazy_static! {
- static ref WHITESPACE_REGEX: Regex = Regex::new(r"\s").unwrap();
- static ref MATCHERS: Vec<Matcher> = vec![
- Matcher::new(r#""(.*)""#, TokenType::String),
- Matcher::new(r#"SELECT"#, TokenType::Select),
- Matcher::new(r#"FROM"#, TokenType::From),
- Matcher::new(r#"[a-z][a-zA-Z_\-]*"#, TokenType::Identfiier),
- Matcher::new(r#"[0-9]+"#, TokenType::Number),
- Matcher::new(r#"\*"#, TokenType::Star),
- ];
- }
-
- pub fn scan(input: &str) -> Result<Vec<Token>, LexerError> {
- let mut tokens: Vec<Token> = vec![];
- let mut position = 0;
-
- while position < input.len() {
- while WHITESPACE_REGEX.is_match(&input[position..position + 1]) {
- position += 1;
- }
-
- for matcher in MATCHERS.iter() {
- if matcher.regex.is_match(&input[position..]) {
- if let Some(m) = matcher.regex.captures_iter(&input[position..]).next() {
- let value = if m.len() > 1 { &m[1] } else { &m[0] };
- position += value.len();
- if matcher.token_type == TokenType::String {
- position += 2
- };
- tokens.push(Token::new(matcher.token_type, value));
- }
-
- break;
- }
- }
- }
-
- Ok(tokens)
- }
-
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that scanning `source` succeeds and yields exactly `expected`.
    #[track_caller]
    fn assert_scans_to(source: &str, expected: Vec<Token>) {
        assert_eq!(scan(source).unwrap(), expected);
    }

    #[test]
    fn it_scans_a_number() {
        assert_scans_to("123", vec![Token::new(TokenType::Number, "123")]);
    }

    #[test]
    fn it_scans_a_string() {
        // The surrounding quotes are not part of the token value.
        assert_scans_to(
            "\"hello world\"",
            vec![Token::new(TokenType::String, "hello world")],
        );
    }

    #[test]
    fn it_scans_a_keyword() {
        assert_scans_to("SELECT", vec![Token::new(TokenType::Select, "SELECT")]);
    }

    #[test]
    fn it_scans_two_keywords() {
        let expected = vec![
            Token::new(TokenType::Select, "SELECT"),
            Token::new(TokenType::From, "FROM"),
        ];
        assert_scans_to("SELECT FROM", expected);
    }

    #[test]
    fn it_scans_an_identifier() {
        assert_scans_to("abc", vec![Token::new(TokenType::Identfiier, "abc")]);
    }

    #[test]
    fn it_allows_hyphens_in_identifiers() {
        assert_scans_to(
            "abc-def",
            vec![Token::new(TokenType::Identfiier, "abc-def")],
        );
    }

    #[test]
    fn it_scans_a_star() {
        assert_scans_to("*", vec![Token::new(TokenType::Star, "*")]);
    }
}
|