A tool to compile SQL to Elasticsearch queries
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

lexer.rs 3.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. use lazy_static::lazy_static;
  2. use regex::Regex;
  3. use crate::error::LexerError;
  4. use crate::token::{Token, TokenType};
  5. #[derive(Debug)]
  6. struct Matcher {
  7. regex: Regex,
  8. token_type: TokenType,
  9. }
  10. impl Matcher {
  11. pub fn new(regex: &str, token_type: TokenType) -> Self {
  12. Self {
  13. regex: Regex::new(regex).unwrap(),
  14. token_type,
  15. }
  16. }
  17. }
  18. lazy_static! {
  19. static ref WHITESPACE_REGEX: Regex = Regex::new(r"\s").unwrap();
  20. static ref MATCHERS: Vec<Matcher> = vec![
  21. Matcher::new(r#""(.*)""#, TokenType::String),
  22. Matcher::new(r#"SELECT"#, TokenType::Select),
  23. Matcher::new(r#"FROM"#, TokenType::From),
  24. Matcher::new(r#"[a-z][a-zA-Z_\-]*"#, TokenType::Identfiier),
  25. Matcher::new(r#"[0-9]+"#, TokenType::Number),
  26. Matcher::new(r#"\*"#, TokenType::Star),
  27. ];
  28. }
  29. pub fn scan(input: &str) -> Result<Vec<Token>, LexerError> {
  30. let mut tokens: Vec<Token> = vec![];
  31. let mut position = 0;
  32. while position < input.len() {
  33. while WHITESPACE_REGEX.is_match(&input[position..position + 1]) {
  34. position += 1;
  35. }
  36. for matcher in MATCHERS.iter() {
  37. if matcher.regex.is_match(&input[position..]) {
  38. if let Some(m) = matcher.regex.captures_iter(&input[position..]).next() {
  39. let value = if m.len() > 1 { &m[1] } else { &m[0] };
  40. position += value.len();
  41. if matcher.token_type == TokenType::String {
  42. position += 2
  43. };
  44. tokens.push(Token::new(matcher.token_type, value));
  45. }
  46. break;
  47. }
  48. }
  49. }
  50. Ok(tokens)
  51. }
  52. #[cfg(test)]
  53. mod tests {
  54. use super::*;
  55. #[test]
  56. fn it_scans_a_number() {
  57. assert_eq!(
  58. scan("123").unwrap(),
  59. vec![Token::new(TokenType::Number, "123")]
  60. )
  61. }
  62. #[test]
  63. fn it_scans_a_string() {
  64. assert_eq!(
  65. scan("\"hello world\"").unwrap(),
  66. vec![Token::new(TokenType::String, "hello world")]
  67. )
  68. }
  69. #[test]
  70. fn it_scans_a_keyword() {
  71. assert_eq!(
  72. scan("SELECT").unwrap(),
  73. vec![Token::new(TokenType::Select, "SELECT")]
  74. )
  75. }
  76. #[test]
  77. fn it_scans_two_keywords() {
  78. assert_eq!(
  79. scan("SELECT FROM").unwrap(),
  80. vec![
  81. Token::new(TokenType::Select, "SELECT"),
  82. Token::new(TokenType::From, "FROM")
  83. ]
  84. )
  85. }
  86. #[test]
  87. fn it_scans_an_identifier() {
  88. assert_eq!(
  89. scan("abc").unwrap(),
  90. vec![Token::new(TokenType::Identfiier, "abc")]
  91. )
  92. }
  93. #[test]
  94. fn it_allows_hyphens_in_identifiers() {
  95. assert_eq!(
  96. scan("abc-def").unwrap(),
  97. vec![Token::new(TokenType::Identfiier, "abc-def")]
  98. )
  99. }
  100. #[test]
  101. fn it_scans_a_star() {
  102. assert_eq!(scan("*").unwrap(), vec![Token::new(TokenType::Star, "*")])
  103. }
  104. }