A tool to compile SQL to Elasticsearch queries

lexer.rs

use lazy_static::lazy_static;
use regex::Regex;

#[derive(Debug, PartialEq)]
pub struct LexerError {
    message: String,
}

#[derive(Debug, PartialEq)]
pub struct Token {
    token_type: TokenType,
    value: String,
}

impl Token {
    pub fn new(token_type: TokenType, value: &str) -> Self {
        Token {
            token_type,
            value: value.to_string(),
        }
    }
}

#[derive(Clone, Copy, Debug, PartialEq)]
pub enum TokenType {
    Identifier,
    Keyword,
    Number,
    String,
}

#[derive(Debug)]
struct Matcher {
    regex: Regex,
    token_type: TokenType,
}

impl Matcher {
    pub fn new(regex: &str, token_type: TokenType) -> Self {
        Self {
            regex: Regex::new(regex).unwrap(),
            token_type,
        }
    }
}

lazy_static! {
    // Matchers are tried in order; each pattern is anchored with `^` so a
    // match always starts at the current scan position.
    static ref MATCHERS: Vec<Matcher> = vec![
        Matcher::new(r#"^"([^"]*)""#, TokenType::String),
        Matcher::new(r#"^SELECT"#, TokenType::Keyword),
        Matcher::new(r#"^[a-z][a-zA-Z_]*"#, TokenType::Identifier),
        Matcher::new(r#"^[0-9]+"#, TokenType::Number)
    ];
}

pub fn scan(input: &str) -> Result<Vec<Token>, LexerError> {
    let mut tokens: Vec<Token> = vec![];
    let mut position = 0;
    while position < input.len() {
        // Skip whitespace between tokens.
        let rest = &input[position..];
        let trimmed = rest.trim_start();
        position += rest.len() - trimmed.len();
        if position >= input.len() {
            break;
        }
        let mut matched = false;
        for matcher in MATCHERS.iter() {
            if let Some(m) = matcher.regex.captures(&input[position..]) {
                // Use the first capture group if the pattern has one (the
                // contents of a string literal), otherwise the whole match.
                let value = if m.len() > 1 { &m[1] } else { &m[0] };
                position += value.len();
                if matcher.token_type == TokenType::String {
                    // Skip the surrounding quotes, which are not part of the
                    // captured value.
                    position += 2;
                }
                tokens.push(Token::new(matcher.token_type, value));
                matched = true;
                break;
            }
        }
        if !matched {
            return Err(LexerError {
                message: format!("unexpected input at position {}", position),
            });
        }
    }
    Ok(tokens)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn it_scans_a_number() {
        assert_eq!(
            scan("123").unwrap(),
            vec![Token::new(TokenType::Number, "123")]
        )
    }

    #[test]
    fn it_scans_a_string() {
        assert_eq!(
            scan("\"hello world\"").unwrap(),
            vec![Token::new(TokenType::String, "hello world")]
        )
    }

    #[test]
    fn it_scans_a_keyword() {
        assert_eq!(
            scan("SELECT").unwrap(),
            vec![Token::new(TokenType::Keyword, "SELECT")]
        )
    }

    #[test]
    fn it_scans_an_identifier() {
        assert_eq!(
            scan("abc").unwrap(),
            vec![Token::new(TokenType::Identifier, "abc")]
        )
    }
}
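
For a quick end-to-end check of the scanner on a small SQL fragment, a minimal driver could look like the sketch below. It assumes a binary crate layout where this file sits next to main.rs as the lexer module; that layout and the sample fragment are assumptions for illustration, not something the repository specifies.

// main.rs — hypothetical driver for the lexer above.
mod lexer;

use lexer::scan;

fn main() {
    // With the anchored matchers and whitespace skipping in scan(), this
    // fragment should yield four tokens: the keyword SELECT, the identifier
    // `name`, the number `42`, and the string `hello`.
    let tokens = scan(r#"SELECT name 42 "hello""#).unwrap();
    println!("{:?}", tokens);
}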