A tool to compile SQL to Elasticsearch queries
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

lexer.rs 4.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. use lazy_static::lazy_static;
  2. use regex::Regex;
  3. use crate::error::KappeError;
  4. use crate::token::{Token, TokenType};
  5. #[derive(Debug)]
  6. struct Matcher {
  7. regex: Regex,
  8. token_type: TokenType,
  9. }
  10. impl Matcher {
  11. pub fn new(regex: &str, token_type: TokenType) -> Self {
  12. Self {
  13. regex: Regex::new(regex).unwrap(),
  14. token_type,
  15. }
  16. }
  17. }
  18. lazy_static! {
  19. static ref WHITESPACE_REGEX: Regex = Regex::new(r"\s").unwrap();
  20. static ref MATCHERS: Vec<Matcher> = vec![
  21. Matcher::new(r#"^"(.*)""#, TokenType::String),
  22. Matcher::new(r#"^SELECT"#, TokenType::Select),
  23. Matcher::new(r#"^FROM"#, TokenType::From),
  24. Matcher::new(r#"^[a-z][a-zA-Z_\-.*]*"#, TokenType::Identfiier),
  25. Matcher::new(r#"^[0-9]+"#, TokenType::Number),
  26. Matcher::new(r#"^\*"#, TokenType::Star),
  27. Matcher::new(r#"^,"#, TokenType::Comma),
  28. ];
  29. }
  30. pub fn scan(input: &str) -> Result<Vec<Token>, KappeError> {
  31. let mut tokens: Vec<Token> = vec![];
  32. let mut position = 0;
  33. while position < input.len() {
  34. while WHITESPACE_REGEX.is_match(&input[position..position + 1]) {
  35. position += 1;
  36. }
  37. let mut matched = false;
  38. for matcher in MATCHERS.iter() {
  39. if matcher.regex.is_match(&input[position..]) {
  40. if let Some(m) = matcher.regex.captures_iter(&input[position..]).next() {
  41. let value = if m.len() > 1 { &m[1] } else { &m[0] };
  42. position += value.len();
  43. if matcher.token_type == TokenType::String {
  44. position += 2
  45. };
  46. tokens.push(Token::new(matcher.token_type, value));
  47. }
  48. matched = true;
  49. break;
  50. }
  51. }
  52. if !matched {
  53. return Err(KappeError::new("Unrecognized sequence"));
  54. }
  55. }
  56. Ok(tokens)
  57. }
  #[cfg(test)]
  mod tests {
      use super::*;

      /// Scans `input` and asserts that the resulting tokens equal `expected`.
      ///
      /// Panics (failing the test) if scanning returns an error.
      fn expect_tokens(input: &str, expected: Vec<Token>) {
          let actual = scan(input).unwrap();
          assert_eq!(actual, expected);
      }

      #[test]
      fn it_scans_a_number() {
          expect_tokens("123", vec![Token::new(TokenType::Number, "123")]);
      }

      #[test]
      fn it_scans_a_string() {
          expect_tokens(
              "\"hello world\"",
              vec![Token::new(TokenType::String, "hello world")],
          );
      }

      #[test]
      fn it_scans_a_keyword() {
          expect_tokens("SELECT", vec![Token::new(TokenType::Select, "SELECT")]);
      }

      #[test]
      fn it_scans_two_keywords() {
          let expected = vec![
              Token::new(TokenType::Select, "SELECT"),
              Token::new(TokenType::From, "FROM"),
          ];
          expect_tokens("SELECT FROM", expected);
      }

      #[test]
      fn it_scans_an_identifier() {
          expect_tokens("abc", vec![Token::new(TokenType::Identfiier, "abc")]);
      }

      #[test]
      fn it_allows_hyphens_in_identifiers() {
          expect_tokens(
              "abc-def",
              vec![Token::new(TokenType::Identfiier, "abc-def")],
          );
      }

      #[test]
      fn it_scans_a_star() {
          expect_tokens("*", vec![Token::new(TokenType::Star, "*")]);
      }

      #[test]
      fn it_scans_a_comma() {
          expect_tokens(",", vec![Token::new(TokenType::Comma, ",")]);
      }

      #[test]
      fn it_allows_dots_in_identifiers() {
          expect_tokens(
              "foo.bar",
              vec![Token::new(TokenType::Identfiier, "foo.bar")],
          );
      }

      #[test]
      fn it_allows_stars_in_identifiers() {
          expect_tokens("foo.*", vec![Token::new(TokenType::Identfiier, "foo.*")]);
      }

      #[test]
      fn it_scans_a_whole_expression() {
          let expected = vec![
              Token::new(TokenType::Select, "SELECT"),
              Token::new(TokenType::Star, "*"),
              Token::new(TokenType::From, "FROM"),
              Token::new(TokenType::Identfiier, "index"),
          ];
          expect_tokens("SELECT * FROM index", expected);
      }
  }
  140. }