A tool to compile SQL to Elasticsearch queries
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

lexer.rs 2.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. use lazy_static::lazy_static;
  2. use regex::Regex;
  /// Error produced when the lexer cannot turn its input into tokens.
  ///
  /// The description is available through [`LexerError::message`] and through
  /// the `Display` implementation, and the type implements
  /// [`std::error::Error`] so it can be propagated with `?` into boxed or
  /// wrapped error types.
  #[derive(Clone, Debug, PartialEq, Eq)]
  pub struct LexerError {
      /// Human-readable description of what went wrong.
      message: String,
  }

  impl LexerError {
      /// Returns the human-readable description of the failure.
      pub fn message(&self) -> &str {
          &self.message
      }
  }

  impl std::fmt::Display for LexerError {
      /// Writes the error description verbatim.
      fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
          f.write_str(&self.message)
      }
  }

  impl std::error::Error for LexerError {}
  7. #[derive(Debug, PartialEq)]
  8. pub struct Token {
  9. token_type: TokenType,
  10. value: String,
  11. }
  12. impl Token {
  13. pub fn new(token_type: TokenType, value: &str) -> Self {
  14. Token {
  15. token_type,
  16. value: value.to_string(),
  17. }
  18. }
  19. }
  /// The lexical category of a token.
  ///
  /// `Eq` and `Hash` are derived alongside `PartialEq` so the type can be used
  /// as a key in hash-based collections and in exhaustive equality contexts.
  #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
  pub enum TokenType {
      /// An identifier.
      ///
      /// NOTE(review): the variant name is a misspelling of `Identifier`. It is
      /// kept as-is because renaming a public variant would break every
      /// existing user of the enum.
      Identfiier,
      /// A reserved word.
      Keyword,
      /// A numeric literal.
      Number,
      /// A string literal.
      String,
  }
  27. #[derive(Debug)]
  28. struct Matcher {
  29. regex: Regex,
  30. token_type: TokenType,
  31. }
  32. impl Matcher {
  33. pub fn new(regex: &str, token_type: TokenType) -> Self {
  34. Self {
  35. regex: Regex::new(regex).unwrap(),
  36. token_type,
  37. }
  38. }
  39. }
  40. lazy_static! {
  41. static ref MATCHERS: Vec<Matcher> = vec![
  42. Matcher::new(r#""(.*)""#, TokenType::String),
  43. Matcher::new(r#"SELECT"#, TokenType::Keyword),
  44. Matcher::new(r#"[a-z][a-zA-Z_]*"#, TokenType::Identfiier),
  45. Matcher::new(r#"[0-9]+"#, TokenType::Number)
  46. ];
  47. }
  48. pub fn scan(input: &str) -> Result<Vec<Token>, LexerError> {
  49. let mut tokens: Vec<Token> = vec![];
  50. let mut position = 0;
  51. while position < input.len() {
  52. for matcher in MATCHERS.iter() {
  53. if matcher.regex.is_match(&input[position..]) {
  54. dbg!(&input[position..]);
  55. if let Some(m) = matcher.regex.captures_iter(&input[position..]).next() {
  56. let value = if m.len() > 1 { &m[1] } else { &m[0] };
  57. position += value.len();
  58. if matcher.token_type == TokenType::String {
  59. position += 2
  60. };
  61. tokens.push(Token::new(matcher.token_type, value));
  62. }
  63. break;
  64. }
  65. }
  66. }
  67. Ok(tokens)
  68. }
  #[cfg(test)]
  mod tests {
      use super::*;

      /// Asserts that `input` lexes to exactly one token with the given type
      /// and value.
      fn assert_single_token(input: &str, token_type: TokenType, value: &str) {
          let expected = vec![Token::new(token_type, value)];
          let actual = scan(input).unwrap();
          assert_eq!(actual, expected);
      }

      #[test]
      fn it_scans_a_number() {
          assert_single_token("123", TokenType::Number, "123");
      }

      #[test]
      fn it_scans_a_string() {
          // The surrounding quotes are not part of the token value.
          assert_single_token("\"hello world\"", TokenType::String, "hello world");
      }

      #[test]
      fn it_scans_a_keyword() {
          assert_single_token("SELECT", TokenType::Keyword, "SELECT");
      }

      #[test]
      fn it_scans_an_identifier() {
          assert_single_token("abc", TokenType::Identfiier, "abc");
      }
  }