A work-in-progress SQL parser written in TypeScript
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

lexer.ts 3.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. import Error, { isError } from "./error";
  2. import Token, { TokenKind } from "./token";
  /**
   * Splits SQL source text into a flat list of tokens.
   *
   * The lexer walks `source` from `position` to the end, tracking the current
   * 1-based `line` so every token (and any error) records where it was found.
   * Whitespace produces no tokens; a newline additionally bumps `line`.
   */
  export default class Lexer {
    /**
     * Reserved words keyed by their lower-case spelling. Keywords are matched
     * case-insensitively, and only when they make up an entire word, so
     * `order` or `asset` stay identifiers instead of lexing as OR / AS plus a
     * leftover identifier.
     */
    private static readonly KEYWORDS: ReadonlyMap<string, TokenKind> = new Map<string, TokenKind>([
      ["select", TokenKind.SELECT],
      ["where", TokenKind.WHERE],
      ["from", TokenKind.FROM],
      ["as", TokenKind.AS],
      ["and", TokenKind.AND],
      ["or", TokenKind.OR],
    ]);

    /** Single-character tokens. `.` only gets here when no digit follows it. */
    private static readonly PUNCTUATION: ReadonlyMap<string, TokenKind> = new Map<string, TokenKind>([
      ["+", TokenKind.PLUS],
      ["-", TokenKind.MINUS],
      ["*", TokenKind.STAR],
      ["/", TokenKind.SLASH],
      ["=", TokenKind.EQUALS],
      [",", TokenKind.COMMA],
      ["`", TokenKind.BACKTICK],
      [".", TokenKind.DOT],
      [";", TokenKind.SEMICOLON],
      ["(", TokenKind.LPAREN],
      [")", TokenKind.RPAREN],
    ]);

    // Sticky (`y`) patterns match exactly at `lastIndex`, which lets the lexer
    // test the source in place instead of slicing the remainder for each token.
    /** A keyword or identifier: letter/underscore, then letters, digits, underscores. */
    private static readonly WORD = /[a-zA-Z_][a-zA-Z0-9_]*/y;
    /** `12`, `12.5` or `.5`. A trailing `.` with no digits is not part of the number. */
    private static readonly NUMBER = /[0-9]+(\.[0-9]+)?|\.[0-9]+/y;
    /** Any single whitespace character. */
    private static readonly WHITESPACE = /\s/;

    public source: string;
    public position: number;
    public line: number;

    /**
     * @param source The complete SQL text to tokenize.
     */
    constructor(source: string) {
      this.source = source;
      this.position = 0;
      this.line = 1;
    }

    /**
     * Tokenizes the source from the current position to the end.
     *
     * @returns Every token found, terminated by an EOF token, or the first
     * error encountered (scanning stops at that point).
     */
    public scan(): Token[] | Error {
      const tokens: Token[] = [];
      while (!this.atEnd()) {
        const result = this.getToken();
        if (isError(result)) {
          return result;
        }
        // `null` means whitespace was consumed and there is nothing to emit.
        if (result) {
          tokens.push(result);
        }
      }
      tokens.push(new Token(TokenKind.EOF, null, this.line));
      return tokens;
    }

    /**
     * Consumes one lexical element starting at `position`.
     *
     * @returns The token that was read, `null` when only whitespace was
     * consumed, or an error when the current character starts no known token.
     */
    private getToken(): Token | Error | null {
      const char = this.source.charAt(this.position);

      // Newline is checked before general whitespace so the line counter advances.
      if (char === "\n") {
        this.advance();
        this.nextLine();
        return null;
      }
      if (Lexer.WHITESPACE.test(char)) {
        this.advance();
        return null;
      }

      // Read the whole word first, then decide whether it is reserved.
      const word = this.match(Lexer.WORD);
      if (word !== null) {
        this.advance(word.length);
        const keyword = Lexer.KEYWORDS.get(word.toLowerCase());
        // Compare against undefined: a numeric enum member may legitimately be 0.
        return keyword === undefined
          ? new Token(TokenKind.IDENTIFIER, word, this.line)
          : new Token(keyword, null, this.line);
      }

      // Numbers are tried before punctuation so `.5` is a NUMBER, not DOT + NUMBER.
      const numeric = this.match(Lexer.NUMBER);
      if (numeric !== null) {
        this.advance(numeric.length);
        return new Token(TokenKind.NUMBER, numeric, this.line);
      }

      const punctuation = Lexer.PUNCTUATION.get(char);
      if (punctuation !== undefined) {
        this.advance();
        return new Token(punctuation, null, this.line);
      }

      return new Error(`Unrecognized character ${char}`, this.line);
    }

    /**
     * Applies a sticky pattern at the current position without consuming input.
     *
     * @param pattern A regular expression created with the `y` flag.
     * @returns The matched text, or `null` when the pattern does not match here.
     */
    private match(pattern: RegExp): string | null {
      pattern.lastIndex = this.position;
      const found = pattern.exec(this.source);
      return found === null ? null : found[0];
    }

    /**
     * Moves the read position forward.
     *
     * @param step Number of characters to skip; defaults to one.
     */
    private advance(step: number = 1) {
      this.position += step;
    }

    /** Records that a line break has been consumed. */
    private nextLine() {
      this.line += 1;
    }

    /**
     * @returns `true` once the whole source has been consumed. Uses `>=` so a
     * position beyond the end also stops the scan loop instead of spinning.
     */
    private atEnd(): boolean {
      return this.position >= this.source.length;
    }
  }