A work-in-progress SQL parser written in TypeScript
Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

lexer.ts 3.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. import Error, { isError } from "./error";
  2. import Token, { TokenKind } from "./token";
  /**
   * Splits SQL source text into a flat list of tokens.
   *
   * The lexer recognizes the keywords SELECT, WHERE, FROM, AS, AND and OR
   * (case-insensitively), identifiers, unsigned decimal numbers, and the
   * punctuation characters `+ - * / = , . ;` and the backtick. Whitespace is
   * skipped; newlines additionally advance the line counter that is attached to
   * every token and error.
   */
  export default class Lexer {
    /** Full text being tokenized. */
    public source: string;
    /** Zero-based offset into `source` of the next unread character. */
    public position: number;
    /** Line number (starting at 1) of the next unread character. */
    public line: number;

    /**
     * @param source The text to tokenize.
     */
    constructor(source: string) {
      this.source = source;
      this.position = 0;
      this.line = 1;
    }

    /**
     * Tokenizes the remaining input, starting at the current `position`.
     *
     * @returns Every token found, followed by a trailing EOF token, or the
     *   first lexing error encountered (scanning stops at that error).
     */
    public scan(): Token[] | Error {
      const tokens: Token[] = [];

      while (!this.atEnd()) {
        const result = this.getToken();
        if (isError(result)) {
          return result;
        }
        // `null` means whitespace was consumed and produced no token.
        if (result) {
          tokens.push(result);
        }
      }

      tokens.push(new Token(TokenKind.EOF, null, this.line));
      return tokens;
    }

    /**
     * Consumes one lexeme at the current position.
     *
     * @returns The token that was read, `null` when only whitespace was
     *   consumed, or an error when the next character starts no known lexeme.
     */
    private getToken(): Token | Error | null {
      const rest = this.source.slice(this.position);
      const first = rest.charAt(0);

      // Newlines are tracked separately so tokens report the correct line.
      if (first === "\n") {
        this.advance();
        this.nextLine();
        return null;
      }
      if (/\s/.test(first)) {
        this.advance();
        return null;
      }

      // Read a whole word first and only then decide whether it is a keyword.
      // Matching keywords as bare prefixes would split identifiers such as
      // `order` or `from_date` into a keyword followed by a stray identifier.
      const word = /^[a-zA-Z_][a-zA-Z0-9_]*/.exec(rest);
      if (word) {
        const text = word[0];
        this.advance(text.length);
        const keyword = this.keywordKind(text);
        return keyword === undefined
          ? new Token(TokenKind.IDENTIFIER, text, this.line)
          : new Token(keyword, null, this.line);
      }

      // Numbers: `12`, `12.5` or `.5`. A digit run followed by a dot with no
      // digits after it (e.g. `1.`) yields the number and leaves the dot.
      const numeric = /^(?:[0-9]+(?:\.[0-9]+)?|\.[0-9]+)/.exec(rest);
      if (numeric) {
        const text = numeric[0];
        this.advance(text.length);
        return new Token(TokenKind.NUMBER, text, this.line);
      }

      // A `.` that reaches this point is not followed by a digit, otherwise the
      // number rule above would have consumed it.
      const punctuation = this.punctuationKind(first);
      if (punctuation !== undefined) {
        this.advance();
        return new Token(punctuation, null, this.line);
      }

      return new Error(`Unrecognized character ${first}`, this.line);
    }

    /**
     * Maps a complete word to its keyword kind, ignoring case.
     *
     * @returns The keyword's token kind, or `undefined` for a plain identifier.
     */
    private keywordKind(word: string): TokenKind | undefined {
      switch (word.toLowerCase()) {
        case "select":
          return TokenKind.SELECT;
        case "where":
          return TokenKind.WHERE;
        case "from":
          return TokenKind.FROM;
        case "as":
          return TokenKind.AS;
        case "and":
          return TokenKind.AND;
        case "or":
          return TokenKind.OR;
        default:
          return undefined;
      }
    }

    /**
     * Maps a single character to its punctuation token kind.
     *
     * @returns The token kind, or `undefined` if the character is not one of
     *   the recognized punctuation characters.
     */
    private punctuationKind(character: string): TokenKind | undefined {
      switch (character) {
        case "+":
          return TokenKind.PLUS;
        case "-":
          return TokenKind.MINUS;
        case "*":
          return TokenKind.STAR;
        case "/":
          return TokenKind.SLASH;
        case "=":
          return TokenKind.EQUALS;
        case ",":
          return TokenKind.COMMA;
        case "`":
          return TokenKind.BACKTICK;
        case ".":
          return TokenKind.DOT;
        case ";":
          return TokenKind.SEMICOLON;
        default:
          return undefined;
      }
    }

    /** Moves the read position forward by `step` characters (default 1). */
    private advance(step: number = 1) {
      this.position += step;
    }

    /** Increments the current line number. */
    private nextLine() {
      this.line += 1;
    }

    /**
     * Reports whether all input has been consumed. Uses `>=` so that a
     * `position` set past the end of `source` still terminates scanning.
     */
    private atEnd(): boolean {
      return this.position >= this.source.length;
    }
  }