A work-in-progress SQL parser written in TypeScript
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

lexer.ts 3.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. import Error, { isError } from "./error";
  2. import Token, { TokenKind } from "./token";
  3. export default class Lexer {
  4. public source: string;
  5. public position: number;
  6. public line: number;
  7. constructor(source: string) {
  8. this.source = source;
  9. this.position = 0;
  10. this.line = 1;
  11. }
  12. public scan(): Token[] | Error {
  13. const tokens = [];
  14. while (!this.atEnd()) {
  15. const result = this.getToken();
  16. if (isError(result)) {
  17. return result;
  18. } else if (result) {
  19. tokens.push(result);
  20. }
  21. }
  22. tokens.push(new Token(TokenKind.EOF, null, this.line));
  23. return tokens;
  24. }
  25. private getToken(): Token | Error | null {
  26. const source = this.source.slice(this.position);
  27. if (source.match(/^select/i)) {
  28. this.advance(6);
  29. return new Token(TokenKind.SELECT, null, this.line);
  30. } else if (source.match(/^where/i)) {
  31. this.advance(5);
  32. return new Token(TokenKind.WHERE, null, this.line);
  33. } else if (source.match(/^from/i)) {
  34. this.advance(4);
  35. return new Token(TokenKind.FROM, null, this.line);
  36. } else if (source.match(/^as/i)) {
  37. this.advance(2);
  38. return new Token(TokenKind.AS, null, this.line);
  39. } else if (source.match(/^\*/)) {
  40. this.advance();
  41. return new Token(TokenKind.STAR, null, this.line);
  42. } else if (source.match(/^=/)) {
  43. this.advance();
  44. return new Token(TokenKind.EQUALS, null, this.line);
  45. } else if (source.match(/^,/)) {
  46. this.advance();
  47. return new Token(TokenKind.COMMA, null, this.line);
  48. } else if (source.match(/^`/)) {
  49. this.advance();
  50. return new Token(TokenKind.BACKTICK, null, this.line);
  51. } else if (source.match(/^\.([^0-9]|$)/)) {
  52. this.advance();
  53. return new Token(TokenKind.DOT, null, this.line);
  54. } else if (source.match(/^;/)) {
  55. this.advance();
  56. return new Token(TokenKind.SEMICOLON, null, this.line);
  57. } else if (source.match(/^[0-9]+(\.[0-9]+)?/)) {
  58. const match = source.match(/^[0-9]+(\.[0-9]+)?/);
  59. if (match) {
  60. const numeric = match[0];
  61. this.advance(numeric.length);
  62. return new Token(TokenKind.NUMBER, numeric, this.line);
  63. }
  64. } else if (source.match(/^\.[0-9]+/)) {
  65. const match = source.match(/^\.[0-9]+/);
  66. if (match) {
  67. const numeric = match[0];
  68. this.advance(numeric.length);
  69. return new Token(TokenKind.NUMBER, numeric, this.line);
  70. }
  71. } else if (source.match(/^[a-zA-Z_][a-zA-Z0-9_]*/)) {
  72. const match = source.match(/^[a-zA-Z_][a-zA-Z0-9_]*/);
  73. if (match) {
  74. const identifier = match[0];
  75. this.advance(identifier.length);
  76. return new Token(TokenKind.IDENTIFIER, identifier, this.line);
  77. }
  78. } else if (source.match(/^\n/)) {
  79. this.advance();
  80. this.nextLine();
  81. return null;
  82. } else if (source.match(/^\s/)) {
  83. this.advance();
  84. return null;
  85. }
  86. return new Error(`Unrecognized character ${source[0]}`, this.line);
  87. }
  88. private advance(step: number = 1) {
  89. this.position += step;
  90. }
  91. private nextLine() {
  92. this.line += 1;
  93. }
  94. private atEnd(): boolean {
  95. return this.position === this.source.length;
  96. }
  97. }