A work-in-progress SQL parser written in TypeScript
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

lexer.ts 3.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. import Error, { isError } from "./error";
  2. import Token, { TokenKind } from "./token";
  /**
   * Splits SQL source text into a flat list of tokens.
   *
   * Recognised input:
   * - the keywords `select`, `where` and `from` (case-insensitive, whole words only),
   * - the punctuation characters `*`, `=`, `,`, `` ` ``, `.` and `;`,
   * - numeric literals of the form `12`, `1.5` or `.5`,
   * - identifiers matching `[a-zA-Z_][a-zA-Z0-9_]*`.
   *
   * Whitespace is skipped; every `\n` increments the line counter that is
   * recorded on each token produced afterwards.
   */
  export default class Lexer {
    // Sticky (`y`) patterns only match starting exactly at `lastIndex`, which lets
    // the lexer match in place instead of re-slicing the remaining source for
    // every token. `lastIndex` is assigned before every use (see `matchAt`).
    private static readonly WORD = /[a-zA-Z_][a-zA-Z0-9_]*/y;
    private static readonly NUMBER = /[0-9]+(?:\.[0-9]+)?|\.[0-9]+/y;
    private static readonly WHITESPACE = /\s/;

    public source: string;
    public position: number;
    public line: number;

    /**
     * @param source The complete text to tokenize.
     */
    constructor(source: string) {
      this.source = source;
      this.position = 0;
      this.line = 1;
    }

    /**
     * Tokenizes the source from the current position to the end.
     *
     * @returns The tokens followed by a trailing EOF token, or the first error
     * encountered (an unrecognized character) instead of any tokens.
     */
    public scan(): Token[] | Error {
      const tokens: Token[] = [];
      while (!this.atEnd()) {
        const result = this.getToken();
        if (isError(result)) {
          return result;
        }
        // `null` means whitespace was consumed and there is nothing to emit.
        if (result !== null) {
          tokens.push(result);
        }
      }
      tokens.push(new Token(TokenKind.EOF, null, this.line));
      return tokens;
    }

    /**
     * Consumes one lexical element starting at `position`.
     *
     * @returns A token, `null` when only whitespace was consumed, or an error
     * when the current character does not start any known element.
     */
    private getToken(): Token | Error | null {
      const char = this.source.charAt(this.position);

      const punctuation = this.punctuationToken(char);
      if (punctuation !== null) {
        this.advance();
        return punctuation;
      }

      // A dot is its own token unless a digit follows, in which case it
      // begins a fractional literal such as `.5` handled by NUMBER below.
      if (char === "." && !Lexer.isDigit(this.source.charAt(this.position + 1))) {
        this.advance();
        return new Token(TokenKind.DOT, null, this.line);
      }

      const numeric = this.matchAt(Lexer.NUMBER);
      if (numeric !== null) {
        this.advance(numeric.length);
        return new Token(TokenKind.NUMBER, numeric, this.line);
      }

      // Read the whole word before deciding whether it is a keyword, so that
      // identifiers which merely begin with a keyword (`selected`, `from_date`,
      // `whereabouts`) are not split into a keyword plus a leftover identifier.
      const word = this.matchAt(Lexer.WORD);
      if (word !== null) {
        this.advance(word.length);
        const keyword = this.keywordToken(word);
        return keyword !== null
          ? keyword
          : new Token(TokenKind.IDENTIFIER, word, this.line);
      }

      if (char === "\n") {
        this.advance();
        this.nextLine();
        return null;
      }

      if (Lexer.WHITESPACE.test(char)) {
        this.advance();
        return null;
      }

      return new Error(`Unrecognized character ${char}`, this.line);
    }

    /** Returns the token for a single-character punctuation mark, or `null`. */
    private punctuationToken(char: string): Token | null {
      switch (char) {
        case "*":
          return new Token(TokenKind.STAR, null, this.line);
        case "=":
          return new Token(TokenKind.EQUALS, null, this.line);
        case ",":
          return new Token(TokenKind.COMMA, null, this.line);
        case "`":
          return new Token(TokenKind.BACKTICK, null, this.line);
        case ";":
          return new Token(TokenKind.SEMICOLON, null, this.line);
        default:
          return null;
      }
    }

    /** Returns the keyword token for `word` (compared case-insensitively), or `null`. */
    private keywordToken(word: string): Token | null {
      switch (word.toLowerCase()) {
        case "select":
          return new Token(TokenKind.SELECT, null, this.line);
        case "where":
          return new Token(TokenKind.WHERE, null, this.line);
        case "from":
          return new Token(TokenKind.FROM, null, this.line);
        default:
          return null;
      }
    }

    /** Runs a sticky pattern at `position`; returns the matched text or `null`. */
    private matchAt(pattern: RegExp): string | null {
      pattern.lastIndex = this.position;
      const match = pattern.exec(this.source);
      return match === null ? null : match[0];
    }

    /** True for a single ASCII digit; false for anything else, including "". */
    private static isDigit(char: string): boolean {
      return char >= "0" && char <= "9";
    }

    /** Moves the read position forward by `step` characters (default 1). */
    private advance(step = 1): void {
      this.position += step;
    }

    /** Increments the current line number. */
    private nextLine(): void {
      this.line += 1;
    }

    /** True once the read position has reached (or passed) the end of the source. */
    private atEnd(): boolean {
      return this.position >= this.source.length;
    }
  }