import { Error } from './error';
import Token, { TokenTypes } from './token';

/** Name that may follow a `:`, `$` or `@` sigil: a lowercase letter, then letters, digits, `_` or `-`. */
const NAME_PATTERN = /^[a-z][a-zA-Z0-9_-]*/;

/** Any single whitespace character. */
const WHITESPACE_PATTERN = /^\s/;

// Empty interface that merges with the class of the same name below; kept so
// the module's exported declarations stay exactly as they were.
export interface LexerError {}

/**
 * Error value produced by the lexer. It is returned from `Lexer.scan`, not
 * thrown.
 */
export class LexerError implements Error {
  /** Line counter of the lexer at the moment the error was created. */
  public line: number;
  /** Human-readable description of the problem. */
  public message: string;
  /** Module name the lexer was constructed with, if any. */
  public module?: string;

  public constructor(line: number, message: string, module?: string) {
    this.line = line;
    this.message = message;
    this.module = module;
  }
}

/** Outcome of `Lexer.scan`: the token list (ending in EOF), or the first error hit. */
export type LexerResult = { tokens: Token[]; module?: string } | LexerError;

/**
 * Splits a source string into tokens.
 *
 * Recognised input, as implemented by `getToken`:
 * - `(`, `)` and `,` punctuation;
 * - `:name`, `$name` and `@name` (property, identifier and function name);
 * - whitespace and `;` comments running to the end of the line, both of which
 *   are dropped from the result;
 * - anything else, which is collected into a literal.
 */
export default class Lexer {
  private source: string;
  private module?: string;
  /** 1-based line counter, bumped each time a `\n` is scanned as whitespace. */
  private line: number;
  /** Index into `source` of the next character to scan. */
  private position: number;
  private tokens: Token[];
  /**
   * Whether a literal may contain spaces. Cleared when an `@` sigil is seen
   * and set again at the next `(` or `)`.
   */
  private allowWhitespace: boolean;

  public constructor(source: string, module?: string) {
    this.source = source;
    this.module = module;
    this.line = 1;
    this.position = 0;
    this.tokens = [];
    this.allowWhitespace = true;
  }

  /**
   * Scans the source from the current position to its end.
   *
   * @returns the collected tokens followed by an EOF token, together with the
   * module name, or the first `LexerError` encountered.
   */
  public scan(): LexerResult {
    while (this.position < this.source.length) {
      const result = this.getToken();
      if (!(result instanceof Token)) return result;

      if (
        result.type !== TokenTypes.WHITESPACE &&
        result.type !== TokenTypes.COMMENT
      ) {
        this.tokens.push(result);
      }

      // Literals move the cursor themselves while being collected. Comments
      // do too, and carry an empty value, so they add nothing here.
      if (result.type !== TokenTypes.LITERAL) {
        this.position += result.value.length;
      }
      // A sigil token's value excludes the sigil, so step over it separately.
      if (this.hasSigil(result)) {
        this.position += 1;
      }
    }

    this.tokens.push(this.token(TokenTypes.EOF, 'eof'));
    return {
      tokens: this.tokens,
      module: this.module,
    };
  }

  /**
   * Reads the token starting at the current position. Only literals and
   * comments advance `position` here; for every other token `scan` advances
   * it based on the returned value.
   */
  private getToken(): Token | LexerError {
    const c = this.currentChar();

    switch (c) {
      case '(':
        this.allowWhitespace = true;
        return this.token(TokenTypes.LPAREN, '(');
      case ')':
        this.allowWhitespace = true;
        return this.token(TokenTypes.RPAREN, ')');
      case ',':
        return this.token(TokenTypes.COMMA, ',');
      case ':':
        return this.sigilToken(TokenTypes.PROPERTY);
      case '$':
        return this.sigilToken(TokenTypes.IDENTIFIER);
      case '@':
        // Until the next parenthesis, a space ends a literal.
        this.allowWhitespace = false;
        return this.sigilToken(TokenTypes.FUNCTION_NAME);
      case ';':
        return this.commentToken();
      default:
        if (WHITESPACE_PATTERN.test(c)) {
          if (c === '\n') this.line += 1;
          return this.token(TokenTypes.WHITESPACE, c);
        }
        return this.literalToken();
    }
  }

  /**
   * Builds the token for a sigil-prefixed name. The current character is the
   * sigil; the name must start right after it.
   *
   * @param type token type to give the name.
   * @returns the token holding the name without its sigil, or an error
   * naming the character that follows the sigil when no valid name starts there.
   */
  private sigilToken(type: TokenTypes): Token | LexerError {
    const match = this.source.slice(this.position + 1).match(NAME_PATTERN);
    if (match === null) {
      return this.error(
        `Unexpected character ${this.source[this.position + 1]}`
      );
    }
    return this.token(type, match[0]);
  }

  /**
   * Skips a `;` comment up to, but not including, the next newline and
   * returns an empty COMMENT token.
   */
  private commentToken(): Token {
    // The length bound matters: a comment on the last line of a source with
    // no trailing newline would otherwise never hit '\n' and loop forever.
    while (
      this.position < this.source.length &&
      this.currentChar() !== '\n'
    ) {
      this.position++;
    }
    return this.token(TokenTypes.COMMENT, '');
  }

  /** Collects characters into a LITERAL token until a literal boundary or the end of the source. */
  private literalToken(): Token {
    const start = this.position;
    while (this.position < this.source.length && !this.atLiteralBoundary()) {
      this.position += 1;
    }
    return this.token(
      TokenTypes.LITERAL,
      this.source.slice(start, this.position)
    );
  }

  /**
   * Tells whether the current character ends a literal. That is the case for
   * `(`, `)`, `,` and newline, and for a space when
   * - spaces are not allowed in literals, or
   * - the space is directly followed by `(`, or
   * - the space is followed by `:` or `$` and then a lowercase letter.
   */
  private atLiteralBoundary(): boolean {
    const c = this.currentChar();
    if (c === '(' || c === ')' || c === '\n' || c === ',') return true;
    if (c !== ' ') return false;
    if (!this.allowWhitespace) return true;

    const next = this.nextChar();
    if (next === '(') return true;
    if (next === ':' || next === '$') {
      const after = this.source[this.position + 2];
      return after !== undefined && after >= 'a' && after <= 'z';
    }
    return false;
  }

  /** Creates a token stamped with the current line and the module name. */
  private token(type: TokenTypes, value: string): Token {
    return new Token(type, value, this.line, this.module);
  }

  /** Creates an error stamped with the current line and the module name. */
  private error(message: string): LexerError {
    return new LexerError(this.line, message, this.module);
  }

  /** Character at the current position (`undefined` at runtime once past the end). */
  private currentChar(): string {
    return this.source[this.position];
  }

  /** Character right after the current position (`undefined` at runtime once past the end). */
  private nextChar(): string {
    return this.source[this.position + 1];
  }

  /** Whether the token was written with a leading sigil that its value does not include. */
  private hasSigil(token: Token): boolean {
    return [
      TokenTypes.IDENTIFIER,
      TokenTypes.PROPERTY,
      TokenTypes.FUNCTION_NAME,
    ].includes(token.type);
  }
}