class Lexer def initialize(source) @source = source @position = 0 end def get_token skip_whitespace skip_comment return Token.new(TokenKinds::EOF) if at_end source = @source.slice(@position..-1) if source.match(/\Anull/) @position += 4 Token.new(TokenKinds::NULL) elsif source.match(/\Atrue/) @position += 4 Token.new(TokenKinds::BOOLEAN, true) elsif source.match(/\Afalse/) @position += 5 Token.new(TokenKinds::BOOLEAN, false) elsif source.match(/\Aif/) @position += 2 Token.new(TokenKinds::IF) elsif source.match(/\Aelseif/) @position += 6 Token.new(TokenKinds::ELSEIF) elsif source.match(/\Aelse/) @position += 4 Token.new(TokenKinds::ELSE) elsif source.match(/\A\d+(\.\d+)?/) number = source.match(/\A\d+(\.\d+)?/)[0] @position += number.size Token.new(TokenKinds::NUMBER, number.to_f) elsif source[0] == '"' @position += 1 string = String.new while !at_end && @source[@position] != '"' if @source[@position] == '\\' if @source[@position + 1] == '"' @position += 1 elsif @source[@position + 1] == 'n' @position += 2 string << "\n" next elsif @source[@position + 1] == 't' @position += 2 string << "\t" next end end string << @source[@position] @position += 1 end if at_end raise 'Unterminated string' elsif @source[@position] == '"' @position += 1 end Token.new(TokenKinds::STRING, string) elsif source.match(/\A\:([a-z][a-zA-Z0-9_]*)/) atom = source.match(/\A\:([a-z][a-zA-Z0-9_]*)/)[1] @position += atom.size + 1 Token.new(TokenKinds::ATOM, atom.to_sym) elsif source.match(/\A\+/) @position += 1 Token.new(TokenKinds::OPERATOR, :+) elsif source.match(/\A\-/) @position += 1 Token.new(TokenKinds::OPERATOR, :-) elsif source.match(/\A\*/) @position += 1 Token.new(TokenKinds::OPERATOR, :*) elsif source.match(%r{\A\/}) @position += 1 Token.new(TokenKinds::OPERATOR, :/) elsif source.match(/\A\{/) @position += 1 Token.new(TokenKinds::LBRACE) elsif source.match(/\A\}/) @position += 1 Token.new(TokenKinds::RBRACE) elsif source.match(/\A\(/) @position += 1 Token.new(TokenKinds::LPAREN) elsif source.match(/\A\)/) @position += 1 Token.new(TokenKinds::RPAREN) elsif source.match(/\A\[/) @position += 1 Token.new(TokenKinds::LBRACKET) elsif source.match(/\A\]/) @position += 1 Token.new(TokenKinds::RBRACKET) elsif source.match(/\A\;/) @position += 1 Token.new(TokenKinds::SEMICOLON) elsif source.match(/\A,/) @position += 1 Token.new(TokenKinds::COMMA) elsif source.match(/\A\./) @position += 1 Token.new(TokenKinds::DOT) elsif source.match(/\A=>/) @position += 2 Token.new(TokenKinds::ROCKET) elsif source.match(/\A\#/) @position += 1 Token.new(TokenKinds::HASH) elsif source.match(/\A!/) @position += 1 Token.new(TokenKinds::OPERATOR, :!) elsif source.match(/\A==/) @position += 2 Token.new(TokenKinds::OPERATOR, :==) elsif source.match(/\A\<=/) @position += 2 Token.new(TokenKinds::OPERATOR, :<=) elsif source.match(/\A\>\=/) @position += 2 Token.new(TokenKinds::OPERATOR, :>=) elsif source.match(/\A\/) @position += 1 Token.new(TokenKinds::OPERATOR, :>) elsif source.match(/\Aand/) @position += 3 Token.new(TokenKinds::OPERATOR, :and) elsif source.match(/\Aor/) @position += 2 Token.new(TokenKinds::OPERATOR, :or) elsif source.match(/\Anot/) @position += 3 Token.new(TokenKinds::OPERATOR, :not) elsif source.match(/\A\=/) @position += 1 Token.new(TokenKinds::EQUALS) elsif source.match(/\Alet/) @position += 3 Token.new(TokenKinds::LET) elsif source.match(/\Afunction/) @position += 8 Token.new(TokenKinds::FUNCTION) elsif source.match(/\Amethod/) @position += 6 Token.new(TokenKinds::METHOD) elsif source.match(/\Aclassmethod/) @position += 11 Token.new(TokenKinds::CLASSMETHOD) elsif source.match(/\Aclass/) @position += 5 Token.new(TokenKinds::CLASS) elsif source.match(/\Apublic/) @position += 6 Token.new(TokenKinds::PUBLIC) elsif source.match(/\Aprivate/) @position += 7 Token.new(TokenKinds::PRIVATE) elsif source.match(/\Afor/) @position += 3 Token.new(TokenKinds::FOR) elsif source.match(/\Ain/) @position += 2 Token.new(TokenKinds::IN) elsif source.match(/\A[a-z][a-zA-Z0-9_]*/) identifier = source.match(/\A[a-z][a-zA-Z0-9_]*/)[0] @position += identifier.size Token.new(TokenKinds::IDENTIFIER, identifier) elsif source.match(/\A[A-Z][a-zA-Z0-9_]*/) class_name = source.match(/\A[A-Z][a-zA-Z0-9_]*/)[0] @position += class_name.size Token.new(TokenKinds::CLASS_NAME, class_name) else raise "Unrecognized character #{source[0]}" end end def peek position = @position token = get_token @position = position token end def scan_all tokens = Array.new tokens << get_token until at_end tokens << Token.new(TokenKinds::EOF) end private def skip_whitespace @position += 1 while !at_end && @source[@position].match(/\A\s/) end def skip_comment if @source.slice(@position..@position + 1) == '//' @position += 2 @position += 1 until @source[@position] == "\n" || at_end @position += 1 unless at_end end end def at_end @position == @source.size end end