# Turns source text into a flat sequence of Token objects.
#
# Newlines in the input are replaced by single spaces before lexing, so line
# structure is not preserved (this also applies inside string literals).
#
# Token and TokenKinds are defined outside this class and are only looked up
# when a token is actually built.
class Lexer
  # Every pattern is anchored with \G so that String#match(pattern, pos) can
  # only succeed exactly at the current position; this avoids copying the
  # remaining input for each token.
  WHITESPACE_PATTERN = /\G\s+/
  WORD_PATTERN       = /\G[a-z][a-zA-Z0-9_]*/
  CLASS_NAME_PATTERN = /\G[A-Z][a-zA-Z0-9_]*/
  NUMBER_PATTERN     = /\G\d+(?:\.\d+)?/
  STRING_PATTERN     = /\G"([^"]*)"/
  ATOM_PATTERN       = /\G:([a-z][a-zA-Z0-9_]*)/
  # Two-character symbols are listed first so that "<=" is never split into
  # "<" followed by "=".
  SYMBOL_PATTERN     = /\G(?:=>|==|<=|>=|[-+*\/{}()\[\];,.!<>=])/

  # Reserved words mapped to [TokenKinds constant name, optional token value].
  # A word is matched in full before this table is consulted, so identifiers
  # that merely start with a keyword ("order", "iffy", "letter") stay
  # identifiers.
  KEYWORDS = {
    'null'     => [:NULL],
    'true'     => [:BOOLEAN, true],
    'false'    => [:BOOLEAN, false],
    'if'       => [:IF],
    'elseif'   => [:ELSEIF],
    'else'     => [:ELSE],
    'and'      => [:OPERATOR, :and],
    'or'       => [:OPERATOR, :or],
    'not'      => [:OPERATOR, :not],
    'let'      => [:LET],
    'function' => [:FUNCTION],
    'class'    => [:CLASS],
    'public'   => [:PUBLIC],
    'private'  => [:PRIVATE]
  }.freeze

  # Punctuation and symbolic operators, same layout as KEYWORDS.
  SYMBOLS = {
    '+'  => [:OPERATOR, :+],
    '-'  => [:OPERATOR, :-],
    '*'  => [:OPERATOR, :*],
    '/'  => [:OPERATOR, :/],
    '!'  => [:OPERATOR, :!],
    '==' => [:OPERATOR, :==],
    '<=' => [:OPERATOR, :<=],
    '>=' => [:OPERATOR, :>=],
    '<'  => [:OPERATOR, :<],
    '>'  => [:OPERATOR, :>],
    '='  => [:EQUALS],
    '=>' => [:ROCKET],
    '{'  => [:LBRACE],
    '}'  => [:RBRACE],
    '('  => [:LPAREN],
    ')'  => [:RPAREN],
    '['  => [:LBRACKET],
    ']'  => [:RBRACKET],
    ';'  => [:SEMICOLON],
    ','  => [:COMMA],
    '.'  => [:DOT]
  }.freeze

  # @param source [String] program text; newlines are turned into spaces
  def initialize(source)
    @source = source.split("\n").join(' ')
    @position = 0
  end

  # Consumes and returns the next token, skipping any leading whitespace.
  #
  # @return [Token] the next token, or an EOF token once the input is exhausted
  # @raise [RuntimeError] if the character at the current position starts no
  #   known token
  def get_token
    # Skip whitespace before the end check so that trailing whitespace yields
    # EOF instead of an "Unrecognized character" error.
    skip_whitespace
    return token(:EOF) if at_end

    if (word = scan(WORD_PATTERN))
      lexeme = word[0]
      keyword = KEYWORDS[lexeme]
      keyword ? token(*keyword) : token(:IDENTIFIER, lexeme)
    elsif (number = scan(NUMBER_PATTERN))
      token(:NUMBER, number[0].to_f)
    elsif (string = scan(STRING_PATTERN))
      token(:STRING, string[1])
    elsif (atom = scan(ATOM_PATTERN))
      token(:ATOM, atom[1].to_sym)
    elsif (symbol = scan(SYMBOL_PATTERN))
      token(*SYMBOLS.fetch(symbol[0]))
    elsif (class_name = scan(CLASS_NAME_PATTERN))
      token(:CLASS_NAME, class_name[0])
    else
      raise "Unrecognized character #{@source[@position]}"
    end
  end

  # Lexes everything from the current position to the end of the input.
  #
  # @return [Array<Token>] all remaining tokens followed by one EOF token
  # @raise [RuntimeError] if an unrecognized character is encountered
  def scan_all
    tokens = []
    loop do
      skip_whitespace
      break if at_end

      tokens << get_token
    end
    tokens << token(:EOF)
  end

  private

  # True once every character of the source has been consumed.
  def at_end
    @position >= @source.size
  end

  # Advances past any run of whitespace at the current position.
  def skip_whitespace
    scan(WHITESPACE_PATTERN)
  end

  # Tries +pattern+ at the current position and consumes the match on success.
  #
  # @return [MatchData, nil] the match, or nil if the pattern does not apply here
  def scan(pattern)
    match = @source.match(pattern, @position)
    @position += match[0].size if match
    match
  end

  # Builds a Token whose kind is the TokenKinds constant named +kind+.
  # Value-less tokens are created with a single argument.
  def token(kind, *value)
    Token.new(TokenKinds.const_get(kind), *value)
  end
end