123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148 |
# Turns program text into Token objects, one at a time (#get_token) or all
# at once (#scan_all).
#
# Token and TokenKinds are defined elsewhere in the project. They are only
# referenced from inside methods, so they are resolved when a token is built
# rather than when this class is loaded.
class Lexer
  # Raised when the text at the current position does not start any token.
  class UnrecognizedCharacterError < StandardError; end

  # All patterns are anchored with \G so they match exactly at the position
  # passed to Regexp#match, which avoids copying the rest of the source for
  # every token.
  WHITESPACE = /\G\s+/
  NUMBER = /\G\d+(\.\d+)?/
  STRING = /\G"([^"]*)"/
  WORD = /\G[a-zA-Z][a-zA-Z0-9_]*/
  # Two-character operators are listed first so "==" is never read as "=" "=".
  PUNCTUATION = /\G(?:==|<=|>=|[-+*\/<>={}()\[\];,.])/

  # Reserved words => [TokenKinds constant name, optional token value].
  KEYWORDS = {
    'null' => [:NULL],
    'true' => [:BOOLEAN, true],
    'false' => [:BOOLEAN, false],
    'if' => [:IF],
    'elseif' => [:ELSEIF],
    'else' => [:ELSE],
    'and' => [:OPERATOR, :and],
    'or' => [:OPERATOR, :or],
    'let' => [:LET],
    'function' => [:FUNCTION],
    'class' => [:CLASS],
    'public' => [:PUBLIC],
    'private' => [:PRIVATE]
  }.freeze

  # Punctuation that becomes an OPERATOR token whose value is the same symbol.
  OPERATORS = %w[== <= >= + - * / < >].freeze

  # Remaining punctuation => TokenKinds constant name (tokens carry no value).
  DELIMITERS = {
    '=' => :EQUALS,
    '{' => :LBRACE,
    '}' => :RBRACE,
    '(' => :LPAREN,
    ')' => :RPAREN,
    '[' => :LBRACKET,
    ']' => :RBRACKET,
    ';' => :SEMICOLON,
    ',' => :COMMA,
    '.' => :DOT
  }.freeze

  private_constant :WHITESPACE, :NUMBER, :STRING, :WORD, :PUNCTUATION,
                   :KEYWORDS, :OPERATORS, :DELIMITERS

  # @param source [String] program text; newlines are replaced by spaces, so
  #   a string literal that spans lines contains a space at each line break
  def initialize(source)
    @source = source.tr("\n", ' ')
    @position = 0
  end

  # Consumes and returns the next token, skipping any leading whitespace.
  #
  # A keyword is recognised only when it is the entire word: "order" is one
  # identifier, not the operator `or` followed by "der".
  #
  # @return [Token] the next token, or an EOF token once the input is used up
  # @raise [UnrecognizedCharacterError] if no token starts at the current
  #   position (this includes a string literal with no closing quote)
  def get_token
    skip_whitespace
    return Token.new(TokenKinds::EOF) if at_end

    scan_number || scan_string || scan_word || scan_punctuation ||
      raise(UnrecognizedCharacterError, "Unrecognized character #{@source[@position]}")
  end

  # Consumes the remaining input.
  #
  # @return [Array<Token>] every remaining token followed by one EOF token
  # @raise [UnrecognizedCharacterError] see #get_token
  def scan_all
    tokens = []
    loop do
      skip_whitespace
      break if at_end

      tokens << get_token
    end
    tokens << Token.new(TokenKinds::EOF)
  end

  private

  def at_end
    @position >= @source.size
  end

  def skip_whitespace
    scan(WHITESPACE)
  end

  # Matches pattern exactly at the current position and advances past it.
  # Returns the MatchData, or nil (leaving the position alone) on no match.
  def scan(pattern)
    match = pattern.match(@source, @position)
    return unless match

    @position += match[0].size
    match
  end

  # Integer or decimal literal; the value is always a Float.
  def scan_number
    match = scan(NUMBER)
    Token.new(TokenKinds::NUMBER, match[0].to_f) if match
  end

  # Double-quoted literal with no escape sequences; the value excludes quotes.
  def scan_string
    match = scan(STRING)
    Token.new(TokenKinds::STRING, match[1]) if match
  end

  # Keyword, identifier (lowercase first letter) or class name (uppercase).
  def scan_word
    match = scan(WORD)
    return unless match

    word = match[0]
    if KEYWORDS.key?(word)
      kind, *value = KEYWORDS[word]
      Token.new(TokenKinds.const_get(kind), *value)
    elsif word.match?(/\A[a-z]/)
      Token.new(TokenKinds::IDENTIFIER, word)
    else
      Token.new(TokenKinds::CLASS_NAME, word)
    end
  end

  # Operators and single-character delimiters.
  def scan_punctuation
    match = scan(PUNCTUATION)
    return unless match

    text = match[0]
    if OPERATORS.include?(text)
      Token.new(TokenKinds::OPERATOR, text.to_sym)
    else
      Token.new(TokenKinds.const_get(DELIMITERS.fetch(text)))
    end
  end
end
|