module Chervil
  # Splits a source string into tokens: parentheses, `#t`/`#f` booleans,
  # the quote character, double-quoted strings, numbers and identifiers.
  #
  # Tokens are built with `Token.new(type, value)`; `Token` is not defined
  # in this file and must be provided elsewhere.
  class Lexer
    # A single whitespace character.
    WHITESPACE = /\s/

    # Integer or decimal literal. Anchored with \A (not ^) so that it can only
    # match at the cursor, never at the start of a later line.
    NUMBER = /\A[0-9]+(\.[0-9]+)?/

    # Identifier: a lowercase letter or symbol character followed by letters,
    # digits or symbol characters. Anchored with \A for the same reason.
    IDENTIFIER = /\A[a-z!$%&*\/:<=>?~_^+\-][a-z0-9@!$%&*\/:<=>?~_^+\-]*/

    # @param source [String] the text to tokenize
    def initialize(source)
      @source = source
      @position = 0
    end

    # @return [String, nil] the character under the cursor, or nil at end of input
    def current_char
      @source[@position]
    end

    # Moves the cursor forward.
    #
    # @param step [Integer] number of characters to skip
    # @return [Integer] the new cursor position
    def advance(step = 1)
      @position += step
    end

    # Skips leading whitespace and reads one token starting at the cursor.
    #
    # @return [Token] the next token; `Token.new(:eof, "eof")` at end of input.
    #   For an unterminated string literal the result of
    #   `Error.new("Unterminated string")` is returned instead of a token.
    # @raise [RuntimeError] on an unexpected or unrecognized character
    def get_next_token
      skip_whitespace

      case current_char
      when nil
        Token.new(:eof, "eof")
      when '('
        advance
        Token.new(:lparen, "(")
      when ')'
        advance
        Token.new(:rparen, ")")
      when '#'
        advance
        read_boolean
      when "'"
        advance
        Token.new(:quote, "'")
      when '"'
        read_string
      else
        read_atom
      end
    end

    # Reads tokens until the end-of-input token has been produced.
    #
    # @return [Array] every token in order, ending with the :eof token
    def tokenize
      tokens = []
      loop do
        token = get_next_token
        tokens << token
        break if token.type == :eof
      end
      tokens
    end

    private

    # Advances past any run of whitespace at the cursor. Only the current
    # character is inspected, so whitespace later in the input is not
    # mistaken for whitespace at the cursor.
    def skip_whitespace
      advance while current_char&.match?(WHITESPACE)
    end

    # Reads the character following '#' and returns the boolean token.
    def read_boolean
      case current_char
      when 't'
        advance
        Token.new(:boolean, true)
      when 'f'
        advance
        Token.new(:boolean, false)
      else
        raise "Unexpected character #{current_char}"
      end
    end

    # Reads a string literal; the cursor is on the opening delimiter.
    # No escape sequences are interpreted.
    def read_string
      delimiter = current_char
      advance
      string = String.new
      until current_char == delimiter || current_char.nil?
        string << current_char
        advance
      end
      # NOTE(review): `Error` is not defined in this file; presumably it is a
      # project class resolved through the Chervil namespace. This branch
      # returns the object rather than raising, unlike the other error paths.
      return Error.new("Unterminated string") if current_char.nil?

      advance # consume the closing delimiter
      Token.new(:string, string)
    end

    # Reads a number or identifier beginning exactly at the cursor.
    def read_atom
      rest = @source[@position..-1]
      if (match = rest.match(NUMBER))
        advance(match[0].size)
        Token.new(:number, match[0]) # the value stays a String
      elsif (match = rest.match(IDENTIFIER))
        advance(match[0].size)
        Token.new(:identifier, match[0])
      else
        raise "Unrecognized character #{current_char}"
      end
    end
  end
end