123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081 |
module Chervil
  # Converts a source string into a flat list of tokens.
  #
  # Tokens are built with `Token.new(type, value)`; `Token` is resolved from
  # the enclosing namespace and is not defined in this class. Token types
  # emitted here are :lparen, :rparen, :boolean, :string, :number,
  # :identifier and :eof.
  class Lexer
    # All patterns are anchored with \G so that they only match exactly at
    # the position handed to String#match. (A `^` anchor would also match at
    # the start of any *later* line of the remaining input.)
    WHITESPACE = /\G\s+/
    NUMBER     = /\G[0-9]+(\.[0-9]+)?/
    IDENTIFIER = /\G[a-z!$%&*\/:<=>?~_^+\-][a-z0-9@!$%&*\/:<=>?~_^+\-]*/

    # @param source [String] the text to tokenize
    def initialize(source)
      @source = source
      @position = 0
    end

    # @return [String, nil] the character at the current position, or nil
    #   once the position is past the end of the source
    def current_char
      @source[@position]
    end

    # Moves the current position forward.
    #
    # @param step [Integer] number of characters to skip
    # @return [Integer] the new position
    def advance(step = 1)
      @position += step
    end

    # Skips leading whitespace, then consumes and returns the next token.
    #
    # @return [Token] the next token; an :eof token once input is exhausted
    # @raise [RuntimeError] on an invalid `#` literal, an unterminated
    #   string, or a character that starts no known token
    def get_next_token
      skip_whitespace

      case current_char
      when nil
        Token.new(:eof, "eof")
      when '('
        advance
        Token.new(:lparen, "(")
      when ')'
        advance
        Token.new(:rparen, ")")
      when '#'
        read_boolean
      when '"', '\''
        read_string
      else
        read_atom
      end
    end

    # Reads tokens until (and including) the :eof token.
    #
    # @return [Array<Token>] every remaining token, ending with :eof
    # @raise [RuntimeError] see {#get_next_token}
    def tokenize
      tokens = []
      loop do
        token = get_next_token
        tokens << token
        break if token.type == :eof
      end
      tokens
    end

    private

    # Advances past any run of whitespace starting at the current position.
    def skip_whitespace
      blank = @source.match(WHITESPACE, @position)
      advance(blank[0].size) if blank
    end

    # Reads `#t` or `#f`; the current character is the leading '#'.
    def read_boolean
      advance # consume '#'
      value =
        case current_char
        when 't' then true
        when 'f' then false
        else raise "Unexpected character #{current_char}"
        end
      advance
      Token.new(:boolean, value)
    end

    # Reads a string literal; the current character is the opening quote.
    # The literal ends at the next occurrence of the same quote character
    # (there is no escape handling).
    def read_string
      delimiter = current_char
      advance
      closing = @source.index(delimiter, @position)
      # Check for end of input *before* touching the contents, so a missing
      # closing quote is reported as such.
      raise "Unterminated string" if closing.nil?

      text = @source[@position...closing]
      @position = closing + 1 # step over the closing quote
      Token.new(:string, text)
    end

    # Reads a number or an identifier starting at the current position.
    # The number's value is kept as the matched text (not converted).
    def read_atom
      if (match = @source.match(NUMBER, @position))
        advance(match[0].size)
        Token.new(:number, match[0])
      elsif (match = @source.match(IDENTIFIER, @position))
        advance(match[0].size)
        Token.new(:identifier, match[0])
      else
        raise "Unrecognized character #{current_char}"
      end
    end
  end
end
|