12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970 |
module Chervil
  # Converts Chervil source text into a flat sequence of tokens.
  #
  # The lexer keeps a cursor (+@position+, a character index into +@source+)
  # and produces one token per call to #get_next_token. Tokens are built with
  # +Token.new(type, text)+; +Token+ is not defined inside this class and must
  # be resolvable from the Chervil namespace.
  class Lexer
    # Every pattern is anchored with \G so it can only match exactly at the
    # cursor handed to String#match. A ^ anchor would also match at the start
    # of any later line, letting the lexer jump over unread input.
    WHITESPACE = /\G\s+/
    NUMBER = /\G[0-9]+(\.[0-9]+)?/
    IDENTIFIER = /\G[a-z!$%&*\/:<=>?~_^+\-][a-z0-9@!$%&*\/:<=>?~_^+\-]*/

    # @param source [String] the program text to tokenize
    def initialize(source)
      @source = source
      @position = 0
    end

    # @return [String, nil] the character under the cursor, or nil once the
    #   cursor is at or past the end of the source
    def current_char
      @source[@position]
    end

    # Moves the cursor forward.
    #
    # @param step [Integer] number of characters to move by
    # @return [Integer] the new cursor position
    def advance(step = 1)
      @position += step
    end

    # Skips whitespace, then consumes and returns the token starting at the
    # cursor. At end of input it returns an :eof token (repeatedly, if called
    # again).
    #
    # @return [Token] one of :eof, :lparen, :rparen, :string, :number or
    #   :identifier
    # @raise [RuntimeError] if a string literal is not closed before the end
    #   of the source, or if the character at the cursor cannot start a token
    def get_next_token
      skip_whitespace

      case current_char
      when nil
        Token.new(:eof, "eof")
      when '('
        advance
        Token.new(:lparen, "(")
      when ')'
        advance
        Token.new(:rparen, ")")
      when '"', '\''
        read_string
      else
        read_number_or_identifier
      end
    end

    # Lexes the source from the current cursor position to the end.
    #
    # @return [Array<Token>] the tokens in order; the last one is the :eof token
    # @raise [RuntimeError] see #get_next_token
    def tokenize
      tokens = []
      loop do
        token = get_next_token
        tokens << token
        break if token.type == :eof
      end
      tokens
    end

    private

    # Moves the cursor past any run of whitespace that starts at the cursor.
    def skip_whitespace
      blank = @source.match(WHITESPACE, @position)
      advance(blank[0].size) if blank
    end

    # Reads a quoted string whose opening delimiter (' or ") is under the
    # cursor. The token text is everything between the delimiters, taken
    # verbatim (no escape sequences are interpreted).
    def read_string
      delimiter = current_char
      advance
      string = String.new
      until current_char == delimiter
        # Check for end of input before appending: appending nil would raise
        # a TypeError instead of the intended error.
        raise "Unterminated string" if current_char.nil?

        string << current_char
        advance
      end
      advance # step over the closing delimiter

      Token.new(:string, string)
    end

    # Reads a number or an identifier starting at the cursor; numbers are
    # tried first.
    def read_number_or_identifier
      if (match = @source.match(NUMBER, @position))
        advance(match[0].size)
        Token.new(:number, match[0])
      elsif (match = @source.match(IDENTIFIER, @position))
        advance(match[0].size)
        Token.new(:identifier, match[0])
      else
        raise "Unrecognized character #{current_char}"
      end
    end
  end
end
|