Chervil is a toy Lisp interpreter written in Ruby
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

lexer.rb 1.5KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970
  module Chervil
    # Splits Chervil source text into a flat list of tokens.
    #
    # Token kinds produced here are :lparen, :rparen, :string, :number,
    # :identifier and a terminating :eof. Every token is built with
    # `Token.new(type, text)`; `Token` is not defined in this file and is
    # expected to be resolvable from the Chervil namespace.
    class Lexer
      # All patterns are matched in place at @position. `\G` pins a match to the
      # offset handed to String#match, so a pattern can never "find" a match on
      # a later line (which a `^` anchor would allow) and the remaining source
      # does not have to be copied for every token.
      WHITESPACE = /\s/
      NUMBER = /\G[0-9]+(\.[0-9]+)?/
      IDENTIFIER = /\G[a-z!$%&*\/:<=>?~_^+\-][a-z0-9@!$%&*\/:<=>?~_^+\-]*/
      private_constant :WHITESPACE, :NUMBER, :IDENTIFIER

      # @param source [String] the program text to tokenize
      def initialize(source)
        @source = source
        @position = 0
      end

      # @return [String, nil] the character under the cursor, or nil once the
      #   cursor has moved past the end of the source
      def current_char
        @source[@position]
      end

      # Moves the cursor forward.
      #
      # @param step [Integer] number of characters to skip
      # @return [Integer] the new cursor position
      def advance(step = 1)
        @position += step
      end

      # Skips leading whitespace, then consumes and returns exactly one token.
      # At end of input it returns an :eof token (repeatedly, if called again).
      #
      # @return [Token]
      # @raise [RuntimeError] "Unterminated string" when a quoted string is never
      #   closed, or "Unrecognized character ..." when nothing matches
      def get_next_token
        skip_whitespace

        case current_char
        when nil
          Token.new(:eof, "eof")
        when '('
          advance
          Token.new(:lparen, "(")
        when ')'
          advance
          Token.new(:rparen, ")")
        when '"', '\''
          read_string
        else
          read_atom
        end
      end

      # Lexes the whole source from the current cursor position.
      #
      # @return [Array<Token>] every token in order; the last one is :eof
      # @raise [RuntimeError] see #get_next_token
      def tokenize
        tokens = []
        loop do
          token = get_next_token
          tokens << token
          break if token.type == :eof
        end
        tokens
      end

      private

      # Advances past whitespace, inspecting only the character under the cursor.
      def skip_whitespace
        advance while (char = current_char) && WHITESPACE.match?(char)
      end

      # Reads a string literal; the cursor must be on the opening quote. The
      # literal ends at the next occurrence of the same quote character (there
      # is no escape syntax). The token text excludes the quotes.
      def read_string
        delimiter = current_char
        advance
        closing = @source.index(delimiter, @position)
        # Checked before anything is appended, so hitting end of input reports
        # the intended error instead of failing on a nil character.
        raise "Unterminated string" if closing.nil?

        text = @source[@position...closing]
        @position = closing + 1
        Token.new(:string, text)
      end

      # Reads a number or identifier starting exactly at the cursor.
      def read_atom
        if (match = @source.match(NUMBER, @position))
          kind = :number
        elsif (match = @source.match(IDENTIFIER, @position))
          kind = :identifier
        else
          raise "Unrecognized character #{current_char}"
        end

        advance(match[0].size)
        Token.new(kind, match[0])
      end
    end
  end