A toy dynamic programming language written in Ruby
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

lexer.rb 5.5KB


  # Hand-written lexer for the toy language. It walks the source string by
  # character offset and turns it into Token objects whose kinds come from
  # TokenKinds. Both Token and TokenKinds are defined outside this class and
  # are only looked up when a token is actually produced.
  #
  # All patterns below start with \G, which anchors the match at the offset
  # handed to String#match, so the remaining input never has to be copied.
  class Lexer
    # Lowercase-initial word: a keyword, a word operator or an identifier.
    WORD_PATTERN = /\G[a-z][a-zA-Z0-9_]*/.freeze
    # Uppercase-initial word: a class name.
    CLASS_NAME_PATTERN = /\G[A-Z][a-zA-Z0-9_]*/.freeze
    # Integer or decimal literal.
    NUMBER_PATTERN = /\G\d+(\.\d+)?/.freeze
    # Atom literal such as :ok; the capture is the name without the colon.
    ATOM_PATTERN = /\G:([a-z][a-zA-Z0-9_]*)/.freeze
    # Character following a backslash inside a string => character it denotes.
    # Any other backslash sequence is kept verbatim.
    ESCAPES = { '"' => '"', '\\' => '\\', 'n' => "\n", 't' => "\t" }.freeze

    # @param source [String] program text to tokenize
    def initialize(source)
      @source = source
      @position = 0 # character offset of the next unread character
    end

    # Reads the next token, skipping any whitespace and // comments before it.
    #
    # @return [Token] the next token; an EOF token once the input is exhausted
    # @raise [RuntimeError] on an unterminated string or an unknown character
    def get_token
      skip_trivia
      return Token.new(TokenKinds::EOF) if at_end

      scan_word || scan_class_name || scan_number || scan_string || scan_atom ||
        scan_symbol || raise("Unrecognized character #{@source[@position]}")
    end

    # Tokenizes everything from the current position to the end of the input.
    #
    # @return [Array<Token>] the tokens in source order, closed by one EOF token
    # @raise [RuntimeError] see #get_token
    def scan_all
      tokens = []
      loop do
        # Skip trailing whitespace/comments here so that they cannot make
        # get_token return an EOF in addition to the one appended below.
        skip_trivia
        break if at_end

        tokens << get_token
      end
      tokens << Token.new(TokenKinds::EOF)
    end

    private

    # Skips any mix of whitespace and // comments, repeating until neither
    # makes progress (e.g. several comment lines separated by indentation).
    def skip_trivia
      loop do
        start = @position
        skip_whitespace
        skip_comment
        break if @position == start
      end
    end

    # Advances past consecutive whitespace characters.
    def skip_whitespace
      @position += 1 while !at_end && @source[@position] =~ /\s/
    end

    # Advances past one // comment, including its terminating newline if any.
    def skip_comment
      return unless @source[@position, 2] == '//'

      @position += 2
      @position += 1 until at_end || @source[@position] == "\n"
      @position += 1 unless at_end
    end

    # @return [Boolean] true once every character has been consumed
    def at_end
      @position >= @source.size
    end

    # Lexes a whole lowercase-initial word and only then decides whether it is
    # reserved, so that identifiers like "index" or "order" are not split at a
    # keyword prefix.
    #
    # @return [Token, nil] nil when no such word starts at the current position
    def scan_word
      match = @source.match(WORD_PATTERN, @position)
      return unless match

      word = match[0]
      @position += word.size
      keyword_token(word) || Token.new(TokenKinds::IDENTIFIER, word)
    end

    # @param word [String] a complete lowercase-initial word
    # @return [Token, nil] the token for a reserved word, nil for anything else
    def keyword_token(word)
      case word
      when 'null' then Token.new(TokenKinds::NULL)
      when 'true' then Token.new(TokenKinds::BOOLEAN, true)
      when 'false' then Token.new(TokenKinds::BOOLEAN, false)
      when 'if' then Token.new(TokenKinds::IF)
      when 'elseif' then Token.new(TokenKinds::ELSEIF)
      when 'else' then Token.new(TokenKinds::ELSE)
      when 'and', 'or', 'not' then Token.new(TokenKinds::OPERATOR, word.to_sym)
      when 'let' then Token.new(TokenKinds::LET)
      when 'function' then Token.new(TokenKinds::FUNCTION)
      when 'class' then Token.new(TokenKinds::CLASS)
      when 'public' then Token.new(TokenKinds::PUBLIC)
      when 'private' then Token.new(TokenKinds::PRIVATE)
      when 'for' then Token.new(TokenKinds::FOR)
      when 'in' then Token.new(TokenKinds::IN)
      end
    end

    # @return [Token, nil] a CLASS_NAME token for an uppercase-initial word
    def scan_class_name
      match = @source.match(CLASS_NAME_PATTERN, @position)
      return unless match

      @position += match[0].size
      Token.new(TokenKinds::CLASS_NAME, match[0])
    end

    # @return [Token, nil] a NUMBER token; the value is always a Float
    def scan_number
      match = @source.match(NUMBER_PATTERN, @position)
      return unless match

      @position += match[0].size
      Token.new(TokenKinds::NUMBER, match[0].to_f)
    end

    # Lexes a double-quoted string literal, translating the sequences listed
    # in ESCAPES.
    #
    # @return [Token, nil] nil when the current character is not a quote
    # @raise [RuntimeError] when the input ends before the closing quote
    def scan_string
      return unless @source[@position] == '"'

      @position += 1 # opening quote
      text = String.new
      until at_end || @source[@position] == '"'
        char = @source[@position]
        replacement = char == '\\' ? ESCAPES[@source[@position + 1]] : nil
        if replacement
          text << replacement
          @position += 2
        else
          text << char
          @position += 1
        end
      end
      raise 'Unterminated string' if at_end

      @position += 1 # closing quote
      Token.new(TokenKinds::STRING, text)
    end

    # @return [Token, nil] an ATOM token whose value is the name as a Symbol
    def scan_atom
      match = @source.match(ATOM_PATTERN, @position)
      return unless match

      @position += match[0].size
      Token.new(TokenKinds::ATOM, match[1].to_sym)
    end

    # Lexes punctuation and symbolic operators, trying the two-character
    # forms first so that "==" is not read as two "=" tokens.
    #
    # @return [Token, nil] nil when no punctuation starts at the position
    def scan_symbol
      token = wide_symbol_token(@source[@position, 2])
      width = 2
      unless token
        token = narrow_symbol_token(@source[@position])
        width = 1
      end
      @position += width if token
      token
    end

    # @param text [String] the next two characters (fewer near the end)
    # @return [Token, nil] the token for a two-character symbol
    def wide_symbol_token(text)
      case text
      when '=>' then Token.new(TokenKinds::ROCKET)
      when '==' then Token.new(TokenKinds::OPERATOR, :==)
      when '<=' then Token.new(TokenKinds::OPERATOR, :<=)
      when '>=' then Token.new(TokenKinds::OPERATOR, :>=)
      end
    end

    # @param char [String] the next character
    # @return [Token, nil] the token for a one-character symbol
    def narrow_symbol_token(char)
      case char
      when '+' then Token.new(TokenKinds::OPERATOR, :+)
      when '-' then Token.new(TokenKinds::OPERATOR, :-)
      when '*' then Token.new(TokenKinds::OPERATOR, :*)
      when '/' then Token.new(TokenKinds::OPERATOR, :/)
      when '!' then Token.new(TokenKinds::OPERATOR, :!)
      when '<' then Token.new(TokenKinds::OPERATOR, :<)
      when '>' then Token.new(TokenKinds::OPERATOR, :>)
      when '=' then Token.new(TokenKinds::EQUALS)
      when '{' then Token.new(TokenKinds::LBRACE)
      when '}' then Token.new(TokenKinds::RBRACE)
      when '(' then Token.new(TokenKinds::LPAREN)
      when ')' then Token.new(TokenKinds::RPAREN)
      when '[' then Token.new(TokenKinds::LBRACKET)
      when ']' then Token.new(TokenKinds::RBRACKET)
      when ';' then Token.new(TokenKinds::SEMICOLON)
      when ',' then Token.new(TokenKinds::COMMA)
      when '.' then Token.new(TokenKinds::DOT)
      end
    end
  end