A toy dynamic programming language written in Ruby
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

lexer.rb 5.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211
  # Hand-written lexer for the toy language. It turns source text into Token
  # objects, either one at a time (#get_token, #peek) or all at once (#scan_all).
  #
  # Token and TokenKinds are not defined in the visible part of this file. They
  # are only referenced inside method bodies, so they are looked up when a token
  # is actually built.
  class Lexer
    # Every pattern is anchored with \G, which String#match pins to the offset
    # it is given. This lets the lexer match in place instead of copying the
    # remainder of the source for each token.

    # Integer or decimal literal such as 42 or 3.14.
    NUMBER_PATTERN = /\G\d+(\.\d+)?/
    # Atom literal: a colon followed by a lower-case identifier, e.g. :ok.
    ATOM_PATTERN = /\G:[a-z][a-zA-Z0-9_]*/
    # A complete lower-case word: either a keyword or an identifier.
    WORD_PATTERN = /\G[a-z][a-zA-Z0-9_]*/
    # A complete capitalised word, lexed as a class name.
    CLASS_NAME_PATTERN = /\G[A-Z][a-zA-Z0-9_]*/
    # Operators and delimiters. Two-character forms come first so that "=>",
    # "==", "<=" and ">=" are not split into their one-character prefixes.
    PUNCTUATION_PATTERN = /\G(?:=>|==|<=|>=|[-+*\/{}()\[\];,.!<>=])/
    # A run of whitespace characters.
    WHITESPACE_PATTERN = /\G\s+/

    # @param source [String] the program text to tokenize
    def initialize(source)
      @source = source
      @position = 0
    end

    # Consumes and returns the next token, skipping any leading whitespace and
    # "//" line comments.
    #
    # @return [Token] the next token, or an EOF token once the input is exhausted
    # @raise [RuntimeError] "Unterminated string" when a string literal is not
    #   closed, or "Unrecognized character ..." when nothing matches
    def get_token
      skip_trivia
      return Token.new(TokenKinds::EOF) if at_end

      if (lexeme = consume(NUMBER_PATTERN))
        Token.new(TokenKinds::NUMBER, lexeme.to_f)
      elsif @source[@position] == '"'
        string_token
      elsif (lexeme = consume(ATOM_PATTERN))
        # Drop the leading colon before turning the name into a symbol.
        Token.new(TokenKinds::ATOM, lexeme[1..-1].to_sym)
      elsif (lexeme = consume(WORD_PATTERN))
        word_token(lexeme)
      elsif (lexeme = consume(CLASS_NAME_PATTERN))
        Token.new(TokenKinds::CLASS_NAME, lexeme)
      elsif (lexeme = consume(PUNCTUATION_PATTERN))
        punctuation_token(lexeme)
      else
        raise "Unrecognized character #{@source[@position]}"
      end
    end

    # Returns the next token without consuming it. The position is restored
    # even when lexing raises, so a failed peek never swallows input.
    #
    # @return [Token]
    def peek
      saved_position = @position
      get_token
    ensure
      @position = saved_position
    end

    # Tokenizes the rest of the input.
    #
    # @return [Array<Token>] every remaining token followed by exactly one EOF token
    def scan_all
      tokens = []
      loop do
        # Skip trivia before testing for the end, so trailing whitespace or a
        # trailing comment does not produce an extra EOF token.
        skip_trivia
        break if at_end

        tokens << get_token
      end
      tokens << Token.new(TokenKinds::EOF)
    end

    private

    # Matches +pattern+ at the current position. On success it advances past
    # the match and returns the matched text; otherwise it returns nil and
    # leaves the position untouched.
    def consume(pattern)
      match = @source.match(pattern, @position)
      return unless match

      @position += match[0].size
      match[0]
    end

    # Classifies a whole lower-case word as a keyword or an identifier. Working
    # on the complete word keeps identifiers that merely start with a keyword
    # (index, order, nullable, ...) in one piece.
    def word_token(word)
      case word
      when 'null' then Token.new(TokenKinds::NULL)
      when 'true' then Token.new(TokenKinds::BOOLEAN, true)
      when 'false' then Token.new(TokenKinds::BOOLEAN, false)
      when 'if' then Token.new(TokenKinds::IF)
      when 'elseif' then Token.new(TokenKinds::ELSEIF)
      when 'else' then Token.new(TokenKinds::ELSE)
      when 'and', 'or', 'not' then Token.new(TokenKinds::OPERATOR, word.to_sym)
      when 'let' then Token.new(TokenKinds::LET)
      when 'function' then Token.new(TokenKinds::FUNCTION)
      when 'class' then Token.new(TokenKinds::CLASS)
      when 'public' then Token.new(TokenKinds::PUBLIC)
      when 'private' then Token.new(TokenKinds::PRIVATE)
      when 'for' then Token.new(TokenKinds::FOR)
      when 'in' then Token.new(TokenKinds::IN)
      else Token.new(TokenKinds::IDENTIFIER, word)
      end
    end

    # Maps a lexeme matched by PUNCTUATION_PATTERN to its token. Anything that
    # is not a delimiter is an operator whose value is the lexeme as a symbol.
    def punctuation_token(lexeme)
      case lexeme
      when '{' then Token.new(TokenKinds::LBRACE)
      when '}' then Token.new(TokenKinds::RBRACE)
      when '(' then Token.new(TokenKinds::LPAREN)
      when ')' then Token.new(TokenKinds::RPAREN)
      when '[' then Token.new(TokenKinds::LBRACKET)
      when ']' then Token.new(TokenKinds::RBRACKET)
      when ';' then Token.new(TokenKinds::SEMICOLON)
      when ',' then Token.new(TokenKinds::COMMA)
      when '.' then Token.new(TokenKinds::DOT)
      when '=>' then Token.new(TokenKinds::ROCKET)
      when '=' then Token.new(TokenKinds::EQUALS)
      else Token.new(TokenKinds::OPERATOR, lexeme.to_sym)
      end
    end

    # Lexes a double-quoted string literal starting at the opening quote.
    # Recognised escapes are \" (quote), \n (newline) and \t (tab); any other
    # backslash is kept literally.
    def string_token
      @position += 1 # opening quote
      text = +''
      until at_end || @source[@position] == '"'
        char = @source[@position]
        escaped = char == '\\' ? unescape(@source[@position + 1]) : nil
        if escaped
          text << escaped
          @position += 2
        else
          text << char
          @position += 1
        end
      end
      raise 'Unterminated string' if at_end

      @position += 1 # closing quote
      Token.new(TokenKinds::STRING, text)
    end

    # Returns the character an escape sequence stands for, or nil when the
    # character after the backslash does not form a recognised escape.
    def unescape(char)
      case char
      when '"' then '"'
      when 'n' then "\n"
      when 't' then "\t"
      end
    end

    # Skips any mix of whitespace and line comments. It repeats until neither
    # helper makes progress, which handles indentation after a comment and
    # several comments in a row.
    def skip_trivia
      loop do
        start = @position
        skip_whitespace
        skip_comment
        break if @position == start
      end
    end

    # Advances past a run of whitespace, if there is one.
    def skip_whitespace
      consume(WHITESPACE_PATTERN)
    end

    # Advances past a "//" comment, including its terminating newline when
    # there is one.
    def skip_comment
      return unless @source[@position, 2] == '//'

      newline = @source.index("\n", @position)
      @position = newline ? newline + 1 : @source.size
    end

    # True once every character has been consumed. Uses >= so an overshoot can
    # never cause an endless loop.
    def at_end
      @position >= @source.size
    end
  end