A toy dynamic programming language written in Ruby
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

lexer.rb 5.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  # Hand-written lexer that turns a source string into Token objects.
  #
  # Token and TokenKinds are not defined in this file; they are resolved when a
  # lexing method runs, so they only need to be loaded before the first call.
  #
  # Lexical rules implemented here:
  # * whitespace and `//` line comments are skipped between tokens;
  # * a lower-case word is a keyword / word operator only when the WHOLE word
  #   matches (so `index` is an identifier, not `in` followed by `dex`);
  # * numbers are always produced as Floats;
  # * string literals are double-quoted and understand the escapes \" \n \t.
  class Lexer
    # All patterns are anchored with \G so they can be matched directly at
    # @position without copying the remainder of the source for every token.
    IDENTIFIER_PATTERN = /\G[a-z][a-zA-Z0-9_]*/.freeze
    CLASS_NAME_PATTERN = /\G[A-Z][a-zA-Z0-9_]*/.freeze
    NUMBER_PATTERN     = /\G\d+(\.\d+)?/.freeze
    ATOM_PATTERN       = /\G:([a-z][a-zA-Z0-9_]*)/.freeze

    # Operators whose token value is simply the operator text as a Symbol.
    TWO_CHAR_OPERATORS = %w[== <= >=].freeze
    ONE_CHAR_OPERATORS = %w[+ - * / ! < > #].freeze
    WORD_OPERATORS     = %w[and or not].freeze

    # @param source [String] the complete program text to tokenize
    def initialize(source)
      @source = source
      @position = 0
    end

    # Consumes and returns the next token.
    #
    # @return [Token] the next token, or an EOF token once the input is exhausted
    # @raise [RuntimeError] 'Unterminated string' or 'Unrecognized character <c>'
    def get_token
      skip_trivia
      return Token.new(TokenKinds::EOF) if at_end

      if (word = consume(IDENTIFIER_PATTERN))
        word_token(word[0])
      elsif (number = consume(NUMBER_PATTERN))
        Token.new(TokenKinds::NUMBER, number[0].to_f)
      elsif @source[@position] == '"'
        string_token
      elsif (atom = consume(ATOM_PATTERN))
        # Capture group 1 is the atom name without the leading colon.
        Token.new(TokenKinds::ATOM, atom[1].to_sym)
      elsif (class_name = consume(CLASS_NAME_PATTERN))
        Token.new(TokenKinds::CLASS_NAME, class_name[0])
      else
        punctuation_token || raise("Unrecognized character #{@source[@position]}")
      end
    end

    # Returns the next token without consuming it.
    #
    # @return [Token]
    def peek
      saved_position = @position
      get_token
    ensure
      # Restore the cursor even when lexing raised, so a failed peek has no
      # lasting effect on the lexer state.
      @position = saved_position
    end

    # Tokenizes everything from the current position to the end of the input.
    #
    # @return [Array<Token>] the remaining tokens followed by exactly one EOF token
    def scan_all
      tokens = []
      loop do
        # Skip trailing whitespace/comments first so that get_token never
        # contributes an EOF of its own before the final one is appended.
        skip_trivia
        break if at_end

        tokens << get_token
      end
      tokens << Token.new(TokenKinds::EOF)
    end

    private

    # Matches +pattern+ at the current position and advances past the match.
    # Returns the MatchData, or nil (leaving the position untouched) on failure.
    def consume(pattern)
      match = @source.match(pattern, @position)
      @position += match[0].size if match
      match
    end

    # Maps an already-consumed lower-case word to a keyword, word-operator or
    # identifier token.
    def word_token(word)
      return Token.new(TokenKinds::OPERATOR, word.to_sym) if WORD_OPERATORS.include?(word)

      case word
      when 'null'     then Token.new(TokenKinds::NULL)
      when 'true'     then Token.new(TokenKinds::BOOLEAN, true)
      when 'false'    then Token.new(TokenKinds::BOOLEAN, false)
      when 'if'       then Token.new(TokenKinds::IF)
      when 'elseif'   then Token.new(TokenKinds::ELSEIF)
      when 'else'     then Token.new(TokenKinds::ELSE)
      when 'let'      then Token.new(TokenKinds::LET)
      when 'function' then Token.new(TokenKinds::FUNCTION)
      when 'class'    then Token.new(TokenKinds::CLASS)
      when 'public'   then Token.new(TokenKinds::PUBLIC)
      when 'private'  then Token.new(TokenKinds::PRIVATE)
      when 'for'      then Token.new(TokenKinds::FOR)
      when 'in'       then Token.new(TokenKinds::IN)
      else Token.new(TokenKinds::IDENTIFIER, word)
      end
    end

    # Lexes a double-quoted string literal starting at the opening quote.
    def string_token
      @position += 1 # opening quote
      string = String.new
      while !at_end && @source[@position] != '"'
        if @source[@position] == '\\'
          case @source[@position + 1]
          when '"'
            # Drop the backslash; the quote itself is appended below.
            @position += 1
          when 'n'
            string << "\n"
            @position += 2
            next
          when 't'
            string << "\t"
            @position += 2
            next
          end
          # Any other character after a backslash: the backslash is kept
          # literally and the following character is handled on the next pass.
        end
        string << @source[@position]
        @position += 1
      end
      raise 'Unterminated string' if at_end

      @position += 1 # closing quote
      Token.new(TokenKinds::STRING, string)
    end

    # Lexes operators and delimiters. Two-character forms are tried before
    # their one-character prefixes. Returns nil when nothing matches.
    def punctuation_token
      pair = @source[@position, 2]
      char = @source[@position]
      if pair == '=>'
        @position += 2
        Token.new(TokenKinds::ROCKET)
      elsif TWO_CHAR_OPERATORS.include?(pair)
        @position += 2
        Token.new(TokenKinds::OPERATOR, pair.to_sym)
      elsif ONE_CHAR_OPERATORS.include?(char)
        @position += 1
        Token.new(TokenKinds::OPERATOR, char.to_sym)
      elsif (kind = delimiter_kind(char))
        @position += 1
        Token.new(kind)
      end
    end

    # Token kind for a single-character delimiter, or nil if +char+ is not one.
    def delimiter_kind(char)
      case char
      when '{' then TokenKinds::LBRACE
      when '}' then TokenKinds::RBRACE
      when '(' then TokenKinds::LPAREN
      when ')' then TokenKinds::RPAREN
      when '[' then TokenKinds::LBRACKET
      when ']' then TokenKinds::RBRACKET
      when ';' then TokenKinds::SEMICOLON
      when ',' then TokenKinds::COMMA
      when '.' then TokenKinds::DOT
      when '=' then TokenKinds::EQUALS
      end
    end

    # Skips any run of whitespace and comments, in any order and number,
    # stopping as soon as a full pass makes no progress.
    def skip_trivia
      loop do
        start = @position
        skip_whitespace
        skip_comment
        break if @position == start
      end
    end

    # Advances past consecutive whitespace characters.
    def skip_whitespace
      @position += 1 while !at_end && @source[@position].match?(/\s/)
    end

    # Advances past one `//` comment, including its terminating newline if any.
    def skip_comment
      return unless @source[@position, 2] == '//'

      @position += 2
      @position += 1 until at_end || @source[@position] == "\n"
      @position += 1 unless at_end
    end

    # True once the cursor has reached (or passed) the end of the source.
    def at_end
      @position >= @source.size
    end
  end