A toy dynamic programming language written in Ruby
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

lexer.rb 5.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  # Hand-written lexer that turns a source string into a stream of tokens.
  #
  # Tokens are created with Token.new(kind) or Token.new(kind, value), where
  # kind is a TokenKinds constant. Both Token and TokenKinds are defined
  # outside this class and are only referenced when a token is produced.
  class Lexer
    # Every pattern starts with \G so that String#match(pattern, @position)
    # can only match exactly at the current position; this avoids copying the
    # remainder of the source for each token.
    WHITESPACE_PATTERN = /\G\s+/
    NUMBER_PATTERN = /\G\d+(?:\.\d+)?/
    ATOM_PATTERN = /\G:([a-z][a-zA-Z0-9_]*)/
    WORD_PATTERN = /\G[a-z][a-zA-Z0-9_]*/
    CLASS_NAME_PATTERN = /\G[A-Z][a-zA-Z0-9_]*/
    # Two-character symbols are listed first so that "=>" is not read as "="
    # followed by ">", "<=" is not read as "<" followed by "=", and so on.
    PUNCTUATION_PATTERN = /\G(?:=>|==|<=|>=|[-+*\/{}()\[\];,.!<>=])/

    # Character following a backslash inside a string literal => replacement.
    # Any other backslash sequence is kept verbatim (backslash included).
    ESCAPES = { '"' => '"', 'n' => "\n", 't' => "\t", '\\' => '\\' }.freeze

    # @param source [String] the program text to tokenize
    def initialize(source)
      @source = source
      @position = 0
    end

    # Skips whitespace and `//` comments, then reads the next token.
    #
    # @return [Token] the next token, or an EOF token once the input is used up
    # @raise [RuntimeError] "Unterminated string" when a string literal is not
    #   closed, or "Unrecognized character X" when no token can start at X
    def get_token
      skip_trivia
      return Token.new(TokenKinds::EOF) if at_end

      # The scanners are mutually exclusive on their first character, so their
      # relative order does not affect the result.
      scan_number || scan_string || scan_atom || scan_punctuation ||
        scan_word || scan_class_name ||
        raise("Unrecognized character #{@source[@position]}")
    end

    # Tokenizes the remaining input.
    #
    # @return [Array<Token>] all remaining tokens followed by exactly one EOF
    #   token, even when the source ends with whitespace or a comment
    def scan_all
      tokens = []
      loop do
        # Skip trailing trivia before testing for the end so that get_token
        # never yields an EOF token inside the loop.
        skip_trivia
        break if at_end

        tokens << get_token
      end
      tokens << Token.new(TokenKinds::EOF)
    end

    private

    # Matches pattern at the current position and advances past the match.
    #
    # @return [MatchData, nil] the match, or nil (position untouched)
    def consume(pattern)
      match = @source.match(pattern, @position)
      return unless match

      @position += match[0].size
      match
    end

    # Integer or decimal literal; the value is always a Float.
    def scan_number
      match = consume(NUMBER_PATTERN)
      Token.new(TokenKinds::NUMBER, match[0].to_f) if match
    end

    # Double-quoted string literal with the escapes listed in ESCAPES.
    def scan_string
      return unless @source[@position] == '"'

      @position += 1 # opening quote
      text = String.new
      until at_end || @source[@position] == '"'
        char = @source[@position]
        escaped = @source[@position + 1]
        if char == '\\' && ESCAPES.key?(escaped)
          text << ESCAPES[escaped]
          @position += 2
        else
          text << char
          @position += 1
        end
      end
      raise 'Unterminated string' if at_end

      @position += 1 # closing quote
      Token.new(TokenKinds::STRING, text)
    end

    # Atom literal such as `:name`; the value is the name as a Symbol.
    def scan_atom
      match = consume(ATOM_PATTERN)
      Token.new(TokenKinds::ATOM, match[1].to_sym) if match
    end

    # Symbolic operators and delimiters.
    def scan_punctuation
      match = consume(PUNCTUATION_PATTERN)
      return unless match

      lexeme = match[0]
      case lexeme
      when '{' then Token.new(TokenKinds::LBRACE)
      when '}' then Token.new(TokenKinds::RBRACE)
      when '(' then Token.new(TokenKinds::LPAREN)
      when ')' then Token.new(TokenKinds::RPAREN)
      when '[' then Token.new(TokenKinds::LBRACKET)
      when ']' then Token.new(TokenKinds::RBRACKET)
      when ';' then Token.new(TokenKinds::SEMICOLON)
      when ',' then Token.new(TokenKinds::COMMA)
      when '.' then Token.new(TokenKinds::DOT)
      when '=>' then Token.new(TokenKinds::ROCKET)
      when '=' then Token.new(TokenKinds::EQUALS)
      else Token.new(TokenKinds::OPERATOR, lexeme.to_sym) # + - * / ! == <= >= < >
      end
    end

    # Lower-case word: a keyword, a word operator, or an identifier.
    #
    # The whole word is read before it is classified, so an identifier that
    # merely starts with a keyword (`nullable`, `index`, `order`) stays a
    # single IDENTIFIER token.
    def scan_word
      match = consume(WORD_PATTERN)
      return unless match

      word = match[0]
      case word
      when 'null' then Token.new(TokenKinds::NULL)
      when 'true' then Token.new(TokenKinds::BOOLEAN, true)
      when 'false' then Token.new(TokenKinds::BOOLEAN, false)
      when 'and', 'or', 'not' then Token.new(TokenKinds::OPERATOR, word.to_sym)
      when 'if' then Token.new(TokenKinds::IF)
      when 'elseif' then Token.new(TokenKinds::ELSEIF)
      when 'else' then Token.new(TokenKinds::ELSE)
      when 'let' then Token.new(TokenKinds::LET)
      when 'function' then Token.new(TokenKinds::FUNCTION)
      when 'class' then Token.new(TokenKinds::CLASS)
      when 'public' then Token.new(TokenKinds::PUBLIC)
      when 'private' then Token.new(TokenKinds::PRIVATE)
      when 'for' then Token.new(TokenKinds::FOR)
      when 'in' then Token.new(TokenKinds::IN)
      else Token.new(TokenKinds::IDENTIFIER, word)
      end
    end

    # Capitalized word, reported as a CLASS_NAME token.
    def scan_class_name
      match = consume(CLASS_NAME_PATTERN)
      Token.new(TokenKinds::CLASS_NAME, match[0]) if match
    end

    # Skips any mix of whitespace and comments, repeating until neither step
    # moves the position (handles indented and consecutive comments).
    def skip_trivia
      loop do
        start = @position
        skip_whitespace
        skip_comment
        break if @position == start
      end
    end

    # Advances past a run of whitespace characters, if any.
    def skip_whitespace
      consume(WHITESPACE_PATTERN)
    end

    # Advances past a `//` comment, including its terminating newline if any.
    def skip_comment
      return unless @source[@position, 2] == '//'

      newline = @source.index("\n", @position)
      @position = newline ? newline + 1 : @source.size
    end

    # True once the whole source has been consumed.
    def at_end
      @position >= @source.size
    end
  end