A toy dynamic programming language written in Ruby
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

lexer.rb 5.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  # Converts source text of the toy language into Token objects.
  #
  # The lexer keeps a character offset into the source and hands out one token
  # per #get_token call. Whitespace and `//` line comments are skipped between
  # tokens. Token and TokenKinds are defined elsewhere in the project and are
  # only looked up when a token is actually built.
  class Lexer
    # Reserved words mapped to [TokenKinds constant name, optional value].
    # Kinds are stored as symbols and resolved lazily (see #build_token) so
    # loading this file does not require TokenKinds to be loaded first.
    KEYWORDS = {
      'null' => [:NULL],
      'true' => [:BOOLEAN, true],
      'false' => [:BOOLEAN, false],
      'if' => [:IF],
      'elseif' => [:ELSEIF],
      'else' => [:ELSE],
      'and' => [:OPERATOR, :and],
      'or' => [:OPERATOR, :or],
      'not' => [:OPERATOR, :not],
      'let' => [:LET],
      'function' => [:FUNCTION],
      'class' => [:CLASS],
      'public' => [:PUBLIC],
      'private' => [:PRIVATE],
      'for' => [:FOR],
      'in' => [:IN]
    }.freeze

    # Operators and punctuation mapped to [TokenKinds constant name, optional value].
    SYMBOLS = {
      '+' => [:OPERATOR, :+],
      '-' => [:OPERATOR, :-],
      '*' => [:OPERATOR, :*],
      '/' => [:OPERATOR, :/],
      '{' => [:LBRACE],
      '}' => [:RBRACE],
      '(' => [:LPAREN],
      ')' => [:RPAREN],
      '[' => [:LBRACKET],
      ']' => [:RBRACKET],
      ';' => [:SEMICOLON],
      ',' => [:COMMA],
      '.' => [:DOT],
      '=>' => [:ROCKET],
      '!' => [:OPERATOR, :!],
      '==' => [:OPERATOR, :==],
      '<=' => [:OPERATOR, :<=],
      '>=' => [:OPERATOR, :>=],
      '<' => [:OPERATOR, :<],
      '>' => [:OPERATOR, :>],
      '=' => [:EQUALS]
    }.freeze

    # All patterns are anchored with \G so that String#match(pattern, offset)
    # only matches exactly at the current position, without copying the rest
    # of the source for every token.
    NUMBER_PATTERN = /\G\d+(?:\.\d+)?/.freeze
    ATOM_PATTERN = /\G:[a-z][a-zA-Z0-9_]*/.freeze
    # Two-character symbols are listed first so they win over their one-character prefixes.
    SYMBOL_PATTERN = /\G(?:=>|==|<=|>=|[-+*\/{}()\[\];,.!<>=])/.freeze
    WORD_PATTERN = /\G[a-z][a-zA-Z0-9_]*/.freeze
    CLASS_NAME_PATTERN = /\G[A-Z][a-zA-Z0-9_]*/.freeze

    private_constant :KEYWORDS, :SYMBOLS, :NUMBER_PATTERN, :ATOM_PATTERN,
                     :SYMBOL_PATTERN, :WORD_PATTERN, :CLASS_NAME_PATTERN

    # @param source [String] the complete program text to tokenize
    def initialize(source)
      @source = source
      @position = 0
    end

    # Consumes and returns the next token.
    #
    # Leading whitespace and `//` comments (any number, in any order) are
    # skipped first. Keywords are recognized only as whole words, so
    # `nullable` or `index` lex as single identifiers.
    #
    # @return [Token] the next token, or an EOF token once the input is exhausted
    # @raise [RuntimeError] "Unterminated string" or "Unrecognized character ..."
    def get_token
      skip_trivia
      return Token.new(TokenKinds::EOF) if at_end
      return read_string if @source[@position] == '"'

      if (text = scan(NUMBER_PATTERN))
        # Every numeric literal, integral or not, is carried as a Float.
        Token.new(TokenKinds::NUMBER, text.to_f)
      elsif (text = scan(ATOM_PATTERN))
        # Drop the leading ':' to get the atom's name.
        Token.new(TokenKinds::ATOM, text[1..-1].to_sym)
      elsif (text = scan(SYMBOL_PATTERN))
        build_token(SYMBOLS.fetch(text))
      elsif (text = scan(WORD_PATTERN))
        # Lex the whole word first, then decide whether it is reserved.
        KEYWORDS.key?(text) ? build_token(KEYWORDS[text]) : Token.new(TokenKinds::IDENTIFIER, text)
      elsif (text = scan(CLASS_NAME_PATTERN))
        Token.new(TokenKinds::CLASS_NAME, text)
      else
        raise "Unrecognized character #{@source[@position]}"
      end
    end

    # Returns the next token without consuming it.
    #
    # @return [Token]
    def peek
      saved = @position
      token = get_token
      @position = saved
      token
    end

    # Tokenizes everything from the current position to the end of input.
    #
    # @return [Array<Token>] the remaining tokens followed by exactly one EOF token
    def scan_all
      tokens = []
      loop do
        # Skip trivia before the end check so trailing whitespace or comments
        # do not produce a second EOF token.
        skip_trivia
        break if at_end

        tokens << get_token
      end
      tokens << Token.new(TokenKinds::EOF)
    end

    private

    # Tries +pattern+ at the current position; on success advances past the
    # match and returns the matched text, otherwise returns nil.
    def scan(pattern)
      match = @source.match(pattern, @position)
      return unless match

      @position += match[0].size
      match[0]
    end

    # Builds a Token from a [kind_name, optional_value] table entry.
    def build_token(spec)
      kind, *value = spec
      Token.new(TokenKinds.const_get(kind), *value)
    end

    # Reads a double-quoted string literal starting at the opening quote.
    # Recognized escapes are \" , \n and \t; any other backslash is kept
    # literally.
    def read_string
      @position += 1 # opening quote
      text = +''
      until at_end || @source[@position] == '"'
        if @source[@position] == '\\'
          case @source[@position + 1]
          when '"'
            @position += 1 # drop the backslash; the quote itself is appended below
          when 'n'
            text << "\n"
            @position += 2
            next
          when 't'
            text << "\t"
            @position += 2
            next
          end
        end
        text << @source[@position]
        @position += 1
      end
      raise 'Unterminated string' if at_end

      @position += 1 # closing quote
      Token.new(TokenKinds::STRING, text)
    end

    # Skips whitespace and comments until neither consumes any more input.
    def skip_trivia
      loop do
        start = @position
        skip_whitespace
        skip_comment
        break if @position == start
      end
    end

    # Advances past any run of whitespace characters.
    def skip_whitespace
      @position += 1 while !at_end && @source[@position].match?(/\s/)
    end

    # Advances past one `//` comment, including its terminating newline if present.
    def skip_comment
      return unless @source[@position, 2] == '//'

      newline = @source.index("\n", @position)
      @position = newline ? newline + 1 : @source.size
    end

    # True once every character of the source has been consumed.
    def at_end
      @position >= @source.size
    end
  end