A toy dynamic programming language written in Ruby
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

lexer.rb 5.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  # Converts source text of the toy language into a stream of Token objects.
  #
  # The lexer keeps a character offset into the source string; every call to
  # #get_token consumes exactly one token (after discarding whitespace and
  # `//` line comments) and advances that offset.
  class Lexer
    # @param source [String] the complete program text to tokenize
    def initialize(source)
      @source = source
      @position = 0
    end

    # Consumes and returns the next token.
    #
    # @return [Token] the next token, or an EOF token once the input is exhausted
    # @raise [RuntimeError] 'Unterminated string' when a string literal is never
    #   closed, or 'Unrecognized character …' when no rule matches
    def get_token
      skip_ignored
      return Token.new(TokenKinds::EOF) if at_end

      lex_word || lex_number || lex_string || lex_atom || lex_punctuation ||
        raise("Unrecognized character #{@source[@position]}")
    end

    # Returns the next token without consuming it.
    #
    # @return [Token]
    def peek
      saved_position = @position
      token = get_token
      @position = saved_position
      token
    end

    # Tokenizes everything from the current position to the end of the input.
    #
    # @return [Array<Token>] all remaining tokens followed by exactly one EOF token
    def scan_all
      tokens = []
      loop do
        # Skip trailing blanks/comments first so they never yield a spurious
        # extra EOF token before the final one appended below.
        skip_ignored
        break if at_end

        tokens << get_token
      end
      tokens << Token.new(TokenKinds::EOF)
    end

    private

    # Lexes a keyword, identifier (lowercase initial) or class name (uppercase
    # initial). The whole word is read first and only then compared against the
    # keyword list, so identifiers that merely start with a keyword (`index`,
    # `order`, `format`) stay single IDENTIFIER tokens.
    def lex_word
      # \G anchors the match at @position without copying the rest of the input.
      match = @source.match(/\G[a-z][a-zA-Z0-9_]*/, @position)
      if match
        word = match[0]
        @position += word.size
        return keyword_token(word) || Token.new(TokenKinds::IDENTIFIER, word)
      end

      match = @source.match(/\G[A-Z][a-zA-Z0-9_]*/, @position)
      return unless match

      @position += match[0].size
      Token.new(TokenKinds::CLASS_NAME, match[0])
    end

    # Maps a reserved word to its token; returns nil for ordinary identifiers.
    def keyword_token(word)
      case word
      when 'null' then Token.new(TokenKinds::NULL)
      when 'true' then Token.new(TokenKinds::BOOLEAN, true)
      when 'false' then Token.new(TokenKinds::BOOLEAN, false)
      when 'if' then Token.new(TokenKinds::IF)
      when 'elseif' then Token.new(TokenKinds::ELSEIF)
      when 'else' then Token.new(TokenKinds::ELSE)
      when 'and', 'or', 'not' then Token.new(TokenKinds::OPERATOR, word.to_sym)
      when 'let' then Token.new(TokenKinds::LET)
      when 'function' then Token.new(TokenKinds::FUNCTION)
      when 'method' then Token.new(TokenKinds::METHOD)
      when 'classmethod' then Token.new(TokenKinds::CLASSMETHOD)
      when 'class' then Token.new(TokenKinds::CLASS)
      when 'public' then Token.new(TokenKinds::PUBLIC)
      when 'private' then Token.new(TokenKinds::PRIVATE)
      when 'for' then Token.new(TokenKinds::FOR)
      when 'in' then Token.new(TokenKinds::IN)
      end
    end

    # Lexes an integer or decimal literal; the token value is always a Float.
    def lex_number
      match = @source.match(/\G\d+(\.\d+)?/, @position)
      return unless match

      @position += match[0].size
      Token.new(TokenKinds::NUMBER, match[0].to_f)
    end

    # Lexes a double-quoted string literal. Recognized escapes are \" (quote),
    # \n (newline) and \t (tab); any other backslash sequence is kept verbatim,
    # backslash included.
    def lex_string
      return unless @source[@position] == '"'

      @position += 1
      text = String.new
      until at_end || @source[@position] == '"'
        if @source[@position] == '\\'
          case @source[@position + 1]
          when '"'
            # Drop the backslash; the quote itself is appended below.
            @position += 1
          when 'n'
            @position += 2
            text << "\n"
            next
          when 't'
            @position += 2
            text << "\t"
            next
          end
        end
        text << @source[@position]
        @position += 1
      end
      raise 'Unterminated string' if at_end

      @position += 1 # closing quote
      Token.new(TokenKinds::STRING, text)
    end

    # Lexes an atom literal such as `:name`; the token value is a Symbol
    # without the leading colon.
    def lex_atom
      match = @source.match(/\G:([a-z][a-zA-Z0-9_]*)/, @position)
      return unless match

      @position += match[0].size
      Token.new(TokenKinds::ATOM, match[1].to_sym)
    end

    # Lexes operators and punctuation. Two-character symbols are tried first so
    # that `=>`, `==`, `<=` and `>=` are not split into single-character tokens.
    def lex_punctuation
      token = case @source[@position, 2]
              when '=>' then Token.new(TokenKinds::ROCKET)
              when '==' then Token.new(TokenKinds::OPERATOR, :==)
              when '<=' then Token.new(TokenKinds::OPERATOR, :<=)
              when '>=' then Token.new(TokenKinds::OPERATOR, :>=)
              end
      if token
        @position += 2
        return token
      end

      token = case @source[@position]
              when '+' then Token.new(TokenKinds::OPERATOR, :+)
              when '-' then Token.new(TokenKinds::OPERATOR, :-)
              when '*' then Token.new(TokenKinds::OPERATOR, :*)
              when '/' then Token.new(TokenKinds::OPERATOR, :/)
              when '!' then Token.new(TokenKinds::OPERATOR, :!)
              when '<' then Token.new(TokenKinds::OPERATOR, :<)
              when '>' then Token.new(TokenKinds::OPERATOR, :>)
              when '=' then Token.new(TokenKinds::EQUALS)
              when '{' then Token.new(TokenKinds::LBRACE)
              when '}' then Token.new(TokenKinds::RBRACE)
              when '(' then Token.new(TokenKinds::LPAREN)
              when ')' then Token.new(TokenKinds::RPAREN)
              when '[' then Token.new(TokenKinds::LBRACKET)
              when ']' then Token.new(TokenKinds::RBRACKET)
              when ';' then Token.new(TokenKinds::SEMICOLON)
              when ',' then Token.new(TokenKinds::COMMA)
              when '.' then Token.new(TokenKinds::DOT)
              when '#' then Token.new(TokenKinds::HASH)
              end
      @position += 1 if token
      token
    end

    # Skips any run of whitespace and `//` comments, in any order and number,
    # so that consecutive or indented comments do not reach the token rules.
    def skip_ignored
      loop do
        start = @position
        skip_whitespace
        skip_comment
        break if @position == start
      end
    end

    # Advances past consecutive whitespace characters.
    def skip_whitespace
      @position += 1 while !at_end && @source[@position].match?(/\s/)
    end

    # Advances past one `//` comment, including its terminating newline if any.
    def skip_comment
      return unless @source[@position, 2] == '//'

      @position += 2
      @position += 1 until at_end || @source[@position] == "\n"
      @position += 1 unless at_end
    end

    # True once every character of the source has been consumed.
    def at_end
      @position >= @source.size
    end
  end