A toy dynamic programming language written in Ruby
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

lexer.rb 5.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  # Converts source text of the toy language into a stream of Token objects.
  #
  # Newlines in the input are replaced by spaces before scanning, so the
  # language is newline-insensitive (this also applies inside string literals).
  #
  # Token and TokenKinds are defined elsewhere in the project; they are only
  # referenced from inside methods so this file can be loaded before them.
  class Lexer
    # Reserved words mapped to [TokenKinds constant name, optional token value].
    # A word is only treated as a keyword when it is the *whole* identifier,
    # so `index` or `nullable` lex as plain identifiers.
    KEYWORDS = {
      'null' => [:NULL],
      'true' => [:BOOLEAN, true],
      'false' => [:BOOLEAN, false],
      'if' => [:IF],
      'elseif' => [:ELSEIF],
      'else' => [:ELSE],
      'and' => [:OPERATOR, :and],
      'or' => [:OPERATOR, :or],
      'not' => [:OPERATOR, :not],
      'let' => [:LET],
      'function' => [:FUNCTION],
      'class' => [:CLASS],
      'public' => [:PUBLIC],
      'private' => [:PRIVATE],
      'for' => [:FOR],
      'in' => [:IN]
    }.freeze

    # Operators and punctuation mapped like KEYWORDS. Two-character entries
    # come first: the pattern built from these keys tries alternatives in
    # order, so `=>`, `==`, `<=` and `>=` must win over `=`, `<` and `>`.
    PUNCTUATION = {
      '=>' => [:ROCKET],
      '==' => [:OPERATOR, :==],
      '<=' => [:OPERATOR, :<=],
      '>=' => [:OPERATOR, :>=],
      '+' => [:OPERATOR, :+],
      '-' => [:OPERATOR, :-],
      '*' => [:OPERATOR, :*],
      '/' => [:OPERATOR, :/],
      '!' => [:OPERATOR, :!],
      '<' => [:OPERATOR, :<],
      '>' => [:OPERATOR, :>],
      '=' => [:EQUALS],
      '{' => [:LBRACE],
      '}' => [:RBRACE],
      '(' => [:LPAREN],
      ')' => [:RPAREN],
      '[' => [:LBRACKET],
      ']' => [:RBRACKET],
      ';' => [:SEMICOLON],
      ',' => [:COMMA],
      '.' => [:DOT]
    }.freeze

    # Character following a backslash inside a string literal => replacement.
    # Any other character after a backslash leaves the backslash in the string.
    ESCAPES = { 'n' => "\n", 't' => "\t", '"' => '"', '\\' => '\\' }.freeze

    # All patterns are anchored with \G so they match exactly at the position
    # handed to String#match, without copying the remaining input.
    WHITESPACE_PATTERN = /\G\s+/
    WORD_PATTERN = /\G[a-z][a-zA-Z0-9_]*/
    CLASS_NAME_PATTERN = /\G[A-Z][a-zA-Z0-9_]*/
    NUMBER_PATTERN = /\G\d+(\.\d+)?/
    ATOM_PATTERN = /\G:([a-z][a-zA-Z0-9_]*)/
    PUNCTUATION_PATTERN = /\G#{Regexp.union(PUNCTUATION.keys)}/

    private_constant :KEYWORDS, :PUNCTUATION, :ESCAPES, :WHITESPACE_PATTERN,
                     :WORD_PATTERN, :CLASS_NAME_PATTERN, :NUMBER_PATTERN,
                     :ATOM_PATTERN, :PUNCTUATION_PATTERN

    # @param source [String] program text to tokenize
    def initialize(source)
      @source = source.tr("\n", ' ')
      @position = 0
    end

    # Consumes and returns the next token, skipping leading whitespace.
    #
    # @return [Token] the next token; an EOF token once the input is exhausted
    # @raise [RuntimeError] 'Unterminated string' when a string literal is not
    #   closed, or 'Unrecognized character <c>' when no rule matches
    def get_token
      skip_whitespace
      return Token.new(TokenKinds::EOF) if at_end

      if (found = scan(WORD_PATTERN))
        word_token(found[0])
      elsif (found = scan(NUMBER_PATTERN))
        # Every numeric literal, integer-looking or not, becomes a Float.
        Token.new(TokenKinds::NUMBER, found[0].to_f)
      elsif @source[@position] == '"'
        string_token
      elsif (found = scan(ATOM_PATTERN))
        Token.new(TokenKinds::ATOM, found[1].to_sym)
      elsif (found = scan(PUNCTUATION_PATTERN))
        build_token(*PUNCTUATION.fetch(found[0]))
      elsif (found = scan(CLASS_NAME_PATTERN))
        Token.new(TokenKinds::CLASS_NAME, found[0])
      else
        raise "Unrecognized character #{@source[@position]}"
      end
    end

    # Tokenizes the rest of the input.
    #
    # @return [Array<Token>] all remaining tokens followed by exactly one EOF
    #   token, even when the input ends in whitespace
    def scan_all
      tokens = []
      loop do
        # Skip before testing for the end so trailing whitespace does not make
        # get_token emit an EOF in addition to the one appended below.
        skip_whitespace
        break if at_end

        tokens << get_token
      end
      tokens << Token.new(TokenKinds::EOF)
    end

    private

    # Matches pattern at the current position and, on success, advances past it.
    #
    # @return [MatchData, nil]
    def scan(pattern)
      found = @source.match(pattern, @position)
      return unless found

      @position = found.end(0)
      found
    end

    # Builds a token from a TokenKinds constant name, resolved at call time.
    def build_token(kind_name, *payload)
      Token.new(TokenKinds.const_get(kind_name), *payload)
    end

    # Returns the keyword token for a reserved word, an IDENTIFIER otherwise.
    def word_token(word)
      entry = KEYWORDS[word]
      entry ? build_token(*entry) : Token.new(TokenKinds::IDENTIFIER, word)
    end

    # Reads a double-quoted string literal starting at the opening quote.
    def string_token
      @position += 1 # opening quote
      text = String.new
      until at_end || @source[@position] == '"'
        char = @source[@position]
        replacement = char == '\\' ? ESCAPES[@source[@position + 1]] : nil
        if replacement
          text << replacement
          @position += 2
        else
          text << char
          @position += 1
        end
      end
      raise 'Unterminated string' if at_end

      @position += 1 # closing quote
      Token.new(TokenKinds::STRING, text)
    end

    def skip_whitespace
      scan(WHITESPACE_PATTERN)
    end

    def at_end
      @position >= @source.size
    end
  end