A toy dynamic programming language written in Ruby
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

lexer.rb 5.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. class Lexer
  2. def initialize(source)
  3. @source = source
  4. @position = 0
  5. end
  6. def get_token
  7. skip_whitespace
  8. skip_comment
  9. return Token.new(TokenKinds::EOF) if at_end
  10. source = @source.slice(@position..-1)
  11. if source.match(/\Anull/)
  12. @position += 4
  13. Token.new(TokenKinds::NULL)
  14. elsif source.match(/\Atrue/)
  15. @position += 4
  16. Token.new(TokenKinds::BOOLEAN, true)
  17. elsif source.match(/\Afalse/)
  18. @position += 5
  19. Token.new(TokenKinds::BOOLEAN, false)
  20. elsif source.match(/\Aif/)
  21. @position += 2
  22. Token.new(TokenKinds::IF)
  23. elsif source.match(/\Aelseif/)
  24. @position += 6
  25. Token.new(TokenKinds::ELSEIF)
  26. elsif source.match(/\Aelse/)
  27. @position += 4
  28. Token.new(TokenKinds::ELSE)
  29. elsif source.match(/\A\d+(\.\d+)?/)
  30. number = source.match(/\A\d+(\.\d+)?/)[0]
  31. @position += number.size
  32. Token.new(TokenKinds::NUMBER, number.to_f)
  33. elsif source[0] == '"'
  34. @position += 1
  35. string = String.new
  36. while !at_end && @source[@position] != '"'
  37. if @source[@position] == '\\'
  38. if @source[@position + 1] == '"'
  39. @position += 1
  40. elsif @source[@position + 1] == 'n'
  41. @position += 2
  42. string << "\n"
  43. next
  44. elsif @source[@position + 1] == 't'
  45. @position += 2
  46. string << "\t"
  47. next
  48. end
  49. end
  50. string << @source[@position]
  51. @position += 1
  52. end
  53. if at_end
  54. raise 'Unterminated string'
  55. elsif @source[@position] == '"'
  56. @position += 1
  57. end
  58. Token.new(TokenKinds::STRING, string)
  59. elsif source.match(/\A\:([a-z][a-zA-Z0-9_]*)/)
  60. atom = source.match(/\A\:([a-z][a-zA-Z0-9_]*)/)[1]
  61. @position += atom.size + 1
  62. Token.new(TokenKinds::ATOM, atom.to_sym)
  63. elsif source.match(/\A\+/)
  64. @position += 1
  65. Token.new(TokenKinds::OPERATOR, :+)
  66. elsif source.match(/\A\-/)
  67. @position += 1
  68. Token.new(TokenKinds::OPERATOR, :-)
  69. elsif source.match(/\A\*/)
  70. @position += 1
  71. Token.new(TokenKinds::OPERATOR, :*)
  72. elsif source.match(%r{\A\/})
  73. @position += 1
  74. Token.new(TokenKinds::OPERATOR, :/)
  75. elsif source.match(/\A\{/)
  76. @position += 1
  77. Token.new(TokenKinds::LBRACE)
  78. elsif source.match(/\A\}/)
  79. @position += 1
  80. Token.new(TokenKinds::RBRACE)
  81. elsif source.match(/\A\(/)
  82. @position += 1
  83. Token.new(TokenKinds::LPAREN)
  84. elsif source.match(/\A\)/)
  85. @position += 1
  86. Token.new(TokenKinds::RPAREN)
  87. elsif source.match(/\A\[/)
  88. @position += 1
  89. Token.new(TokenKinds::LBRACKET)
  90. elsif source.match(/\A\]/)
  91. @position += 1
  92. Token.new(TokenKinds::RBRACKET)
  93. elsif source.match(/\A\;/)
  94. @position += 1
  95. Token.new(TokenKinds::SEMICOLON)
  96. elsif source.match(/\A,/)
  97. @position += 1
  98. Token.new(TokenKinds::COMMA)
  99. elsif source.match(/\A\./)
  100. @position += 1
  101. Token.new(TokenKinds::DOT)
  102. elsif source.match(/\A=>/)
  103. @position += 2
  104. Token.new(TokenKinds::ROCKET)
  105. elsif source.match(/\A\#/)
  106. @position += 1
  107. Token.new(TokenKinds::HASH)
  108. elsif source.match(/\A!/)
  109. @position += 1
  110. Token.new(TokenKinds::OPERATOR, :!)
  111. elsif source.match(/\A==/)
  112. @position += 2
  113. Token.new(TokenKinds::OPERATOR, :==)
  114. elsif source.match(/\A\<=/)
  115. @position += 2
  116. Token.new(TokenKinds::OPERATOR, :<=)
  117. elsif source.match(/\A\>\=/)
  118. @position += 2
  119. Token.new(TokenKinds::OPERATOR, :>=)
  120. elsif source.match(/\A\</)
  121. @position += 1
  122. Token.new(TokenKinds::OPERATOR, :<)
  123. elsif source.match(/\A\>/)
  124. @position += 1
  125. Token.new(TokenKinds::OPERATOR, :>)
  126. elsif source.match(/\Aand/)
  127. @position += 3
  128. Token.new(TokenKinds::OPERATOR, :and)
  129. elsif source.match(/\Aor/)
  130. @position += 2
  131. Token.new(TokenKinds::OPERATOR, :or)
  132. elsif source.match(/\Anot/)
  133. @position += 3
  134. Token.new(TokenKinds::OPERATOR, :not)
  135. elsif source.match(/\A\=/)
  136. @position += 1
  137. Token.new(TokenKinds::EQUALS)
  138. elsif source.match(/\Alet/)
  139. @position += 3
  140. Token.new(TokenKinds::LET)
  141. elsif source.match(/\Afunction/)
  142. @position += 8
  143. Token.new(TokenKinds::FUNCTION)
  144. elsif source.match(/\Amethod/)
  145. @position += 6
  146. Token.new(TokenKinds::METHOD)
  147. elsif source.match(/\Aclassmethod/)
  148. @position += 11
  149. Token.new(TokenKinds::CLASSMETHOD)
  150. elsif source.match(/\Aclass/)
  151. @position += 5
  152. Token.new(TokenKinds::CLASS)
  153. elsif source.match(/\Apublic/)
  154. @position += 6
  155. Token.new(TokenKinds::PUBLIC)
  156. elsif source.match(/\Aprivate/)
  157. @position += 7
  158. Token.new(TokenKinds::PRIVATE)
  159. elsif source.match(/\Afor/)
  160. @position += 3
  161. Token.new(TokenKinds::FOR)
  162. elsif source.match(/\Ain/)
  163. @position += 2
  164. Token.new(TokenKinds::IN)
  165. elsif source.match(/\A[a-z][a-zA-Z0-9_]*/)
  166. identifier = source.match(/\A[a-z][a-zA-Z0-9_]*/)[0]
  167. @position += identifier.size
  168. Token.new(TokenKinds::IDENTIFIER, identifier)
  169. elsif source.match(/\A[A-Z][a-zA-Z0-9_]*/)
  170. class_name = source.match(/\A[A-Z][a-zA-Z0-9_]*/)[0]
  171. @position += class_name.size
  172. Token.new(TokenKinds::CLASS_NAME, class_name)
  173. else
  174. raise "Unrecognized character #{source[0]}"
  175. end
  176. end
  177. def peek
  178. position = @position
  179. token = get_token
  180. @position = position
  181. token
  182. end
  183. def scan_all
  184. tokens = Array.new
  185. tokens << get_token until at_end
  186. tokens << Token.new(TokenKinds::EOF)
  187. end
  188. private
  189. def skip_whitespace
  190. @position += 1 while !at_end && @source[@position].match(/\A\s/)
  191. end
  192. def skip_comment
  193. if @source.slice(@position..@position + 1) == '//'
  194. @position += 2
  195. @position += 1 until @source[@position] == "\n" || at_end
  196. @position += 1 unless at_end
  197. end
  198. end
  199. def at_end
  200. @position == @source.size
  201. end
  202. end