A toy dynamic programming language written in Ruby
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

lexer.rb 5.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  # Hand-written lexer that turns source text into a flat stream of tokens.
  #
  # Tokens are built with `Token.new(kind)` or `Token.new(kind, value)`, where
  # `kind` is a constant looked up on `TokenKinds`. Both `Token` and
  # `TokenKinds` are defined outside this class and are resolved lazily (only
  # when a token is produced), so this class does not depend on load order.
  #
  # Usage:
  #   Lexer.new('let x = 1;').scan_all
  class Lexer
    # Reserved words mapped to [TokenKinds constant name, optional token value].
    # A word is only treated as reserved when it matches in full, so
    # identifiers such as `nullable`, `input` or `order` stay identifiers.
    KEYWORDS = {
      'null' => [:NULL],
      'true' => [:BOOLEAN, true],
      'false' => [:BOOLEAN, false],
      'if' => [:IF],
      'elseif' => [:ELSEIF],
      'else' => [:ELSE],
      'and' => [:OPERATOR, :and],
      'or' => [:OPERATOR, :or],
      'not' => [:OPERATOR, :not],
      'let' => [:LET],
      'function' => [:FUNCTION],
      'class' => [:CLASS],
      'public' => [:PUBLIC],
      'private' => [:PRIVATE],
      'for' => [:FOR],
      'in' => [:IN]
    }.freeze

    # Symbolic lexemes that have a dedicated token kind and carry no value.
    # Any other lexeme matched by SYMBOL_PATTERN becomes an OPERATOR token
    # whose value is the lexeme converted to a Symbol (e.g. :+, :==, :<=).
    PUNCTUATION = {
      '{' => :LBRACE,
      '}' => :RBRACE,
      '(' => :LPAREN,
      ')' => :RPAREN,
      '[' => :LBRACKET,
      ']' => :RBRACKET,
      ';' => :SEMICOLON,
      ',' => :COMMA,
      '.' => :DOT,
      '=>' => :ROCKET,
      '=' => :EQUALS
    }.freeze

    # Character following a backslash inside a string literal => replacement.
    # A backslash followed by anything else is kept verbatim.
    ESCAPES = { '"' => '"', '\\' => '\\', 'n' => "\n", 't' => "\t" }.freeze

    # All patterns are anchored with \G so that `String#match(pattern, pos)`
    # only succeeds when the lexeme starts exactly at `pos`; this avoids
    # copying the remainder of the source for every token.
    WHITESPACE_PATTERN = /\G\s+/
    NUMBER_PATTERN = /\G\d+(?:\.\d+)?/
    ATOM_PATTERN = /\G:([a-z][a-zA-Z0-9_]*)/
    # Two-character lexemes are listed first so `=>`, `==`, `<=` and `>=` win
    # over their one-character prefixes.
    SYMBOL_PATTERN = /\G(?:=>|==|<=|>=|[-+*\/!<>={}()\[\];,.])/
    WORD_PATTERN = /\G[a-z][a-zA-Z0-9_]*/
    CLASS_NAME_PATTERN = /\G[A-Z][a-zA-Z0-9_]*/

    private_constant :KEYWORDS, :PUNCTUATION, :ESCAPES, :WHITESPACE_PATTERN,
                     :NUMBER_PATTERN, :ATOM_PATTERN, :SYMBOL_PATTERN,
                     :WORD_PATTERN, :CLASS_NAME_PATTERN

    # @param source [String] program text; line breaks are replaced by single
    #   spaces (and trailing line breaks dropped) before scanning
    def initialize(source)
      @source = source.split("\n").join(' ')
      @position = 0
    end

    # Consumes and returns the next token, skipping leading whitespace.
    # Once the input is exhausted every call returns an EOF token.
    #
    # @return [Token]
    # @raise [RuntimeError] 'Unterminated string' when a string literal is not
    #   closed, or 'Unrecognized character X' when no rule matches
    def get_token
      skip_whitespace
      return token(:EOF) if at_end

      if (number = scan(NUMBER_PATTERN))
        token(:NUMBER, number[0].to_f)
      elsif @source[@position] == '"'
        read_string
      elsif (atom = scan(ATOM_PATTERN))
        token(:ATOM, atom[1].to_sym)
      elsif (symbol = scan(SYMBOL_PATTERN))
        symbol_token(symbol[0])
      elsif (word = scan(WORD_PATTERN))
        word_token(word[0])
      elsif (class_name = scan(CLASS_NAME_PATTERN))
        token(:CLASS_NAME, class_name[0])
      else
        raise "Unrecognized character #{@source[@position]}"
      end
    end

    # Tokenizes the rest of the input.
    #
    # @return [Array<Token>] every remaining token followed by exactly one EOF
    def scan_all
      tokens = []
      loop do
        # Skip whitespace before testing for the end; otherwise trailing
        # blanks make get_token return an EOF that would then be duplicated.
        skip_whitespace
        break if at_end

        tokens << get_token
      end
      tokens << token(:EOF)
    end

    private

    # Advances past any run of whitespace at the current position.
    def skip_whitespace
      scan(WHITESPACE_PATTERN)
    end

    # True once every character of the source has been consumed.
    def at_end
      @position >= @source.size
    end

    # Tries `pattern` at the current position. On success the position is
    # moved past the lexeme and the MatchData is returned; otherwise nil.
    def scan(pattern)
      match = @source.match(pattern, @position)
      return unless match

      @position += match[0].size
      match
    end

    # Builds a Token whose kind is the TokenKinds constant named `kind`.
    def token(kind, *value)
      Token.new(TokenKinds.const_get(kind), *value)
    end

    # Maps a lexeme matched by SYMBOL_PATTERN to punctuation or an operator.
    def symbol_token(text)
      kind = PUNCTUATION[text]
      kind ? token(kind) : token(:OPERATOR, text.to_sym)
    end

    # Maps a lower-case word to its keyword token, or to an IDENTIFIER.
    def word_token(text)
      KEYWORDS.key?(text) ? token(*KEYWORDS[text]) : token(:IDENTIFIER, text)
    end

    # Reads a double-quoted string literal starting at the opening quote and
    # returns a STRING token holding the unescaped contents.
    def read_string
      @position += 1 # opening quote
      text = String.new
      until at_end || @source[@position] == '"'
        char = @source[@position]
        escaped = @source[@position + 1]
        if char == '\\' && ESCAPES.key?(escaped)
          text << ESCAPES[escaped]
          @position += 2
        else
          text << char
          @position += 1
        end
      end
      raise 'Unterminated string' if at_end

      @position += 1 # closing quote
      token(:STRING, text)
    end
  end