A toy dynamic programming language written in Ruby
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

lexer.rb 4.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  1. class Lexer
  2. def initialize(source)
  3. @source = source.split("\n").join(' ')
  4. @position = 0
  5. end
  6. def get_token
  7. return Token.new(TokenKinds::EOF) if at_end
  8. @position += 1 while !at_end && @source.slice(@position).match(/\s/)
  9. source = @source.slice(@position..-1)
  10. if source.match(/^null/)
  11. @position += 4
  12. Token.new(TokenKinds::NULL)
  13. elsif source.match(/^true/)
  14. @position += 4
  15. Token.new(TokenKinds::BOOLEAN, true)
  16. elsif source.match(/^false/)
  17. @position += 5
  18. Token.new(TokenKinds::BOOLEAN, false)
  19. elsif source.match(/^if/)
  20. @position += 2
  21. Token.new(TokenKinds::IF)
  22. elsif source.match(/^elseif/)
  23. @position += 6
  24. Token.new(TokenKinds::ELSEIF)
  25. elsif source.match(/^else/)
  26. @position += 4
  27. Token.new(TokenKinds::ELSE)
  28. elsif source.match(/^\d+(\.\d+)?/)
  29. number = source.match(/^\d+(\.\d+)?/)[0]
  30. @position += number.size
  31. Token.new(TokenKinds::NUMBER, number.to_f)
  32. elsif source.match(/^"(.*)"/)
  33. string = source.match(/^"([^"]*)"/)[1]
  34. @position += (string.size + 2)
  35. Token.new(TokenKinds::STRING, string)
  36. elsif source.match(/^\:([a-z][a-zA-Z0-9_]*)/)
  37. atom = source.match(/^\:([a-z][a-zA-Z0-9_]*)/)[1]
  38. @position += atom.size + 1
  39. Token.new(TokenKinds::ATOM, atom.to_sym)
  40. elsif source.match(/^\+/)
  41. @position += 1
  42. Token.new(TokenKinds::OPERATOR, :+)
  43. elsif source.match(/^\-/)
  44. @position += 1
  45. Token.new(TokenKinds::OPERATOR, :-)
  46. elsif source.match(/^\*/)
  47. @position += 1
  48. Token.new(TokenKinds::OPERATOR, :*)
  49. elsif source.match(%r{^\/})
  50. @position += 1
  51. Token.new(TokenKinds::OPERATOR, :/)
  52. elsif source.match(/^\{/)
  53. @position += 1
  54. Token.new(TokenKinds::LBRACE)
  55. elsif source.match(/^\}/)
  56. @position += 1
  57. Token.new(TokenKinds::RBRACE)
  58. elsif source.match(/^\(/)
  59. @position += 1
  60. Token.new(TokenKinds::LPAREN)
  61. elsif source.match(/^\)/)
  62. @position += 1
  63. Token.new(TokenKinds::RPAREN)
  64. elsif source.match(/^\[/)
  65. @position += 1
  66. Token.new(TokenKinds::LBRACKET)
  67. elsif source.match(/^\]/)
  68. @position += 1
  69. Token.new(TokenKinds::RBRACKET)
  70. elsif source.match(/^\;/)
  71. @position += 1
  72. Token.new(TokenKinds::SEMICOLON)
  73. elsif source.match(/^,/)
  74. @position += 1
  75. Token.new(TokenKinds::COMMA)
  76. elsif source.match(/^\./)
  77. @position += 1
  78. Token.new(TokenKinds::DOT)
  79. elsif source.match(/^=>/)
  80. @position += 2
  81. Token.new(TokenKinds::ROCKET)
  82. elsif source.match(/^!/)
  83. @position += 1
  84. Token.new(TokenKinds::OPERATOR, :!)
  85. elsif source.match(/^==/)
  86. @position += 2
  87. Token.new(TokenKinds::OPERATOR, :==)
  88. elsif source.match(/^\<=/)
  89. @position += 2
  90. Token.new(TokenKinds::OPERATOR, :<=)
  91. elsif source.match(/^\>\=/)
  92. @position += 2
  93. Token.new(TokenKinds::OPERATOR, :>=)
  94. elsif source.match(/^\</)
  95. @position += 1
  96. Token.new(TokenKinds::OPERATOR, :<)
  97. elsif source.match(/^\>/)
  98. @position += 1
  99. Token.new(TokenKinds::OPERATOR, :>)
  100. elsif source.match(/^and/)
  101. @position += 3
  102. Token.new(TokenKinds::OPERATOR, :and)
  103. elsif source.match(/^or/)
  104. @position += 2
  105. Token.new(TokenKinds::OPERATOR, :or)
  106. elsif source.match(/^not/)
  107. @position += 3
  108. Token.new(TokenKinds::OPERATOR, :not)
  109. elsif source.match(/^\=/)
  110. @position += 1
  111. Token.new(TokenKinds::EQUALS)
  112. elsif source.match(/^let/)
  113. @position += 3
  114. Token.new(TokenKinds::LET)
  115. elsif source.match(/^function/)
  116. @position += 8
  117. Token.new(TokenKinds::FUNCTION)
  118. elsif source.match(/^class/)
  119. @position += 5
  120. Token.new(TokenKinds::CLASS)
  121. elsif source.match(/^public/)
  122. @position += 6
  123. Token.new(TokenKinds::PUBLIC)
  124. elsif source.match(/^private/)
  125. @position += 7
  126. Token.new(TokenKinds::PRIVATE)
  127. elsif source.match(/^for/)
  128. @position += 3
  129. Token.new(TokenKinds::FOR)
  130. elsif source.match(/^in/)
  131. @position += 2
  132. Token.new(TokenKinds::IN)
  133. elsif source.match(/^[a-z][a-zA-Z0-9_]*/)
  134. identifier = source.match(/^[a-z][a-zA-Z0-9_]*/)[0]
  135. @position += identifier.size
  136. Token.new(TokenKinds::IDENTIFIER, identifier)
  137. elsif source.match(/^[A-Z][a-zA-Z0-9_]*/)
  138. class_name = source.match(/^[A-Z][a-zA-Z0-9_]*/)[0]
  139. @position += class_name.size
  140. Token.new(TokenKinds::CLASS_NAME, class_name)
  141. else
  142. raise "Unrecognized character #{source[0]}"
  143. end
  144. end
  145. def scan_all
  146. tokens = Array.new
  147. until at_end
  148. if @source.slice(@position).match(/\s/)
  149. @position += 1
  150. else
  151. tokens << get_token
  152. end
  153. end
  154. tokens << Token.new(TokenKinds::EOF)
  155. tokens
  156. end
  157. private
  158. def at_end
  159. @position == @source.size
  160. end
  161. end