A toy dynamic programming language written in Ruby
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

lexer.rb 4.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  # Hand-written lexer for the toy language: turns source text into Token objects.
  #
  # Newlines are treated as ordinary whitespace. Words are scanned whole before
  # being classified, so an identifier that merely starts with a reserved word
  # (e.g. "order", "iffy", "letter") stays a single IDENTIFIER token.
  #
  # Token and TokenKinds are defined elsewhere; they are only referenced when a
  # token is actually built.
  class Lexer
    # Reserved words mapped to [TokenKinds constant name, optional token value].
    # Kinds are stored by name and resolved in #build_token so that TokenKinds is
    # not needed while this class body is being evaluated.
    KEYWORDS = {
      'null' => [:NULL],
      'true' => [:BOOLEAN, true],
      'false' => [:BOOLEAN, false],
      'if' => [:IF],
      'elseif' => [:ELSEIF],
      'else' => [:ELSE],
      'and' => [:OPERATOR, :and],
      'or' => [:OPERATOR, :or],
      'let' => [:LET],
      'function' => [:FUNCTION],
      'class' => [:CLASS],
      'public' => [:PUBLIC],
      'private' => [:PRIVATE]
    }.freeze

    # One- and two-character operators and delimiters, same value layout as KEYWORDS.
    PUNCTUATION = {
      '=>' => [:ROCKET],
      '==' => [:OPERATOR, :==],
      '<=' => [:OPERATOR, :<=],
      '>=' => [:OPERATOR, :>=],
      '<' => [:OPERATOR, :<],
      '>' => [:OPERATOR, :>],
      '=' => [:EQUALS],
      '+' => [:OPERATOR, :+],
      '-' => [:OPERATOR, :-],
      '*' => [:OPERATOR, :*],
      '/' => [:OPERATOR, :/],
      '{' => [:LBRACE],
      '}' => [:RBRACE],
      '(' => [:LPAREN],
      ')' => [:RPAREN],
      '[' => [:LBRACKET],
      ']' => [:RBRACKET],
      ';' => [:SEMICOLON],
      ',' => [:COMMA],
      '.' => [:DOT]
    }.freeze

    # All patterns are anchored with \G so they match exactly at the offset
    # handed to Regexp#match, without copying the remaining input.
    WHITESPACE_PATTERN = /\G\s+/.freeze
    NUMBER_PATTERN = /\G\d+(\.\d+)?/.freeze
    STRING_PATTERN = /\G"([^"]*)"/.freeze
    ATOM_PATTERN = /\G:([a-z][a-zA-Z0-9_]*)/.freeze
    WORD_PATTERN = /\G[a-z][a-zA-Z0-9_]*/.freeze
    CLASS_NAME_PATTERN = /\G[A-Z][a-zA-Z0-9_]*/.freeze

    private_constant :KEYWORDS, :PUNCTUATION, :WHITESPACE_PATTERN, :NUMBER_PATTERN,
                     :STRING_PATTERN, :ATOM_PATTERN, :WORD_PATTERN, :CLASS_NAME_PATTERN

    # @param source [String] program text; every newline is replaced by a space
    def initialize(source)
      @source = source.tr("\n", ' ')
      @position = 0
    end

    # Consumes and returns the next token.
    #
    # Leading whitespace is skipped first, so input that ends in whitespace
    # yields an EOF token instead of an error.
    #
    # @return [Token] the next token, or an EOF token once the input is exhausted
    # @raise [RuntimeError] if the text at the current position starts no known token
    def get_token
      skip_whitespace
      return build_token(:EOF) if at_end

      scan_number || scan_string || scan_atom || scan_name || scan_punctuation ||
        raise("Unrecognized character #{@source[@position]}")
    end

    # Lexes the rest of the input.
    #
    # @return [Array<Token>] every remaining token followed by a single EOF token
    # @raise [RuntimeError] if an unrecognized character is encountered
    def scan_all
      tokens = []
      skip_whitespace
      until at_end
        tokens << get_token
        skip_whitespace
      end
      tokens << build_token(:EOF)
      tokens
    end

    private

    # True once every character of the source has been consumed.
    def at_end
      @position >= @source.size
    end

    # Builds a Token whose kind is the TokenKinds constant called +kind+.
    # The value argument is forwarded only when one was given, so kinds without
    # a payload are still constructed with a single argument.
    def build_token(kind, *value)
      Token.new(TokenKinds.const_get(kind), *value)
    end

    # Matches +pattern+ at the current position and advances past the match.
    # Returns the MatchData, or nil (leaving the position untouched) on no match.
    def match_at(pattern)
      match = pattern.match(@source, @position)
      @position += match[0].size if match
      match
    end

    # Advances past any run of whitespace at the current position.
    def skip_whitespace
      match_at(WHITESPACE_PATTERN)
    end

    # Integer or decimal literal; the token value is always a Float.
    def scan_number
      match = match_at(NUMBER_PATTERN)
      build_token(:NUMBER, match[0].to_f) if match
    end

    # Double-quoted string without escape sequences; the value excludes the quotes.
    def scan_string
      match = match_at(STRING_PATTERN)
      build_token(:STRING, match[1]) if match
    end

    # Atom literal such as :name; the value is the name as a Symbol.
    def scan_atom
      match = match_at(ATOM_PATTERN)
      build_token(:ATOM, match[1].to_sym) if match
    end

    # A lowercase-initial word (keyword, word operator or identifier) or an
    # uppercase-initial class name. The whole word is consumed before it is
    # looked up, which is what keeps "order" from lexing as "or" + "der".
    def scan_name
      if (match = match_at(WORD_PATTERN))
        word = match[0]
        KEYWORDS.key?(word) ? build_token(*KEYWORDS[word]) : build_token(:IDENTIFIER, word)
      elsif (match = match_at(CLASS_NAME_PATTERN))
        build_token(:CLASS_NAME, match[0])
      end
    end

    # Operator or delimiter; two-character forms win over their one-character prefix.
    def scan_punctuation
      [2, 1].each do |length|
        lexeme = @source[@position, length]
        entry = PUNCTUATION[lexeme]
        next unless entry

        # Advance by the real lexeme size: near the end of input the slice may
        # be shorter than the requested length.
        @position += lexeme.size
        return build_token(*entry)
      end
      nil
    end
  end