A toy dynamic programming language written in Ruby
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

lexer.rb 4.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  # Turns source text of the toy language into Token objects.
  #
  # Token and TokenKinds are defined outside this class; they are only
  # referenced at the moment a token is produced, never at class-load time.
  class Lexer
    # Raised when the text at the current position matches no lexical rule.
    # Derives from ArgumentError so that handlers which caught the
    # UncaughtThrowError (itself an ArgumentError) produced by the former
    # `throw` call keep working.
    class UnrecognizedCharacterError < ArgumentError; end

    # Every pattern starts with \G so that String#match(pattern, position)
    # can only succeed exactly at the current position, which avoids copying
    # the remaining source into a new substring for each token.
    NUMBER_PATTERN = /\G\d+(\.\d+)?/.freeze
    STRING_PATTERN = /\G"([^"]*)"/.freeze
    # Two-character operators come first so "<=" is not read as "<" then "=".
    OPERATOR_PATTERN = %r{\G(?:==|<=|>=|[-+*/<>])}.freeze
    PUNCTUATION_PATTERN = /\G[{}()\[\];,.=]/.freeze
    # A whole word is consumed before deciding whether it is a keyword, so
    # identifiers that merely begin with a keyword ("order", "iffy") stay intact.
    WORD_PATTERN = /\G[a-zA-Z][a-zA-Z0-9_]*/.freeze

    # @param source [String] program text; line breaks are replaced by single
    #   spaces, so a string literal spanning lines contains spaces instead
    def initialize(source)
      @source = source.split("\n").join(' ')
      @position = 0
    end

    # Consumes and returns the next token, skipping any leading whitespace.
    #
    # @return [Token] the next token, or an EOF token once the input is exhausted
    # @raise [UnrecognizedCharacterError] if no rule matches at the current position
    def get_token
      # Skip whitespace BEFORE the end check so trailing blanks yield EOF
      # instead of an "Unrecognized character" error on an empty remainder.
      skip_whitespace
      return Token.new(TokenKinds::EOF) if at_end

      if (match = scan(NUMBER_PATTERN))
        Token.new(TokenKinds::NUMBER, match[0].to_f)
      elsif (match = scan(STRING_PATTERN))
        Token.new(TokenKinds::STRING, match[1])
      elsif (match = scan(OPERATOR_PATTERN))
        Token.new(TokenKinds::OPERATOR, match[0].to_sym)
      elsif (match = scan(PUNCTUATION_PATTERN))
        punctuation_token(match[0])
      elsif (match = scan(WORD_PATTERN))
        word_token(match[0])
      else
        raise UnrecognizedCharacterError, "Unrecognized character #{@source[@position]}"
      end
    end

    # Tokenizes the rest of the input.
    #
    # @return [Array<Token>] all remaining tokens followed by one EOF token
    # @raise [UnrecognizedCharacterError] if the input contains text no rule matches
    def scan_all
      tokens = []
      loop do
        skip_whitespace
        break if at_end

        tokens << get_token
      end
      tokens << Token.new(TokenKinds::EOF)
      tokens
    end

    private

    # @return [Boolean] true once every character has been consumed
    def at_end
      @position == @source.size
    end

    # Moves the cursor to the next non-whitespace character (or to the end).
    def skip_whitespace
      @position = @source.index(/\S/, @position) || @source.size
    end

    # Tries +pattern+ at the cursor and advances past the match on success.
    #
    # @param pattern [Regexp] a \G-anchored pattern
    # @return [MatchData, nil] the match, or nil when the pattern does not apply
    def scan(pattern)
      match = @source.match(pattern, @position)
      @position += match[0].size if match
      match
    end

    # Builds the token for a single punctuation character.
    def punctuation_token(char)
      kind =
        case char
        when '{' then TokenKinds::LBRACE
        when '}' then TokenKinds::RBRACE
        when '(' then TokenKinds::LPAREN
        when ')' then TokenKinds::RPAREN
        when '[' then TokenKinds::LBRACKET
        when ']' then TokenKinds::RBRACKET
        when ';' then TokenKinds::SEMICOLON
        when ',' then TokenKinds::COMMA
        when '.' then TokenKinds::DOT
        else TokenKinds::EQUALS # '=' is the only character left in PUNCTUATION_PATTERN
        end
      Token.new(kind)
    end

    # Builds the token for a complete word: keyword, word operator,
    # identifier (lowercase first letter) or class name (uppercase first letter).
    def word_token(word)
      case word
      when 'null' then Token.new(TokenKinds::NULL)
      when 'true' then Token.new(TokenKinds::BOOLEAN, true)
      when 'false' then Token.new(TokenKinds::BOOLEAN, false)
      when 'if' then Token.new(TokenKinds::IF)
      when 'elseif' then Token.new(TokenKinds::ELSEIF)
      when 'else' then Token.new(TokenKinds::ELSE)
      when 'and', 'or' then Token.new(TokenKinds::OPERATOR, word.to_sym)
      when 'let' then Token.new(TokenKinds::LET)
      when 'function' then Token.new(TokenKinds::FUNCTION)
      when 'class' then Token.new(TokenKinds::CLASS)
      when 'public' then Token.new(TokenKinds::PUBLIC)
      when 'private' then Token.new(TokenKinds::PRIVATE)
      when /\A[a-z]/ then Token.new(TokenKinds::IDENTIFIER, word)
      else Token.new(TokenKinds::CLASS_NAME, word)
      end
    end
  end