A toy dynamic programming language written in Ruby
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

lexer.rb 4.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  # Turns the source text of the toy language into tokens.
  #
  # The lexer keeps a cursor (+@position+) into the source and hands out one
  # token per #get_token call; #scan_all drains the remaining input.
  # Tokens are built with +Token.new(kind)+ or +Token.new(kind, value)+ using
  # the kinds declared on +TokenKinds+ (both are defined outside this class).
  class Lexer
    # Raised when the text at the cursor does not start any known token.
    # Subclasses ArgumentError so broad rescues keep working.
    class UnrecognizedCharacterError < ArgumentError; end

    # Reserved words => [TokenKinds constant name, optional token value].
    # A word is only treated as a keyword when the WHOLE word matches, so
    # identifiers such as "iffy", "order" or "letter" are not split.
    KEYWORDS = {
      'null' => [:NULL],
      'true' => [:BOOLEAN, true],
      'false' => [:BOOLEAN, false],
      'if' => [:IF],
      'elseif' => [:ELSEIF],
      'else' => [:ELSE],
      'and' => [:OPERATOR, :and],
      'or' => [:OPERATOR, :or],
      'let' => [:LET],
      'function' => [:FUNCTION],
      'class' => [:CLASS],
      'public' => [:PUBLIC],
      'private' => [:PRIVATE]
    }.freeze

    # Operators and punctuation => [TokenKinds constant name, optional value].
    # Two-character lexemes are tried before one-character ones.
    PUNCTUATION = {
      '=>' => [:ROCKET],
      '==' => [:OPERATOR, :==],
      '<=' => [:OPERATOR, :<=],
      '>=' => [:OPERATOR, :>=],
      '+' => [:OPERATOR, :+],
      '-' => [:OPERATOR, :-],
      '*' => [:OPERATOR, :*],
      '/' => [:OPERATOR, :/],
      '<' => [:OPERATOR, :<],
      '>' => [:OPERATOR, :>],
      '=' => [:EQUALS],
      '{' => [:LBRACE],
      '}' => [:RBRACE],
      '(' => [:LPAREN],
      ')' => [:RPAREN],
      '[' => [:LBRACKET],
      ']' => [:RBRACKET],
      ';' => [:SEMICOLON],
      ',' => [:COMMA],
      '.' => [:DOT]
    }.freeze

    # All patterns are anchored with \G so they only match exactly at the
    # cursor when used with Regexp#match(string, position).
    WHITESPACE_PATTERN = /\G\s+/.freeze
    NUMBER_PATTERN = /\G\d+(\.\d+)?/.freeze
    STRING_PATTERN = /\G"([^"]*)"/.freeze
    ATOM_PATTERN = /\G:([a-z][a-zA-Z0-9_]*)/.freeze
    WORD_PATTERN = /\G[a-zA-Z][a-zA-Z0-9_]*/.freeze

    private_constant :KEYWORDS, :PUNCTUATION, :WHITESPACE_PATTERN,
                     :NUMBER_PATTERN, :STRING_PATTERN, :ATOM_PATTERN,
                     :WORD_PATTERN

    # @param source [String] program text; line breaks are replaced by single
    #   spaces so the whole program is lexed as one line.
    def initialize(source)
      @source = source.split("\n").join(' ')
      @position = 0
    end

    # Skips leading whitespace and returns the next token, advancing the cursor.
    #
    # @return [Token] the next token, or an EOF token once the input is used up
    #   (including when only whitespace remains).
    # @raise [UnrecognizedCharacterError] if no token starts at the cursor.
    def get_token
      # Whitespace is skipped BEFORE the end check so trailing blanks yield EOF.
      skip_whitespace
      return build(:EOF) if at_end?

      number_token || string_token || atom_token || word_token || punctuation_token ||
        raise(UnrecognizedCharacterError, "Unrecognized character #{@source[@position]}")
    end

    # Lexes everything from the cursor to the end of the input.
    #
    # @return [Array<Token>] the remaining tokens followed by one EOF token.
    # @raise [UnrecognizedCharacterError] if some text cannot be tokenized.
    def scan_all
      tokens = []
      loop do
        skip_whitespace
        break if at_end?

        tokens << get_token
      end
      tokens << build(:EOF)
    end

    private

    # True once the cursor has moved past the last character.
    def at_end?
      @position >= @source.size
    end

    # Advances the cursor over any run of whitespace.
    def skip_whitespace
      consume(WHITESPACE_PATTERN)
    end

    # Matches +pattern+ exactly at the cursor; on success moves the cursor past
    # the match and returns the MatchData, otherwise returns nil.
    def consume(pattern)
      match = pattern.match(@source, @position)
      return unless match

      @position += match[0].size
      match
    end

    # Creates a token; the kind is looked up on TokenKinds at call time so this
    # file does not need TokenKinds to be loaded before it.
    def build(kind_name, *value)
      Token.new(TokenKinds.const_get(kind_name), *value)
    end

    # NUMBER token for an integer or decimal literal; the value is a Float.
    def number_token
      match = consume(NUMBER_PATTERN)
      build(:NUMBER, match[0].to_f) if match
    end

    # STRING token for a double-quoted literal (no escape sequences).
    def string_token
      match = consume(STRING_PATTERN)
      build(:STRING, match[1]) if match
    end

    # ATOM token for ":name"; the value is the name as a Symbol.
    def atom_token
      match = consume(ATOM_PATTERN)
      build(:ATOM, match[1].to_sym) if match
    end

    # Keyword, IDENTIFIER (lowercase first letter) or CLASS_NAME (uppercase
    # first letter) token for a whole word.
    def word_token
      match = consume(WORD_PATTERN)
      return unless match

      word = match[0]
      return build(*KEYWORDS[word]) if KEYWORDS.key?(word)

      build(word[0].between?('a', 'z') ? :IDENTIFIER : :CLASS_NAME, word)
    end

    # Operator/punctuation token, preferring the longest lexeme at the cursor.
    def punctuation_token
      [2, 1].each do |width|
        lexeme = @source[@position, width]
        next unless PUNCTUATION.key?(lexeme)

        # Advance by the actual lexeme length: near the end of the input the
        # slice can be shorter than the requested width.
        @position += lexeme.size
        return build(*PUNCTUATION[lexeme])
      end
      nil
    end
  end