A toy dynamic programming language written in Ruby
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

lexer.rb 4.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  # Converts source text of the toy language into a stream of Token objects.
  #
  # Token kinds are looked up by name in TokenKinds when a token is built
  # (not when this class is loaded), so this file does not need TokenKinds to
  # be defined before it is required.
  class Lexer
    # Single whitespace character, skipped between tokens.
    WHITESPACE = /\s/

    # Literal and name patterns. "\G" anchors each match at the position
    # handed to String#match, so the remaining input is never copied.
    NUMBER     = /\G\d+(?:\.\d+)?/
    STRING     = /\G"([^"]*)"/
    ATOM       = /\G:([a-z][a-zA-Z0-9_]*)/
    WORD       = /\G[a-z][a-zA-Z0-9_]*/
    CLASS_NAME = /\G[A-Z][a-zA-Z0-9_]*/

    # Punctuation and symbolic operators mapped to
    # [TokenKinds constant name, optional token value].
    # Two-character spellings are listed first so that "=>", "==", "<=" and
    # ">=" win over their one-character prefixes in PUNCTUATION below.
    PUNCTUATION_TOKENS = {
      '=>' => [:ROCKET],
      '==' => [:OPERATOR, :==],
      '<=' => [:OPERATOR, :<=],
      '>=' => [:OPERATOR, :>=],
      '+' => [:OPERATOR, :+],
      '-' => [:OPERATOR, :-],
      '*' => [:OPERATOR, :*],
      '/' => [:OPERATOR, :/],
      '!' => [:OPERATOR, :!],
      '<' => [:OPERATOR, :<],
      '>' => [:OPERATOR, :>],
      '=' => [:EQUALS],
      '{' => [:LBRACE],
      '}' => [:RBRACE],
      '(' => [:LPAREN],
      ')' => [:RPAREN],
      '[' => [:LBRACKET],
      ']' => [:RBRACKET],
      ';' => [:SEMICOLON],
      ',' => [:COMMA],
      '.' => [:DOT]
    }.freeze

    # Alternation of every punctuation spelling, in the table's order.
    PUNCTUATION = /\G#{Regexp.union(PUNCTUATION_TOKENS.keys)}/

    # Reserved words mapped to [TokenKinds constant name, optional value].
    # A word is reserved only when the WHOLE lower-case word equals one of
    # these keys, so identifiers such as "index" or "order" are not split
    # into a keyword followed by a shorter identifier.
    KEYWORDS = {
      'null' => [:NULL],
      'true' => [:BOOLEAN, true],
      'false' => [:BOOLEAN, false],
      'if' => [:IF],
      'elseif' => [:ELSEIF],
      'else' => [:ELSE],
      'and' => [:OPERATOR, :and],
      'or' => [:OPERATOR, :or],
      'not' => [:OPERATOR, :not],
      'let' => [:LET],
      'function' => [:FUNCTION],
      'class' => [:CLASS],
      'public' => [:PUBLIC],
      'private' => [:PRIVATE],
      'for' => [:FOR],
      'in' => [:IN]
    }.freeze

    # @param source [String] program text; line breaks are replaced by single
    #   spaces (including line breaks inside string literals)
    def initialize(source)
      @source = source.split("\n").join(' ')
      @position = 0
    end

    # Skips leading whitespace, then reads one token and advances past it.
    #
    # @return [Token] the next token, or an EOF token once only whitespace
    #   (or nothing) remains
    # @raise [RuntimeError] when no token can start at the current character
    #   (this includes an unterminated string literal)
    def get_token
      # Skip whitespace BEFORE the end check so trailing blanks yield EOF
      # instead of an "Unrecognized character" error.
      skip_whitespace
      return token(:EOF) if at_end

      # The rules start with disjoint characters (digit, quote, colon,
      # punctuation, lower-case, upper-case), so their order is not significant.
      if (found = consume(NUMBER))
        token(:NUMBER, found[0].to_f)
      elsif (found = consume(STRING))
        token(:STRING, found[1])
      elsif (found = consume(ATOM))
        token(:ATOM, found[1].to_sym)
      elsif (found = consume(PUNCTUATION))
        token(*PUNCTUATION_TOKENS.fetch(found[0]))
      elsif (found = consume(WORD))
        token(*KEYWORDS.fetch(found[0]) { [:IDENTIFIER, found[0]] })
      elsif (found = consume(CLASS_NAME))
        token(:CLASS_NAME, found[0])
      else
        raise "Unrecognized character #{@source[@position]}"
      end
    end

    # Reads every remaining token.
    #
    # @return [Array<Token>] the tokens in source order, always terminated by
    #   exactly one EOF token
    # @raise [RuntimeError] see #get_token
    def scan_all
      tokens = []
      loop do
        skip_whitespace
        break if at_end

        tokens << get_token
      end
      tokens << token(:EOF)
    end

    private

    # @return [Boolean] true once the whole source has been consumed
    def at_end
      @position >= @source.size
    end

    # Moves the cursor past any run of whitespace.
    def skip_whitespace
      @position += 1 while !at_end && @source[@position] =~ WHITESPACE
    end

    # Tries +pattern+ at the cursor and, on success, moves the cursor to the
    # end of the match.
    #
    # @param pattern [Regexp] a pattern anchored with "\G"
    # @return [MatchData, nil]
    def consume(pattern)
      found = @source.match(pattern, @position)
      @position = found.end(0) if found
      found
    end

    # Builds a Token. The value argument is forwarded only when one was
    # supplied, so value-less kinds are still built as Token.new(kind).
    #
    # @param kind_name [Symbol] name of a constant in TokenKinds
    # @param value [Array] zero or one token value
    def token(kind_name, *value)
      Token.new(TokenKinds.const_get(kind_name), *value)
    end
  end