A toy dynamic programming language written in Ruby
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

lexer.rb 4.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  # Turns source text of the toy language into Token objects.
  #
  # Newlines in the input are replaced by single spaces up front, so the lexer
  # works on one logical line and tracks progress as a character offset.
  #
  # Relies on Token and TokenKinds, which are defined outside this class and
  # are only looked up when a token is actually built.
  class Lexer
    # Reserved words mapped to [TokenKinds constant name, optional value].
    # Kinds are stored by name and resolved lazily so that TokenKinds does not
    # have to be loaded before this class body is evaluated.
    KEYWORDS = {
      'null' => [:NULL],
      'true' => [:BOOLEAN, true],
      'false' => [:BOOLEAN, false],
      'if' => [:IF],
      'elseif' => [:ELSEIF],
      'else' => [:ELSE],
      'and' => [:OPERATOR, :and],
      'or' => [:OPERATOR, :or],
      'not' => [:OPERATOR, :not],
      'let' => [:LET],
      'function' => [:FUNCTION],
      'class' => [:CLASS],
      'public' => [:PUBLIC],
      'private' => [:PRIVATE]
    }.freeze

    # Punctuation and symbolic operators, same value layout as KEYWORDS.
    SYMBOLS = {
      '=>' => [:ROCKET],
      '==' => [:OPERATOR, :==],
      '<=' => [:OPERATOR, :<=],
      '>=' => [:OPERATOR, :>=],
      '+' => [:OPERATOR, :+],
      '-' => [:OPERATOR, :-],
      '*' => [:OPERATOR, :*],
      '/' => [:OPERATOR, :/],
      '!' => [:OPERATOR, :!],
      '<' => [:OPERATOR, :<],
      '>' => [:OPERATOR, :>],
      '=' => [:EQUALS],
      '{' => [:LBRACE],
      '}' => [:RBRACE],
      '(' => [:LPAREN],
      ')' => [:RPAREN],
      '[' => [:LBRACKET],
      ']' => [:RBRACKET],
      ';' => [:SEMICOLON],
      ',' => [:COMMA],
      '.' => [:DOT]
    }.freeze

    # All patterns start with \G so they only match exactly at the offset
    # handed to Regexp#match; no substring of the remaining input is copied.
    WHITESPACE_PATTERN = /\G\s+/
    WORD_PATTERN = /\G[a-z][a-zA-Z0-9_]*/
    CLASS_NAME_PATTERN = /\G[A-Z][a-zA-Z0-9_]*/
    NUMBER_PATTERN = /\G\d+(\.\d+)?/
    # A string runs to the next double quote; there are no escape sequences.
    STRING_PATTERN = /\G"([^"]*)"/
    ATOM_PATTERN = /\G:([a-z][a-zA-Z0-9_]*)/
    # Longest alternatives first so "==" and "=>" win over "=", "<=" over "<".
    SYMBOL_PATTERN = /\G#{Regexp.union(SYMBOLS.keys.sort_by { |text| -text.size })}/

    private_constant :KEYWORDS, :SYMBOLS, :WHITESPACE_PATTERN, :WORD_PATTERN,
                     :CLASS_NAME_PATTERN, :NUMBER_PATTERN, :STRING_PATTERN,
                     :ATOM_PATTERN, :SYMBOL_PATTERN

    # @param source [String] program text; newlines are treated as spaces
    def initialize(source)
      @source = source.split("\n").join(' ')
      @position = 0
    end

    # Consumes and returns the next token.
    #
    # Leading whitespace is skipped first, so input that ends in whitespace
    # yields an EOF token instead of an error. Once the end is reached every
    # further call returns a new EOF token.
    #
    # @return [Token]
    # @raise [RuntimeError] when no token can start at the current character
    def get_token
      skip_whitespace
      return Token.new(TokenKinds::EOF) if at_end?

      # The categories start with disjoint characters, so the order of the
      # alternatives below does not influence the result.
      word_token || number_token || string_token || atom_token ||
        symbol_token || class_name_token ||
        raise("Unrecognized character #{@source[@position]}")
    end

    # Consumes the rest of the input.
    #
    # @return [Array<Token>] every remaining token followed by one EOF token
    # @raise [RuntimeError] when an unrecognized character is encountered
    def scan_all
      tokens = []
      loop do
        skip_whitespace
        break if at_end?

        tokens << get_token
      end
      tokens << Token.new(TokenKinds::EOF)
    end

    private

    # True once every character of the source has been consumed.
    def at_end?
      @position >= @source.size
    end

    # Advances past any run of whitespace at the current offset.
    def skip_whitespace
      scan(WHITESPACE_PATTERN)
    end

    # Matches pattern at the current offset; on success moves the offset past
    # the match and returns the MatchData, otherwise returns nil.
    def scan(pattern)
      match = pattern.match(@source, @position)
      return unless match

      @position = match.end(0)
      match
    end

    # Builds a token from a TokenKinds constant name plus optional value.
    def build_token(kind_name, *value)
      Token.new(TokenKinds.const_get(kind_name), *value)
    end

    # Lexes a whole lowercase-initial word, then decides whether it is a
    # reserved word or an identifier. Reading the full word first is what
    # keeps names such as "order" or "letter" from being split at "or"/"let".
    def word_token
      match = scan(WORD_PATTERN)
      return unless match

      word = match[0]
      keyword = KEYWORDS[word]
      keyword ? build_token(*keyword) : Token.new(TokenKinds::IDENTIFIER, word)
    end

    # Lexes an integer or decimal literal; the value is always a Float.
    def number_token
      match = scan(NUMBER_PATTERN)
      Token.new(TokenKinds::NUMBER, match[0].to_f) if match
    end

    # Lexes a double-quoted string; the value excludes the quotes.
    def string_token
      match = scan(STRING_PATTERN)
      Token.new(TokenKinds::STRING, match[1]) if match
    end

    # Lexes an atom such as :ok; the value is the name as a Symbol.
    def atom_token
      match = scan(ATOM_PATTERN)
      Token.new(TokenKinds::ATOM, match[1].to_sym) if match
    end

    # Lexes punctuation or a symbolic operator listed in SYMBOLS.
    def symbol_token
      match = scan(SYMBOL_PATTERN)
      build_token(*SYMBOLS.fetch(match[0])) if match
    end

    # Lexes an uppercase-initial name.
    def class_name_token
      match = scan(CLASS_NAME_PATTERN)
      Token.new(TokenKinds::CLASS_NAME, match[0]) if match
    end
  end
  155. end