A toy dynamic programming language written in Ruby
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

lexer.rb 4.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. class Lexer
  2. def initialize(source)
  3. @source = source.split("\n").join(' ')
  4. @position = 0
  5. end
  6. def get_token
  7. return Token.new(TokenKinds::EOF) if at_end
  8. @position += 1 while !at_end && @source.slice(@position).match(/\s/)
  9. source = @source.slice(@position..-1)
  10. if source.match(/^null/)
  11. @position += 4
  12. Token.new(TokenKinds::NULL)
  13. elsif source.match(/^true/)
  14. @position += 4
  15. Token.new(TokenKinds::BOOLEAN, true)
  16. elsif source.match(/^false/)
  17. @position += 5
  18. Token.new(TokenKinds::BOOLEAN, false)
  19. elsif source.match(/^if/)
  20. @position += 2
  21. Token.new(TokenKinds::IF)
  22. elsif source.match(/^elseif/)
  23. @position += 6
  24. Token.new(TokenKinds::ELSEIF)
  25. elsif source.match(/^else/)
  26. @position += 4
  27. Token.new(TokenKinds::ELSE)
  28. elsif source.match(/^\d+(\.\d+)?/)
  29. number = source.match(/^\d+(\.\d+)?/)[0]
  30. @position += number.size
  31. Token.new(TokenKinds::NUMBER, number.to_f)
  32. elsif source.match(/^"(.*)"/)
  33. string = source.match(/^"([^"]*)"/)[1]
  34. @position += (string.size + 2)
  35. Token.new(TokenKinds::STRING, string)
  36. elsif source.match(/^\+/)
  37. @position += 1
  38. Token.new(TokenKinds::OPERATOR, :+)
  39. elsif source.match(/^\-/)
  40. @position += 1
  41. Token.new(TokenKinds::OPERATOR, :-)
  42. elsif source.match(/^\*/)
  43. @position += 1
  44. Token.new(TokenKinds::OPERATOR, :*)
  45. elsif source.match(%r{^\/})
  46. @position += 1
  47. Token.new(TokenKinds::OPERATOR, :/)
  48. elsif source.match(/^\{/)
  49. @position += 1
  50. Token.new(TokenKinds::LBRACE)
  51. elsif source.match(/^\}/)
  52. @position += 1
  53. Token.new(TokenKinds::RBRACE)
  54. elsif source.match(/^\(/)
  55. @position += 1
  56. Token.new(TokenKinds::LPAREN)
  57. elsif source.match(/^\)/)
  58. @position += 1
  59. Token.new(TokenKinds::RPAREN)
  60. elsif source.match(/^\[/)
  61. @position += 1
  62. Token.new(TokenKinds::LBRACKET)
  63. elsif source.match(/^\]/)
  64. @position += 1
  65. Token.new(TokenKinds::RBRACKET)
  66. elsif source.match(/^\;/)
  67. @position += 1
  68. Token.new(TokenKinds::SEMICOLON)
  69. elsif source.match(/^,/)
  70. @position += 1
  71. Token.new(TokenKinds::COMMA)
  72. elsif source.match(/^\./)
  73. @position += 1
  74. Token.new(TokenKinds::DOT)
  75. elsif source.match(/^==/)
  76. @position += 2
  77. Token.new(TokenKinds::OPERATOR, :==)
  78. elsif source.match(/^\<=/)
  79. @position += 2
  80. Token.new(TokenKinds::OPERATOR, :<=)
  81. elsif source.match(/^\>\=/)
  82. @position += 2
  83. Token.new(TokenKinds::OPERATOR, :>=)
  84. elsif source.match(/^\</)
  85. @position += 1
  86. Token.new(TokenKinds::OPERATOR, :<)
  87. elsif source.match(/^\>/)
  88. @position += 1
  89. Token.new(TokenKinds::OPERATOR, :>)
  90. elsif source.match(/^and/)
  91. @position += 3
  92. Token.new(TokenKinds::OPERATOR, :and)
  93. elsif source.match(/^or/)
  94. @position += 2
  95. Token.new(TokenKinds::OPERATOR, :or)
  96. elsif source.match(/^\=/)
  97. @position += 1
  98. Token.new(TokenKinds::EQUALS)
  99. elsif source.match(/^let/)
  100. @position += 3
  101. Token.new(TokenKinds::LET)
  102. elsif source.match(/^function/)
  103. @position += 8
  104. Token.new(TokenKinds::FUNCTION)
  105. elsif source.match(/^class/)
  106. @position += 5
  107. Token.new(TokenKinds::CLASS)
  108. elsif source.match(/^public/)
  109. @position += 6
  110. Token.new(TokenKinds::PUBLIC)
  111. elsif source.match(/^private/)
  112. @position += 7
  113. Token.new(TokenKinds::PRIVATE)
  114. elsif source.match(/^[a-z][a-zA-Z0-9_]*/)
  115. identifier = source.match(/^[a-z][a-zA-Z0-9_]*/)[0]
  116. @position += identifier.size
  117. Token.new(TokenKinds::IDENTIFIER, identifier)
  118. elsif source.match(/^[A-Z][a-zA-Z0-9_]*/)
  119. class_name = source.match(/^[A-Z][a-zA-Z0-9_]*/)[0]
  120. @position += class_name.size
  121. Token.new(TokenKinds::CLASS_NAME, class_name)
  122. else
  123. throw "Unrecognized character #{source[0]}"
  124. end
  125. end
  126. def scan_all
  127. tokens = Array.new
  128. until at_end
  129. if @source.slice(@position).match(/\s/)
  130. @position += 1
  131. else
  132. tokens << get_token
  133. end
  134. end
  135. tokens << Token.new(TokenKinds::EOF)
  136. tokens
  137. end
  138. private
  139. def at_end
  140. @position == @source.size
  141. end
  142. end