You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

parser.rb 1.3KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  require 'nokogiri'
  require 'time'
  # Pulls thread and post records out of parsed HTML pages.
  #
  # The +page+ / node arguments are only used through the Nokogiri-style calls
  # +css+, +at_css+, +text+, +children+, +to_html+ and +[]+.
  class Parser
    # Text that precedes the timestamp inside a post-info node.
    POSTED_MARKER = 'posted this'.freeze
    # Prefix that marks a thread title as sticky.
    STICKY_PREFIX = 'Sticky:'.freeze

    # Builds one hash per '.even' / '.odd' row that has a '.subject > a' link.
    #
    # @param page [#css] parsed listing page
    # @return [Array<Hash>] hashes with :remote_id, :title, :creator, :is_sticky
    def threads(page)
      page.css('.even, .odd').each_with_object([]) do |row, found|
        thread_link = row.at_css('.subject > a')
        next if thread_link.nil? # rows without a subject link are skipped

        title = thread_link.text.strip
        creator = stripped_text(row.at_css('.memberlink'))
        found << {
          # Fourth '/'-separated piece of the href, kept as a String.
          remote_id: thread_link['href'].split('/')[3],
          title: title,
          # One trailing '+' is dropped; nil when the row has no '.memberlink'.
          creator: creator && creator.chomp('+'),
          is_sticky: title.start_with?(STICKY_PREFIX)
        }
      end
    end

    # Builds one hash per '.post' node on the page.
    #
    # @param thread [Hash] only thread[:id] is read; it is copied into :thread_id
    # @param page [#css] parsed thread page
    # @return [Array<Hash>] hashes with :remote_id, :creator, :created_at,
    #   :body, :thread_id
    # @raise [ArgumentError] when a '.postinfo' text has no parsable timestamp
    def posts(thread, page)
      page.css('.post').map do |post_node|
        {
          # Integer taken from the part after the first '_' of the 'ul.view' id.
          remote_id: post_node.at_css('ul.view')[:id].split('_')[1].to_i,
          # Unlike #threads, a trailing '+' is kept here.
          creator: post_node.at_css('.memberlink').text.strip,
          created_at: thread_created_at(post_node.at_css('.postinfo')),
          # Inner HTML of '.postbody': each child serialised, then joined.
          body: post_node.at_css('.postbody').children.map(&:to_html).join.strip,
          thread_id: thread[:id]
        }
      end
    end

    # Parses the "<date> @ <time>" text that follows 'posted this'.
    #
    # Time.parse assumes the local time zone when the text carries none.
    #
    # @param first_post [#text] node whose text contains 'posted this'
    # @return [Time]
    # @raise [ArgumentError] when nothing follows the marker or the remainder
    #   cannot be parsed as a time
    def thread_created_at(first_post)
      stamp = first_post.text.split(POSTED_MARKER)[1]
      raise ArgumentError, "no timestamp after '#{POSTED_MARKER}'" if stamp.nil?

      date, time = stamp.split('@')
      Time.parse("#{date} #{time}")
    end

    private

    # Returns the node's text without surrounding whitespace, or nil for nil.
    def stripped_text(node)
      node.nil? ? nil : node.text.strip
    end
  end