You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

parse.rb 1.3KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  require 'time'
  require 'nokogiri'
  # Scraping helpers that turn already-parsed forum pages into plain hashes.
  #
  # The +page+ / node arguments only need to behave like Nokogiri nodes
  # (+css+, +at_css+, +text+, +[]+, +children+); nothing here fetches or
  # parses HTML itself. Timestamps are read with Time.parse, which comes from
  # the stdlib "time" library.
  module Parse
    # Text that separates the author blurb from the timestamp in a post header.
    POSTED_MARKER = 'posted this'.freeze

    # Collects one hash per thread row of a thread-listing page.
    #
    # Rows are the elements matching ".even, .odd"; a row without a
    # ".subject > a" link is skipped.
    #
    # @param page [#css] parsed listing page
    # @return [Array<Hash>] one hash per thread with the keys
    #   +:remote_id+ (fourth "/"-separated segment of the link's href),
    #   +:title+ (stripped link text), +:creator+ (stripped ".memberlink"
    #   text without one trailing "+", or nil when the row has no
    #   ".memberlink") and +:is_sticky+ (title starts with "Sticky:")
    def self.threads(page)
      page.css('.even, .odd').each_with_object([]) do |row, threads|
        thread_link = row.at_css('.subject > a')
        next if thread_link.nil?

        title = thread_link.text.strip
        creator = member_name(row)
        # Drop a single trailing "+" from the name. chomp only looks at the
        # very end of the string, whereas /\+$/ also matched before an
        # embedded newline and then cut the wrong character.
        creator = creator.chomp('+') if creator

        threads << {
          remote_id: thread_link['href'].split('/')[3],
          title: title,
          creator: creator,
          # start_with? anchors at the start of the string; /^Sticky:/ would
          # also match at the start of any later line of the title.
          is_sticky: title.start_with?('Sticky:')
        }
      end
    end

    # Collects one hash per ".post" element of a thread page.
    #
    # @param thread [Hash] thread record; only +thread[:id]+ is read
    # @param page [#css] parsed thread page
    # @return [Array<Hash>] one hash per post with the keys
    #   +:remote_id+ (integer after the first "_" in the id attribute of the
    #   post's "ul.view"), +:creator+ (stripped ".memberlink" text, or nil
    #   when absent), +:created_at+ (Time parsed from the ".postinfo" text),
    #   +:body+ (inner HTML of ".postbody", stripped) and +:thread_id+
    # @raise [ArgumentError] if a ".postinfo" text lacks the "posted this"
    #   marker or its timestamp cannot be parsed
    def self.posts(thread, page)
      page.css('.post').map do |node|
        {
          remote_id: node.at_css('ul.view')[:id].split('_')[1].to_i,
          creator: member_name(node),
          created_at: parse_posted_at(node.at_css('.postinfo').text),
          body: node.at_css('.postbody').children.map(&:to_html).join.strip,
          thread_id: thread[:id]
        }
      end
    end

    # Reads the creation time of a thread from its first post's header.
    #
    # @param first_post [#text] node whose text contains
    #   "posted this <date> @ <time>"
    # @return [Time]
    # @raise [ArgumentError] if the marker is missing or the timestamp
    #   cannot be parsed
    def self.thread_created_at(first_post)
      parse_posted_at(first_post.text)
    end

    # Stripped text of the first ".memberlink" under +node+, or nil if the
    # node has none.
    def self.member_name(node)
      member = node.at_css('.memberlink')
      member ? member.text.strip : nil
    end

    # Parses the "<date> @ <time>" part that follows the "posted this" marker.
    def self.parse_posted_at(text)
      stamp = text.split(POSTED_MARKER)[1]
      if stamp.nil?
        raise ArgumentError, "missing #{POSTED_MARKER.inspect} marker in #{text.inspect}"
      end

      date, time = stamp.split('@')
      Time.parse("#{date} #{time}")
    end

    private_class_method :member_name, :parse_posted_at
  end