require 'nokogiri'
require 'time'

# Extracts thread and post data from forum HTML that has already been parsed
# into a node tree (anything responding to `css` / `at_css`, e.g. Nokogiri).
class Parser
  # Text that separates the leading part of a post's info line from its
  # "<date> @ <time>" timestamp.
  POSTED_MARKER = 'posted this'.freeze

  # Builds one hash per thread row on a thread-listing page.
  #
  # Rows are the elements matching `.even, .odd`; rows without a
  # `.subject > a` link are skipped.
  #
  # @param page [#css] parsed listing page
  # @return [Array<Hash>] hashes with :remote_id, :title, :creator and
  #   :is_sticky (in that key order)
  def threads(page)
    page.css('.even, .odd').each_with_object([]) do |row, found|
      subject_link = row.at_css('.subject > a')
      next if subject_link.nil?

      title = subject_link.text.strip
      found << {
        # Fourth '/'-separated piece of the href; for a root-relative href
        # the first piece is the empty string before the leading slash.
        remote_id: subject_link['href'].split('/')[3],
        title: title,
        creator: thread_creator(row),
        # start_with? checks the very beginning of the title, whereas a
        # `^` regexp anchor would also match after any embedded newline.
        is_sticky: title.start_with?('Sticky:')
      }
    end
  end

  # Builds one hash per `.post` element on a thread page.
  #
  # @param thread [Hash] thread record; only `thread[:id]` is read
  # @param page [#css] parsed thread page
  # @return [Array<Hash>] hashes with :remote_id, :creator, :created_at,
  #   :body and :thread_id (in that key order)
  def posts(thread, page)
    page.css('.post').map do |post_node|
      {
        # The id attribute is split on '_' and its second piece is taken
        # as the integer id.
        remote_id: post_node.at_css('ul.view')[:id].split('_')[1].to_i,
        creator: post_node.at_css('.memberlink').text.strip,
        created_at: parse_posted_at(post_node.at_css('.postinfo').text),
        # Inner HTML of the post body: every child node serialized and joined.
        body: post_node.at_css('.postbody').children.map(&:to_html).join.strip,
        thread_id: thread[:id]
      }
    end
  end

  # Parses the creation time out of a node whose text contains
  # "posted this <date> @ <time>".
  #
  # @param first_post [#text] node carrying the info text
  # @return [Time]
  def thread_created_at(first_post)
    parse_posted_at(first_post.text)
  end

  private

  # Reads the creator name from a listing row, dropping a single trailing
  # '+' if present. Returns nil when the row has no `.memberlink` element,
  # so one incomplete row does not abort parsing of the whole page.
  def thread_creator(row)
    member_link = row.at_css('.memberlink')
    return nil if member_link.nil?

    member_link.text.strip.chomp('+')
  end

  # Turns "... posted this <date> @ <time>" into a Time. Shared by #posts and
  # #thread_created_at so both interpret the info line identically.
  def parse_posted_at(info_text)
    date, time = info_text.split(POSTED_MARKER)[1].split('@')
    Time.parse("#{date} #{time}")
  end
end