require 'dotenv/load'
require 'sequel'
require_relative '../db/connect'
require_relative '../lib/auth'
require_relative '../lib/fetch'
require_relative '../lib/parse'
require_relative '../lib/models/post'
require_relative '../lib/models/thread'

# Walks a range of listing pages, scrapes every thread found on them and
# stores threads/posts that are not yet present in the database.
#
# Collaborators (Fetch, Parse, DB, VLV::Thread, VLV::Post and `login`) are
# defined outside this file; presumably they come from the files required
# above -- confirm there when changing how they are called.
class Scraper
  # Stores the options and logs in immediately.
  #
  # @param first [Integer] first listing page number to scrape (inclusive)
  # @param last [Integer] last listing page number to scrape (inclusive)
  # @param log [Boolean] when truthy, progress is written to stdout
  # @raise [RuntimeError] if logging in does not yield a cookie
  def initialize(first: 0, last: 0, log: false)
    @first = first
    @last = last
    @log = log
    authenticate!
  end

  # Scrapes every thread on pages +@first+..+@last+.
  #
  # Sticky threads are only processed on the first page of the range. The
  # whole run stops at the first non-sticky thread that has no new posts.
  # NOTE(review): this looks like it relies on the listing being ordered by
  # most recent activity -- confirm against the site.
  #
  # @return [void] callers should not rely on the return value
  def scrape
    (@first..@last).each_with_index do |page_number, page_index|
      page = Fetch.page(page_number, @cookie)

      Parse.threads(page).each do |t|
        # Read the flag up front so the decisions below never depend on what
        # scrape_thread does with the hash.
        sticky = t[:is_sticky]
        next if page_index > 0 && sticky

        no_new_posts = scrape_thread(t)
        next unless no_new_posts
        next if sticky

        # Non-sticky thread without new posts: stop the whole scrape.
        return
      end
    end
  end

  private

  # Logs in with the credentials from the environment and keeps the result
  # in +@cookie+ for subsequent fetches.
  #
  # @raise [RuntimeError] if +login+ returns nil
  def authenticate!
    @cookie = login(ENV['VLV_USERNAME'], ENV['VLV_PASSWORD'])
    raise "Error logging into VLV. Check your credentials." if @cookie.nil?
  end

  # Fetches one thread page, makes sure the thread row exists and then
  # stores its posts.
  #
  # @param t [Hash] parsed thread attributes; +:title+, +:remote_id+ and
  #   +:is_sticky+ are read here and +:created_at+ is added to it
  # @return [Boolean] true when the thread has no new posts, false otherwise
  #   (including when the page has no '.postinfo:first-child' element)
  def scrape_thread(t)
    log t[:title]

    page = Fetch.thread(t, @cookie)
    first_post = page.at_css('.postinfo:first-child')
    return false if first_post.nil?

    t[:created_at] = Parse.thread_created_at(first_post)

    thread = DB.from(:threads).first(remote_id: t[:remote_id])
    if thread.nil?
      # Build the attributes without mutating +t+: the caller still reads
      # t[:is_sticky] after this method returns.
      attributes = t.reject { |key, _value| key == :is_sticky }
      thread = VLV::Thread.create(attributes)
      log ' Inserting thread'
    end

    scrape_posts(thread, page)
  end

  # Stores every post of +page+ that is not yet in the posts table.
  #
  # If the last parsed post is already stored, the thread is treated as
  # having no new posts and nothing is inserted.
  #
  # @param thread [Object] thread record/row handed through to Parse.posts
  # @param page [Object] fetched thread page
  # @return [Boolean] true when there are no new posts, false otherwise
  def scrape_posts(thread, page)
    posts = Parse.posts(thread, page)

    # Nothing was parsed: there is no last post to inspect. Report "not
    # finished", the same answer scrape_thread gives for a page without a
    # first post, instead of failing on nil.
    return false if posts.empty?

    if post_stored?(posts.last[:remote_id])
      log ' No new posts'
      return true
    end

    posts_count = posts.size
    posts.each_with_index do |p, index|
      msg = " Inserting post #{index + 1}/#{posts_count}"
      print msg if @log

      VLV::Post.create(p) unless post_stored?(p[:remote_id])

      # Backspace over the progress message so the next one overwrites it;
      # the final message is left in place.
      print "\b" * msg.size if @log && index != posts_count - 1
    end

    log ''
    false
  end

  # @param remote_id [Object] value matched against posts.remote_id
  # @return [Boolean] whether a posts row with that remote_id exists
  def post_stored?(remote_id)
    !DB.from(:posts).first(remote_id: remote_id).nil?
  end

  # Writes +msg+ to stdout when logging is enabled.
  #
  # @param msg [String] line to print
  def log(msg)
    puts msg if @log
  end
end