123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101 |
- require 'dotenv/load'
- require 'sequel'
-
- require_relative '../db/connect'
- require_relative '../lib/auth'
- require_relative '../lib/fetcher'
- require_relative '../lib/parser'
- require_relative '../lib/models/post'
- require_relative '../lib/models/thread'
-
# Walks a range of listing pages and stores any threads and posts that are
# not yet in the database.
#
# Collaborators (Fetcher, Parser, DB, VLV::Thread, VLV::Post and the
# top-level +login+ helper) are defined outside this file.
class Scraper
  # Stores the page range and logs in immediately.
  #
  # @param first [Integer] first listing page number to fetch
  # @param last [Integer] last listing page number to fetch (inclusive)
  # @param log [Boolean] when true, progress messages are printed via +puts+
  # @raise [RuntimeError] when +login+ returns nil
  def initialize(first: 0, last: 0, log: false)
    @first = first
    @last = last
    @log = log
    @no_new_posts = false

    authenticate!
  end

  # Scrapes every thread listed on pages +first+..+last+.
  #
  # Sticky threads are only processed on the first page of the run. The run
  # stops at the first non-sticky thread that has no new posts.
  # NOTE(review): stopping early presumably relies on the listing being
  # ordered by most recent activity - confirm against the site.
  #
  # @return [Range, nil] the page range when every page was visited, nil when
  #   the run stopped early
  def scrape
    (@first..@last).each_with_index do |page_number, page_index|
      listing = fetcher.page(page_number)

      parser.threads(listing).each do |t|
        next if page_index.positive? && t[:is_sticky]

        scrape_thread(t)

        # A sticky thread without new posts says nothing about the threads
        # that follow it, so only a non-sticky one ends the run.
        return if no_new_posts && !t[:is_sticky]
      end
    end
  end

  private

  attr_reader :cookie, :fetcher, :parser, :no_new_posts

  # Fetches one thread, creates its database row when missing and stores its
  # new posts. Resets the "no new posts" flag before doing anything else.
  #
  # @param t [Hash] thread attributes from the parser; read via :title,
  #   :remote_id and :is_sticky. The hash is NOT modified.
  # @return [false, Object] false when the fetched page has no first post
  def scrape_thread(t)
    @no_new_posts = false

    log t[:title]

    page = fetcher.thread(t)
    first_post = page.at_css('.postinfo:first-child')
    return false if first_post.nil?

    thread = DB.from(:threads).first(remote_id: t[:remote_id])
    if thread.nil?
      log ' Inserting thread'
      # Build a fresh hash instead of mutating +t+: #scrape still needs
      # t[:is_sticky] after this method returns.
      attributes = t.reject { |key, _value| key == :is_sticky }
      attributes[:created_at] = parser.thread_created_at(first_post)
      thread = VLV::Thread.create(attributes)
    end

    scrape_posts(thread, page)
  end

  # Inserts the posts of +page+ that are not stored yet. When the last parsed
  # post is already stored, nothing is inserted and the "no new posts" flag
  # is raised.
  #
  # @param thread [Object] thread record, passed through to the parser
  # @param page [Object] fetched thread page, passed through to the parser
  def scrape_posts(thread, page)
    posts = parser.posts(thread, page)

    # Nothing parsed: there is no last post to compare against. The flag is
    # left untouched so the run continues with the next thread.
    if posts.empty?
      log ' No posts found'
      return
    end

    if DB.from(:posts).first(remote_id: posts.last[:remote_id])
      no_new_posts!
      log ' No new posts'
      return
    end

    new_posts = reject_stored(posts)
    log " Inserting #{new_posts.size} posts"
    VLV::Post.multi_insert(new_posts)
  end

  # Returns the entries of +posts+ whose :remote_id has no matching
  # VLV::Post row, preserving their order.
  def reject_stored(posts)
    remote_ids = posts.map { |post| post[:remote_id] }
    stored = VLV::Post.where(remote_id: remote_ids).all
    stored_ids = stored.each_with_object({}) { |row, seen| seen[row.remote_id] = true }

    posts.reject { |post| stored_ids.key?(post[:remote_id]) }
  end

  # Marks the thread currently being scraped as having nothing new.
  def no_new_posts!
    @no_new_posts = true
  end

  # Logs in with the VLV_USERNAME / VLV_PASSWORD environment variables and
  # builds the fetcher and parser used by the rest of the class.
  #
  # @raise [RuntimeError] when +login+ returns nil
  def authenticate!
    @cookie = login(ENV['VLV_USERNAME'], ENV['VLV_PASSWORD'])
    raise "Error logging into VLV. Check your credentials." if @cookie.nil?

    @fetcher = Fetcher.new(cookie: cookie)
    @parser = Parser.new
  end

  # Prints +msg+ only when logging was enabled at construction.
  def log(msg)
    puts msg if @log
  end
end
|