12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788 |
- require 'dotenv/load'
- require 'sequel'
-
- require_relative '../db/connect'
- require_relative '../lib/auth'
- require_relative '../lib/fetch'
- require_relative '../lib/parse'
- require_relative '../lib/models/post'
- require_relative '../lib/models/thread'
-
# Walks a range of thread-listing pages and stores every thread and post
# that is not yet present in the database.
#
# Fetching and parsing are delegated to +Fetch+ and +Parse+; persistence
# goes through +DB+ (lookups) and the +VLV::Thread+ / +VLV::Post+ models.
class Scraper
  # Logs in immediately so that a usable cookie is available to #scrape.
  #
  # @param first [Integer] number of the first listing page to fetch
  # @param last [Integer] number of the last listing page to fetch (inclusive)
  # @param log [Boolean] when true, progress messages are written to stdout
  # @raise [RuntimeError] if the login call returns nil
  def initialize(first: 0, last: 0, log: false)
    @first = first
    @last = last
    @log = log

    authenticate!
  end

  # Scrapes every listing page from +first+ to +last+.
  #
  # Sticky threads are only processed on the first page of the run. The whole
  # run ends as soon as a non-sticky thread reports that it has no new posts.
  #
  # @return [void]
  def scrape
    (@first..@last).each_with_index do |page_number, page_index|
      page = Fetch.page(page_number, @cookie)
      threads = Parse.threads(page)

      threads.each do |t|
        # Sticky threads are skipped on every page after the first one.
        next if page_index > 0 && t[:is_sticky]

        no_new_posts = scrape_thread(t)
        next unless no_new_posts
        # A sticky thread without new posts must not end the run.
        next if t[:is_sticky]

        return
      end
    end
  end

  private

  # Logs in with the credentials from the environment and keeps the cookie.
  #
  # @raise [RuntimeError] if no cookie was returned
  def authenticate!
    @cookie = login(ENV['VLV_USERNAME'], ENV['VLV_PASSWORD'])

    raise "Error logging into VLV. Check your credentials." if @cookie.nil?
  end

  # Fetches one thread, inserts it when it is unknown, then scrapes its posts.
  #
  # Writes +:created_at+ into +t+. The +:is_sticky+ entry is left in place
  # because #scrape reads it again after this method returns.
  #
  # @param t [Hash] parsed thread attributes (+:title+, +:remote_id+,
  #   +:is_sticky+ are read here)
  # @return [Boolean] true when the thread has no new posts; false when new
  #   posts were stored, or when there was nothing to compare against
  def scrape_thread(t)
    log t[:title]

    page = Fetch.thread(t, @cookie)
    first_post = page.at_css('.postinfo:first-child')
    return false if first_post.nil?

    t[:created_at] = Parse.thread_created_at(first_post)

    thread = DB.from(:threads).first(remote_id: t[:remote_id])
    if thread.nil?
      # Persist a copy without :is_sticky instead of deleting the key from
      # the caller's hash (presumably :is_sticky is not a stored attribute).
      thread = VLV::Thread.create(t.reject { |key, _value| key == :is_sticky })
      log ' Inserting thread'
    end

    scrape_posts(thread, page)
  end

  # Stores the posts of a thread page that are not in the database yet.
  #
  # The last parsed post is used as the "already up to date" marker: if it is
  # already stored, nothing is inserted.
  #
  # @param thread [Object] the thread record, as handed to +Parse.posts+
  # @param page [Object] the fetched thread page
  # @return [Boolean] true when the last post is already stored; false
  #   otherwise, including when the page yields no posts at all
  def scrape_posts(thread, page)
    posts = Parse.posts(thread, page)
    last_post = posts.last
    # No posts parsed: there is no marker to look up, so report "not up to
    # date" rather than failing on nil.
    return false if last_post.nil?

    unless DB.from(:posts).first(remote_id: last_post[:remote_id]).nil?
      log ' No new posts'
      return true
    end

    posts_count = posts.size
    posts.each_with_index do |post, index|
      msg = " Inserting post #{index + 1}/#{posts_count}"
      print msg if @log
      VLV::Post.create(post) if DB.from(:posts).first(remote_id: post[:remote_id]).nil?
      # Backspace over the progress text so the next one overwrites it; the
      # final message is left on screen.
      print "\b" * msg.size if @log && index != posts_count - 1
    end

    log ''

    false
  end

  # Writes +msg+ to stdout when logging is enabled.
  #
  # @param msg [String]
  def log(msg)
    puts msg if @log
  end
end
|