You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

scrape.rb 1.9KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  1. require 'dotenv/load'
  2. require 'sequel'
  3. require_relative '../db/connect'
  4. require_relative '../lib/auth'
  5. require_relative '../lib/fetch'
  6. require_relative '../lib/parse'
  7. require_relative '../lib/models/post'
  8. require_relative '../lib/models/thread'
  # Walks a range of listing pages, stores every thread found there and
  # inserts the posts of each thread that are not yet in the `posts` table.
  class Scraper
    # @param first [Integer] number of the first listing page to fetch
    # @param last [Integer] number of the last listing page to fetch (inclusive)
    # @param log [Boolean] when truthy, progress is printed to standard output
    def initialize(first: 0, last: 0, log: false)
      @first = first
      @last = last
      @log = log
      # NOTE(review): `login` is not defined in this class — presumably it
      # comes from lib/auth. Its result is handed to every Fetch call.
      @cookie = login(ENV['VLV_USERNAME'], ENV['VLV_PASSWORD'])
    end

    # Scrapes the listing pages from `@first` to `@last`.
    #
    # Sticky threads are only processed on the first page of the range. The
    # run stops as soon as a non-sticky thread reports that it has no new
    # posts.
    #
    # @return [Range, nil] the page range when every page was visited, nil
    #   when the run stopped early
    def scrape
      (@first..@last).each_with_index do |page_number, page_index|
        page = Fetch.page(page_number, @cookie)
        Parse.threads(page).each do |t|
          next if page_index.positive? && t[:is_sticky]

          no_new_posts = scrape_thread(t)
          # A sticky thread without new posts says nothing about the threads
          # listed after it, so only a non-sticky one ends the run.
          return if no_new_posts && !t[:is_sticky]
        end
      end
    end

    # Fetches one thread, inserts it into `threads` if it is unknown and then
    # scrapes its posts.
    #
    # Sets `t[:created_at]` on the given hash; no other key of `t` is changed.
    #
    # @param t [Hash] thread attributes; `:title`, `:remote_id` and
    #   `:is_sticky` are read here
    # @return [Boolean] true when the thread has no new posts, false otherwise
    #   (including when the page contains no first post)
    def scrape_thread(t)
      log t[:title]
      page = Fetch.thread(t, @cookie)
      first_post = page.at_css('.postinfo:first-child')
      return false if first_post.nil?

      t[:created_at] = Parse.thread_created_at(first_post)
      thread = DB.from(:threads).first(remote_id: t[:remote_id])
      if thread.nil?
        # `reject` builds a copy: the caller still needs `t[:is_sticky]` after
        # this method returns, so `t` itself must keep that key.
        thread = VLV::Thread.create(t.reject { |key, _value| key == :is_sticky })
        log ' Inserting thread'
      end
      scrape_posts(thread, page)
    end

    # Inserts every post of the page that is not stored yet.
    #
    # @param thread [Object] the thread record, passed through to Parse.posts
    # @param page [Object] the fetched thread page, passed through to Parse.posts
    # @return [Boolean] true when the last post of the page is already stored
    #   (nothing is inserted in that case), false otherwise
    def scrape_posts(thread, page)
      posts = Parse.posts(thread, page)
      # Nothing to inspect or insert; report false like the "no first post"
      # case in #scrape_thread so the caller keeps going.
      return false if posts.empty?

      if post_stored?(posts.last)
        log ' No new posts'
        return true
      end

      total = posts.size
      posts.each_with_index do |post, index|
        msg = " Inserting post #{index + 1}/#{total}"
        print msg if @log
        VLV::Post.create(post) unless post_stored?(post)
        # Move the cursor back over the progress text so the next message is
        # printed at the same position; the final message is left in place.
        print "\b" * msg.size if @log && index < total - 1
      end
      log ''
      false
    end

    # Prints `msg` followed by a newline when logging is enabled.
    #
    # @param msg [String] text to print
    def log(msg)
      puts msg if @log
    end

    private

    # @param post [Hash] post attributes; only `:remote_id` is read
    # @return [Boolean] whether a row with the same remote_id exists in `posts`
    def post_stored?(post)
      !DB.from(:posts).first(remote_id: post[:remote_id]).nil?
    end
  end