Nevar pievienot vairāk kā 25 tēmas. Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

scrape.rb 2.0KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
  1. require 'dotenv/load'
  2. require 'sequel'
  3. require_relative '../db/connect'
  4. require_relative '../lib/auth'
  5. require_relative '../lib/fetch'
  6. require_relative '../lib/parse'
  7. require_relative '../lib/models/post'
  8. require_relative '../lib/models/thread'
  # Walks a range of listing pages and stores threads and posts that are not
  # yet in the database.
  #
  # Depends on names loaded elsewhere in the project: `login`, `Fetch`,
  # `Parse`, `DB`, `VLV::Thread` and `VLV::Post`.
  class Scraper
    # Stores the page range and logging flag, then logs in.
    #
    # @param first [Integer] first listing page number to fetch
    # @param last [Integer] last listing page number to fetch (inclusive)
    # @param log [Boolean] print progress to stdout when true
    # @raise [RuntimeError] when `login` returns nil
    def initialize(first: 0, last: 0, log: false)
      @first = first
      @last = last
      @log = log
      authenticate!
    end

    # Scrapes every thread on pages `first..last`.
    #
    # Sticky threads are only processed on the first page of the range. The
    # whole scrape stops at the first non-sticky thread that reports no new
    # posts (presumably the listing is ordered by latest activity, so later
    # threads would have nothing new either; confirm against the site).
    def scrape
      (@first..@last).each_with_index do |page_number, page_index|
        listing = Fetch.page(page_number, @cookie)
        Parse.threads(listing).each do |t|
          next if page_index.positive? && t[:is_sticky]

          no_new_posts = scrape_thread(t)
          # A sticky thread without new posts says nothing about the threads
          # after it, so only a non-sticky one ends the run.
          return if no_new_posts && !t[:is_sticky]
        end
      end
    end

    private

    # Logs in with the VLV_USERNAME / VLV_PASSWORD environment variables and
    # keeps the result in @cookie.
    #
    # @raise [RuntimeError] when `login` returns nil
    def authenticate!
      @cookie = login(ENV['VLV_USERNAME'], ENV['VLV_PASSWORD'])
      raise "Error logging into VLV. Check your credentials." if @cookie.nil?
    end

    # Fetches one thread page, creates the thread row if it is missing, then
    # scrapes its posts.
    #
    # @param t [Hash] thread attributes from `Parse.threads`; `:created_at` is
    #   set on it, all other keys are left untouched
    # @return [Boolean] true when the thread has no new posts, false otherwise
    #   (including when the page has no first post)
    def scrape_thread(t)
      log t[:title]
      page = Fetch.thread(t, @cookie)
      first_post = page.at_css('.postinfo:first-child')
      return false if first_post.nil?

      t[:created_at] = Parse.thread_created_at(first_post)
      thread = DB.from(:threads).first(remote_id: t[:remote_id])
      if thread.nil?
        # Build a copy without :is_sticky instead of deleting the key in
        # place: the caller still reads t[:is_sticky] after this returns.
        thread = VLV::Thread.create(t.reject { |key, _value| key == :is_sticky })
        log ' Inserting thread'
      end
      scrape_posts(thread, page)
    end

    # Inserts the posts of a thread page that are not stored yet.
    #
    # @param thread [Object] thread record passed through to `Parse.posts`
    # @param page [Object] fetched thread page
    # @return [Boolean] true when the last post on the page is already stored
    #   (nothing new), false otherwise
    def scrape_posts(thread, page)
      posts = Parse.posts(thread, page)
      last_post = posts.last
      # An empty page has no last post to look up and nothing to insert.
      return false if last_post.nil?

      if post_stored?(last_post)
        log ' No new posts'
        return true
      end

      posts_count = posts.size
      posts.each_with_index do |post, index|
        msg = " Inserting post #{index + 1}/#{posts_count}"
        print msg if @log
        VLV::Post.create(post) unless post_stored?(post)
        # Backspace over the progress message so the next one overwrites it;
        # the final message is left on screen.
        print "\b" * msg.size if @log && index < posts_count - 1
      end
      log ''
      false
    end

    # True when a post with the same remote id is already in the posts table.
    def post_stored?(post)
      !DB.from(:posts).first(remote_id: post[:remote_id]).nil?
    end

    # Prints msg followed by a newline, only when logging is enabled.
    def log(msg)
      puts msg if @log
    end
  end