Browse Source

Refactor fetcher, add specs

master
Dylan Baker 2 years ago
parent
commit
9198478704
4 changed files with 64 additions and 27 deletions
  1. 3
    3
      db/scrape.rb
  2. 0
    24
      lib/fetch.rb
  3. 29
    0
      lib/fetcher.rb
  4. 32
    0
      spec/fetcher_spec.rb

+ 3
- 3
db/scrape.rb View File

@@ -3,8 +3,8 @@ require 'sequel'
3 3
 
4 4
 require_relative '../db/connect'
5 5
 require_relative '../lib/auth'
6
-require_relative '../lib/fetch'
7 6
 require_relative '../lib/parse'
7
+require_relative '../lib/fetcher'
8 8
 require_relative '../lib/models/post'
9 9
 require_relative '../lib/models/thread'
10 10
 
@@ -19,7 +19,7 @@ class Scraper
19 19
 
20 20
   def scrape
21 21
     (@first..@last).each_with_index do |page_number, page_index|
22
-      page = Fetch.page(page_number, @cookie)
22
+      page = Fetcher.new(cookie: @cookie).page(page_number)
23 23
       threads = Parse.threads(page)
24 24
 
25 25
       threads.each do |t|
@@ -44,7 +44,7 @@ class Scraper
44 44
   def scrape_thread(t)
45 45
     log t[:title]
46 46
 
47
-    page = Fetch.thread(t, @cookie)
47
+    page = Fetcher.new(cookie: @cookie).thread(t)
48 48
     first_post = page.at_css('.postinfo:first-child')
49 49
     return false if first_post.nil?
50 50
 

+ 0
- 24
lib/fetch.rb View File

@@ -1,24 +0,0 @@
1
-require 'net/http'
2
-
3
-module Fetch
4
-  def self.page(page_number, cookie)
5
-    url = URI("http://board.vivalavinyl.com/thread/list/#{page_number}")
6
-    http = Net::HTTP.new(url.host, url.port)
7
-    request = Net::HTTP::Get.new(url)
8
-    request['cookie'] = cookie
9
-    response = http.request(request)
10
-    Nokogiri.HTML(response.body)
11
-  end
12
-
13
-  def self.thread(thread, cookie)
14
-    url =
15
-      URI(
16
-        "http://board.vivalavinyl.com/thread/view/#{thread[:remote_id]}&ajax=true"
17
-      )
18
-    http = Net::HTTP.new(url.host, url.port)
19
-    request = Net::HTTP::Get.new(url)
20
-    request['cookie'] = cookie
21
-    response = http.request(request)
22
-    Nokogiri.HTML(response.body)
23
-  end
24
-end

+ 29
- 0
lib/fetcher.rb View File

@@ -0,0 +1,29 @@
1
+require 'net/http'
2
+require 'nokogiri'
3
+
4
+class Fetcher
5
+  BASE_URL = 'http://board.vivalavinyl.com'
6
+
7
+  def initialize(cookie:)
8
+    @cookie = cookie
9
+  end
10
+
11
+  def page(page_number)
12
+    authenticated_request("/thread/list/#{page_number}")
13
+  end
14
+
15
+  def thread(thread)
16
+    authenticated_request("/thread/view/#{thread[:remote_id]}&ajax=true")
17
+  end
18
+
19
+  private
20
+
21
+  def authenticated_request(path)
22
+    uri = URI("#{BASE_URL}#{path}")
23
+    http = Net::HTTP.new(uri.host, uri.port)
24
+    request = Net::HTTP::Get.new(uri)
25
+    request['cookie'] = @cookie
26
+    response = http.request(request)
27
+    Nokogiri.HTML(response.body)
28
+  end
29
+end

+ 32
- 0
spec/fetcher_spec.rb View File

@@ -0,0 +1,32 @@
1
+require 'dotenv/load'
2
+require_relative '../lib/auth'
3
+require_relative '../lib/fetcher'
4
+
5
+RSpec.describe Fetcher do
6
+  let(:cookie) { login(ENV['VLV_USERNAME'], ENV['VLV_PASSWORD']) }
7
+  let(:thread_id) { 49668 }
8
+
9
+  subject { Fetcher.new(cookie: cookie) }
10
+
11
+  describe '#thread' do
12
+    it "fetches a thread" do
13
+      result = subject.thread({ remote_id: 49668 })
14
+      aggregate_failures do
15
+        expect(result).to be_a(Nokogiri::HTML::Document)
16
+        expect(result.at_css('.post:first-child').text)
17
+          .to include('reddwarf posted this March 30th, 2020 @ 3:29:23 am')
18
+      end
19
+    end
20
+  end
21
+
22
+  describe '#page' do
23
+    it "fetches a page of threads" do
24
+      result = subject.page(0)
25
+      aggregate_failures do
26
+        expect(result).to be_a(Nokogiri::HTML::Document)
27
+        expect(result.at_css('h3').text).to eq('VLV: politics, food, and aging')
28
+        expect(result.css('.even, .odd').length).to eq(110)
29
+      end
30
+    end
31
+  end
32
+end

Loading…
Cancel
Save