14 コミット

作成者 SHA1 メッセージ 日付
  Dylan Baker 48fe82bdbd Don't use full text search for exact matches 2年前
  Dylan Baker 64c0eaf641 Accidentally removed order clause 2年前
  Dylan Baker 4df2839f8e Add searching specs 2年前
  Dylan Baker 762c22f34f Specify language of search 2年前
  Dylan Baker a642c6bbeb Oops 2年前
  Dylan Baker db3bdae43f Add VCR to fetcher and scraper specs 2年前
  Dylan Baker ec8e45ba59 Merge branch 'specs' 2年前
  Dylan Baker 8bbc70cc5a Add exact match function 2年前
  Dylan Baker ed3e225aba Specs 2年前
  Dylan Baker 2c5c251e7b Rename scraper 2年前
  Dylan Baker db0d10b969 Refactor scraper 2年前
  Dylan Baker ef2fed5fc0 Allow passing DB into migration 2年前
  Dylan Baker c978ff6241 Refactor parser and add specs 2年前
  Dylan Baker 9198478704 Refactor fetcher, add specs 2年前
20個のファイルの変更、419行の追加、152行の削除
  1. 1
    0
      .gitignore
  2. 2
    1
      Gemfile
  3. 14
    0
      Gemfile.lock
  4. 15
    1
      Rakefile
  5. 1
    0
      db/connect.rb
  6. 1
    2
      db/migrate.rb
  7. 43
    30
      db/scraper.rb
  8. 0
    24
      lib/fetch.rb
  9. 29
    0
      lib/fetcher.rb
  10. 2
    0
      lib/models/post.rb
  11. 2
    0
      lib/models/thread.rb
  12. 6
    6
      lib/parser.rb
  13. 43
    44
      lib/search.rb
  14. 36
    0
      spec/fetcher_spec.rb
  15. 118
    42
      spec/parser_spec.rb
  16. 30
    0
      spec/scraper_spec.rb
  17. 48
    0
      spec/search_spec.rb
  18. 12
    0
      spec/spec_helper.rb
  19. 1
    0
      web/server.rb
  20. 15
    2
      web/views/partials/search.erb

+ 1
- 0
.gitignore ファイルの表示

@@ -3,3 +3,4 @@ vendor/
3 3
 web/public/*.css
4 4
 web/public/*.js
5 5
 db/log
6
+spec/vcr

+ 2
- 1
Gemfile ファイルの表示

@@ -13,5 +13,6 @@ gem 'rspec', '~> 3.9'
13 13
 gem 'sassc', '~> 2.2'
14 14
 gem 'sequel', '~> 5.30'
15 15
 gem 'sinatra', '~> 2.0'
16
-
17 16
 gem "truncato", "~> 0.7.11"
17
+gem "vcr", "~> 6.0"
18
+gem "webmock", "~> 3.14"

+ 14
- 0
Gemfile.lock ファイルの表示

@@ -1,9 +1,14 @@
1 1
 GEM
2 2
   remote: https://rubygems.org/
3 3
   specs:
4
+    addressable (2.8.0)
5
+      public_suffix (>= 2.0.2, < 5.0)
6
+    crack (0.4.5)
7
+      rexml
4 8
     diff-lcs (1.3)
5 9
     dotenv (2.7.5)
6 10
     ffi (1.12.2)
11
+    hashdiff (1.0.1)
7 12
     htmlentities (4.3.4)
8 13
     httparty (0.18.0)
9 14
       mime-types (~> 3.0)
@@ -18,10 +23,12 @@ GEM
18 23
     nokogiri (1.10.9)
19 24
       mini_portile2 (~> 2.4.0)
20 25
     pg (1.2.3)
26
+    public_suffix (4.0.6)
21 27
     rack (2.2.2)
22 28
     rack-protection (2.0.8.1)
23 29
       rack
24 30
     rake (13.0.1)
31
+    rexml (3.2.5)
25 32
     rspec (3.9.0)
26 33
       rspec-core (~> 3.9.0)
27 34
       rspec-expectations (~> 3.9.0)
@@ -48,6 +55,11 @@ GEM
48 55
     truncato (0.7.11)
49 56
       htmlentities (~> 4.3.1)
50 57
       nokogiri (>= 1.7.0, <= 2.0)
58
+    vcr (6.0.0)
59
+    webmock (3.14.0)
60
+      addressable (>= 2.8.0)
61
+      crack (>= 0.3.2)
62
+      hashdiff (>= 0.4.0, < 2.0.0)
51 63
 
52 64
 PLATFORMS
53 65
   ruby
@@ -64,6 +76,8 @@ DEPENDENCIES
64 76
   sequel (~> 5.30)
65 77
   sinatra (~> 2.0)
66 78
   truncato (~> 0.7.11)
79
+  vcr (~> 6.0)
80
+  webmock (~> 3.14)
67 81
 
68 82
 BUNDLED WITH
69 83
    2.1.4

+ 15
- 1
Rakefile ファイルの表示

@@ -12,8 +12,22 @@ task 'migrate' do
12 12
   migrate
13 13
 end
14 14
 
15
+task 'migrate_test' do
16
+  require_relative './db/migrate'
17
+
18
+  TEST_DB = Sequel.connect(
19
+    adapter: :postgres,
20
+    database: ENV['DB_DATABASE'] + '_test',
21
+    user: ENV['DB_USERNAME'],
22
+    password: ENV['DB_PASSWORD'],
23
+    logger: ENV['APP_ENV'] == 'development' ? Logger.new('db/log') : nil
24
+  )
25
+
26
+  migrate(db: TEST_DB)
27
+end
28
+
15 29
 task 'scrape' do
16
-  require_relative './db/scrape'
30
+  require_relative './db/scraper'
17 31
 
18 32
   should_log = ENV['APP_ENV'] == 'development' || ARGV.include?('--log')
19 33
   scraper = Scraper.new(log: should_log)

+ 1
- 0
db/connect.rb ファイルの表示

@@ -1,5 +1,6 @@
1 1
 require 'dotenv'
2 2
 require 'logger'
3
+require 'sequel'
3 4
 
4 5
 Dotenv.load(File.expand_path('../.env'))
5 6
 

+ 1
- 2
db/migrate.rb ファイルの表示

@@ -3,8 +3,7 @@ require 'sequel'
3 3
 
4 4
 require_relative 'connect'
5 5
 
6
-def migrate
7
-  db = DB
6
+def migrate(db: DB)
8 7
   db.create_table? :threads do
9 8
     primary_key :id
10 9
     String :title

db/scrape.rb → db/scraper.rb ファイルの表示

@@ -3,8 +3,8 @@ require 'sequel'
3 3
 
4 4
 require_relative '../db/connect'
5 5
 require_relative '../lib/auth'
6
-require_relative '../lib/fetch'
7
-require_relative '../lib/parse'
6
+require_relative '../lib/fetcher'
7
+require_relative '../lib/parser'
8 8
 require_relative '../lib/models/post'
9 9
 require_relative '../lib/models/thread'
10 10
 
@@ -13,21 +13,27 @@ class Scraper
13 13
     @first = first
14 14
     @last = last
15 15
     @log = log
16
+    @no_new_posts = false
16 17
 
17 18
     authenticate!
18 19
   end
19 20
 
20 21
   def scrape
21 22
     (@first..@last).each_with_index do |page_number, page_index|
22
-      page = Fetch.page(page_number, @cookie)
23
-      threads = Parse.threads(page)
23
+      page = fetcher.page(page_number)
24
+      threads = parser.threads(page)
24 25
 
25 26
       threads.each do |t|
26 27
         next if page_index > 0 && t[:is_sticky]
27
-        no_new_posts = scrape_thread(t)
28
+
29
+        scrape_thread(t)
30
+
28 31
         if no_new_posts
29
-          next if t[:is_sticky]
30
-          return
32
+          if t[:is_sticky]
33
+            next
34
+          else
35
+            return
36
+          end
31 37
         end
32 38
       end
33 39
     end
@@ -35,51 +41,58 @@ class Scraper
35 41
 
36 42
   private
37 43
 
38
-  def authenticate!
39
-    @cookie = login(ENV['VLV_USERNAME'], ENV['VLV_PASSWORD'])
40
-
41
-    raise "Error logging into VLV. Check your credentials." if @cookie.nil?
42
-  end
44
+  attr_reader :cookie, :fetcher, :parser, :no_new_posts
43 45
 
44 46
   def scrape_thread(t)
47
+    @no_new_posts = false
48
+
45 49
     log t[:title]
46 50
 
47
-    page = Fetch.thread(t, @cookie)
51
+    page = fetcher.thread(t)
48 52
     first_post = page.at_css('.postinfo:first-child')
49 53
     return false if first_post.nil?
50 54
 
51
-    t[:created_at] = Parse.thread_created_at(first_post)
52
-
53 55
     thread = DB.from(:threads).first(remote_id: t[:remote_id])
54 56
     if thread.nil?
55
-      thread = VLV::Thread.create(t.delete_if { |k| k == :is_sticky })
56 57
       log '  Inserting thread'
58
+      t[:created_at] = parser.thread_created_at(first_post)
59
+      thread = VLV::Thread.create(t.delete_if { |k| k == :is_sticky })
57 60
     end
58 61
 
59 62
     scrape_posts(thread, page)
60 63
   end
61 64
 
62 65
   def scrape_posts(thread, page)
63
-    posts = Parse.posts(thread, page)
66
+    posts = parser.posts(thread, page)
64 67
     last_post = posts.last
65
-    unless DB.from(:posts).first(remote_id: last_post[:remote_id]).nil?
66
-      log '  No new posts'
67
-      return true
68
-    end
69 68
 
70
-    posts_count = posts.size
71
-    posts.each_with_index do |p, index|
72
-      msg = "  Inserting post #{index + 1}/#{posts_count}"
73
-      print msg if @log
74
-      if DB.from(:posts).first(remote_id: p[:remote_id]).nil?
75
-        VLV::Post.create(p)
69
+    if DB.from(:posts).first(remote_id: last_post[:remote_id]).nil?
70
+      db_posts = VLV::Post.where(remote_id: posts.map { |p| p[:remote_id] }).all
71
+
72
+      posts = posts.each_with_index.map do |post|
73
+        if db_posts.detect { |db_post| db_post.remote_id == post[:remote_id] }.nil?
74
+          post
75
+        end
76 76
       end
77
-      print "\b" * msg.size unless index == posts_count - 1 if @log
77
+
78
+      log "  Inserting #{posts.size} posts"
79
+      VLV::Post.multi_insert(posts.compact)
80
+    else
81
+      no_new_posts!
82
+      log '  No new posts'
78 83
     end
84
+  end
79 85
 
80
-    log ''
86
+  def no_new_posts!
87
+    @no_new_posts = true
88
+  end
89
+
90
+  def authenticate!
91
+    @cookie = login(ENV['VLV_USERNAME'], ENV['VLV_PASSWORD'])
92
+    raise "Error logging into VLV. Check your credentials." if @cookie.nil?
81 93
 
82
-    false
94
+    @fetcher = Fetcher.new(cookie: cookie)
95
+    @parser = Parser.new
83 96
   end
84 97
 
85 98
   def log(msg)

+ 0
- 24
lib/fetch.rb ファイルの表示

@@ -1,24 +0,0 @@
1
-require 'net/http'
2
-
3
-module Fetch
4
-  def self.page(page_number, cookie)
5
-    url = URI("http://board.vivalavinyl.com/thread/list/#{page_number}")
6
-    http = Net::HTTP.new(url.host, url.port)
7
-    request = Net::HTTP::Get.new(url)
8
-    request['cookie'] = cookie
9
-    response = http.request(request)
10
-    Nokogiri.HTML(response.body)
11
-  end
12
-
13
-  def self.thread(thread, cookie)
14
-    url =
15
-      URI(
16
-        "http://board.vivalavinyl.com/thread/view/#{thread[:remote_id]}&ajax=true"
17
-      )
18
-    http = Net::HTTP.new(url.host, url.port)
19
-    request = Net::HTTP::Get.new(url)
20
-    request['cookie'] = cookie
21
-    response = http.request(request)
22
-    Nokogiri.HTML(response.body)
23
-  end
24
-end

+ 29
- 0
lib/fetcher.rb ファイルの表示

@@ -0,0 +1,29 @@
1
+require 'net/http'
2
+require 'nokogiri'
3
+
4
+class Fetcher
5
+  BASE_URL = 'http://board.vivalavinyl.com'
6
+
7
+  def initialize(cookie:)
8
+    @cookie = cookie
9
+  end
10
+
11
+  def page(page_number)
12
+    authenticated_request("/thread/list/#{page_number}")
13
+  end
14
+
15
+  def thread(thread)
16
+    authenticated_request("/thread/view/#{thread[:remote_id]}&ajax=true")
17
+  end
18
+
19
+  private
20
+
21
+  def authenticated_request(path)
22
+    uri = URI("#{BASE_URL}#{path}")
23
+    http = Net::HTTP.new(uri.host, uri.port)
24
+    request = Net::HTTP::Get.new(uri)
25
+    request['cookie'] = @cookie
26
+    response = http.request(request)
27
+    Nokogiri.HTML(response.body)
28
+  end
29
+end

+ 2
- 0
lib/models/post.rb ファイルの表示

@@ -1,3 +1,5 @@
1
+require 'sequel'
2
+
1 3
 module VLV
2 4
   class Post < Sequel::Model
3 5
     many_to_one :thread

+ 2
- 0
lib/models/thread.rb ファイルの表示

@@ -1,3 +1,5 @@
1
+require 'sequel'
2
+
1 3
 module VLV
2 4
   class Thread < Sequel::Model
3 5
     one_to_many :posts

lib/parse.rb → lib/parser.rb ファイルの表示

@@ -1,7 +1,8 @@
1 1
 require 'nokogiri'
2
+require 'time'
2 3
 
3
-module Parse
4
-  def self.threads(page)
4
+class Parser
5
+  def threads(page)
5 6
     threads = Array.new
6 7
 
7 8
     page.css('.even, .odd').each do |row|
@@ -20,15 +21,14 @@ module Parse
20 21
     threads
21 22
   end
22 23
 
23
-  def self.posts(thread, page)
24
+  def posts(thread, page)
24 25
     posts = Array.new
25 26
 
26 27
     page.css('.post').each do |_post|
27 28
       post = Hash.new
28 29
       post[:remote_id] = _post.at_css('ul.view')[:id].split('_')[1].to_i
29 30
       post[:creator] = _post.at_css('.memberlink').text.strip
30
-      date, time =
31
-        _post.at_css('.postinfo').text.split('posted this')[1].split('@')
31
+      date, time = _post.at_css('.postinfo').text.split('posted this')[1].split('@')
32 32
       post[:created_at] = Time.parse("#{date} #{time}")
33 33
       post[:body] = _post.at_css('.postbody').children.map(&:to_html).join.strip
34 34
       post[:thread_id] = thread[:id]
@@ -38,7 +38,7 @@ module Parse
38 38
     posts
39 39
   end
40 40
 
41
-  def self.thread_created_at(first_post)
41
+  def thread_created_at(first_post)
42 42
     post_info = first_post.text.split('posted this')
43 43
     date, time = post_info[1].split('@')
44 44
     Time.parse("#{date} #{time}")

+ 43
- 44
lib/search.rb ファイルの表示

@@ -1,5 +1,8 @@
1 1
 require 'sequel'
2 2
 
3
+require_relative './models/post'
4
+require_relative './models/thread'
5
+
3 6
 RESULTS_PER_PAGE = 50
4 7
 
5 8
 def search(params)
@@ -8,6 +11,8 @@ def search(params)
8 11
   username = params[:username].strip
9 12
   from_date = params[:from_date].strip
10 13
   to_date = params[:to_date].strip
14
+  sort = params[:sort].strip
15
+  exact_match = params[:exact_match].strip == "yes"
11 16
 
12 17
   errors = Array.new
13 18
 
@@ -39,18 +44,9 @@ def search(params)
39 44
 
40 45
   results = case params[:type]
41 46
   when 'threads'
42
-    sort =
43
-      case params[:sort]
44
-      when 'thread'
45
-        'created_at DESC'
46
-      when 'post'
47
-        'last_post_created_at DESC'
48
-      else
49
-        'created_at DESC'
50
-      end
51
-    search_threads(query, username, from_date, to_date, sort, offset)
47
+    search_threads(query, username, from_date, to_date, sort, offset, exact_match)
52 48
   when 'posts'
53
-    search_posts(query, username, from_date, to_date, offset)
49
+    search_posts(query, username, from_date, to_date, offset, exact_match)
54 50
   else
55 51
     Array.new
56 52
   end
@@ -58,39 +54,42 @@ def search(params)
58 54
   {results: results, errors: errors}
59 55
 end
60 56
 
61
-def search_threads(query, username, from_date, to_date, sort, offset)
62
-  DB[<<-SQL, query, username, username, from_date, from_date, to_date, to_date, offset]
63
-    SELECT
64
-      threads.*,
65
-      count(*) OVER() AS full_count
66
-    FROM threads
67
-    WHERE
68
-      to_tsvector(title) @@ plainto_tsquery(?)
69
-      AND (LOWER(threads.creator) = LOWER(?) OR ? = '')
70
-      AND (created_at >= ? OR ? IS NULL)
71
-      AND (created_at <= ? OR ? IS NULL)
72
-    ORDER BY #{sort}
73
-    LIMIT #{RESULTS_PER_PAGE}
74
-    OFFSET ?;
75
-  SQL
57
+def search_threads(q, username, from_date, to_date, sort, offset, exact_match)
58
+  sort = Sequel.desc(sort == 'post' ? :last_post_created_at : :created_at)
59
+  query = VLV::Thread
60
+    .select(Sequel.lit('threads.*, count(*) OVER() AS full_count'))
61
+    .where(Sequel.lit("(LOWER(threads.creator) = LOWER(?) OR ? = '')", username, username))
62
+    .where(Sequel.lit("created_at >= ? OR ? IS NULL", from_date, from_date))
63
+    .where(Sequel.lit("created_at >= ? OR ? IS NULL", to_date, to_date))
64
+    .order(sort)
65
+    .limit(RESULTS_PER_PAGE)
66
+    .offset(Sequel.lit('?', offset))
67
+
68
+  if exact_match
69
+    query.where(Sequel.ilike(:title, "%#{q}%"))
70
+  else
71
+    query.full_text_search(:title, Sequel.lit("websearch_to_tsquery(?)", q), tsquery: true, language: 'english')
72
+  end
76 73
 end
77 74
 
78
-def search_posts(query, username, from_date, to_date, offset)
79
-  DB[<<-SQL, query, username, username, from_date, from_date, to_date, to_date, offset]
80
-    SELECT
81
-      posts.*,
82
-      threads.title as thread_title,
83
-      threads.remote_id as remote_thread_id,
84
-      count(*) OVER() AS full_count
85
-    FROM posts
86
-    INNER JOIN threads on posts.thread_id = threads.id
87
-    WHERE
88
-      tsv @@ plainto_tsquery(?)
89
-      AND ((LOWER(posts.creator) = LOWER(?)) OR (? = ''))
90
-      AND (posts.created_at >= ? OR ? IS NULL)
91
-      AND (posts.created_at <= ? OR ? IS NULL)
92
-    ORDER BY posts.created_at DESC
93
-    LIMIT #{RESULTS_PER_PAGE}
94
-    OFFSET ?;
95
-  SQL
75
+def search_posts(q, username, from_date, to_date, offset, exact_match)
76
+  query = VLV::Post
77
+    .select(Sequel.lit('posts.*, threads.title as thread_title, threads.remote_id as remote_thread_id, count(*) OVER() AS full_count'))
78
+    .join(Sequel.lit('threads on posts.thread_id = threads.id'))
79
+    .where(Sequel.lit("(LOWER(posts.creator) = LOWER(?) OR (? = ''))", username, username))
80
+    .where(Sequel.lit("posts.created_at >= ? OR ? IS NULL", from_date, from_date))
81
+    .where(Sequel.lit("posts.created_at >= ? OR ? IS NULL", to_date, to_date))
82
+    .limit(RESULTS_PER_PAGE)
83
+    .offset(Sequel.lit('?', offset))
84
+    .order(Sequel.desc(Sequel.lit('posts.created_at')))
85
+
86
+  if exact_match
87
+    query.where(Sequel.ilike(:body, "%#{q}%"))
88
+  else
89
+    query.full_text_search(:tsv, Sequel.lit("websearch_to_tsquery(?)", q), {
90
+      tsquery: true,
91
+      tsvector: true,
92
+      language: 'english'
93
+    })
94
+  end
96 95
 end

+ 36
- 0
spec/fetcher_spec.rb ファイルの表示

@@ -0,0 +1,36 @@
1
+require 'dotenv/load'
2
+require_relative '../lib/auth'
3
+require_relative '../lib/fetcher'
4
+
5
+RSpec.describe Fetcher do
6
+  let(:cookie) { login(ENV['VLV_USERNAME'], ENV['VLV_PASSWORD']) }
7
+  let(:thread_id) { 49668 }
8
+
9
+  subject { Fetcher.new(cookie: cookie) }
10
+
11
+  describe '#thread' do
12
+    it "fetches a thread" do
13
+      VCR.use_cassette "fetch_a_thread" do
14
+        result = subject.thread({ remote_id: 49668 })
15
+        aggregate_failures do
16
+          expect(result).to be_a(Nokogiri::HTML::Document)
17
+          expect(result.at_css('.post:first-child').text)
18
+            .to include('reddwarf posted this March 30th, 2020 @ 3:29:23 am')
19
+        end
20
+      end
21
+    end
22
+  end
23
+
24
+  describe '#page' do
25
+    it "fetches a page of threads" do
26
+      VCR.use_cassette "fetch_a_page_of_threads" do
27
+        result = subject.page(0)
28
+        aggregate_failures do
29
+          expect(result).to be_a(Nokogiri::HTML::Document)
30
+          expect(result.at_css('h3').text).to eq('VLV: politics, food, and aging')
31
+          expect(result.css('.even, .odd').length).to eq(110)
32
+        end
33
+      end
34
+    end
35
+  end
36
+end

+ 118
- 42
spec/parser_spec.rb ファイルの表示

@@ -1,47 +1,123 @@
1
-require_relative '../lib/parse'
1
+require_relative '../lib/parser'
2 2
 
3
-RSpec.describe 'Parser' do
4
-  it 'should parse threads' do
5
-    html = Nokogiri::HTML(<<~HTML)
6
-      <div class="even" id="thread_12345">
7
-        <ul class="list read">
8
-          <li class="member">
9
-            <span>Thread By: </span>
10
-            <a href="/member/view/creator1/" class="memberlink">creator1</a>
11
-          </li>
12
-          <li class="subject">
13
-            <span>Subject: </span>
14
-            <a href="/thread/view/12345/&p=999">
15
-              <strong>Sticky:</sticky> Thread title 1
16
-            </a>
17
-          </li>
18
-          <li class="posts"><span>Posts: </span>999</li>
19
-          <li class="lastpost">
20
-            <span>Last Post By:</span>
21
-            <a href="/member/view/lastposter1/" class="memberlink">lastposter1</a> on Fri&nbsp;Apr&nbsp;10&nbsp;2020&nbsp;01:23&nbsp;am</li>
22
-        </ul>
23
-      </div>
24
-      <div class="even" id="thread_123456">
25
-        <ul class="list read">
26
-          <li class="member">
27
-            <span>Thread By: </span>
28
-            <a href="/member/view/creator2/" class="memberlink">creator2</a>
29
-          </li>
30
-          <li class="subject">
31
-            <span>Subject: </span>
32
-            <a href="/thread/view/123456/&p=999">Thread title 2</a>
33
-          </li>
34
-          <li class="posts"><span>Posts: </span>999</li>
35
-          <li class="lastpost">
36
-            <span>Last Post By:</span>
37
-            <a href="/member/view/lastposter2/" class="memberlink">lastposter2</a> on Fri&nbsp;Apr&nbsp;10&nbsp;2020&nbsp;01:23&nbsp;am</li>
38
-        </ul>
3
+RSpec.describe Parser do
4
+  subject { Parser.new }
5
+
6
+  describe '#threads' do
7
+    let(:html) do
8
+      Nokogiri::HTML(<<~HTML)
9
+        <div class="even" id="thread_12345">
10
+          <ul class="list read">
11
+            <li class="member">
12
+              <span>Thread By: </span>
13
+              <a href="/member/view/creator1/" class="memberlink">creator1</a>
14
+            </li>
15
+            <li class="subject">
16
+              <span>Subject: </span>
17
+              <a href="/thread/view/12345/&p=999">
18
+                <strong>Sticky:</sticky> Thread title 1
19
+              </a>
20
+            </li>
21
+            <li class="posts"><span>Posts: </span>999</li>
22
+            <li class="lastpost">
23
+              <span>Last Post By:</span>
24
+              <a href="/member/view/lastposter1/" class="memberlink">lastposter1</a> on Fri&nbsp;Apr&nbsp;10&nbsp;2020&nbsp;01:23&nbsp;am</li>
25
+          </ul>
26
+        </div>
27
+        <div class="even" id="thread_123456">
28
+          <ul class="list read">
29
+            <li class="member">
30
+              <span>Thread By: </span>
31
+              <a href="/member/view/creator2/" class="memberlink">creator2</a>
32
+            </li>
33
+            <li class="subject">
34
+              <span>Subject: </span>
35
+              <a href="/thread/view/123456/&p=999">Thread title 2</a>
36
+            </li>
37
+            <li class="posts"><span>Posts: </span>999</li>
38
+            <li class="lastpost">
39
+              <span>Last Post By:</span>
40
+              <a href="/member/view/lastposter2/" class="memberlink">lastposter2</a> on Fri&nbsp;Apr&nbsp;10&nbsp;2020&nbsp;01:23&nbsp;am</li>
41
+          </ul>
42
+        </div>
43
+      HTML
44
+    end
45
+
46
+    it 'parses threads' do
47
+      expect(subject.threads(html)).to eq([
48
+        {remote_id: '12345', title: 'Sticky: Thread title 1', creator: 'creator1', is_sticky: true},
49
+        {remote_id: '123456', title: 'Thread title 2', creator: 'creator2', is_sticky: false},
50
+      ])
51
+    end
52
+  end
53
+
54
+  describe '#posts' do
55
+    let(:html) do
56
+      Nokogiri::HTML(<<~HTML)
57
+        <div>
58
+          <div class="post">
59
+            <ul class="view" id="post_69">
60
+              <li class="info">
61
+                <div class="postinfo">
62
+                  <a class="memberlink" href="/member/view/User1">User1</a>
63
+                  posted this October 26th, 2021 @ 12:34:56 am
64
+                </div>
65
+              </li>
66
+              <li class="postbody">
67
+                This is the body of the first post
68
+              </li>
69
+            </ul>
70
+          </div>
71
+          <div class="post">
72
+            <ul class="view" id="post_420">
73
+              <li class="info">
74
+                <div class="postinfo">
75
+                  <a class="memberlink" href="/member/view/User2">User2</a>
76
+                  posted this October 27th, 2021 @ 12:34:56 am
77
+                </div>
78
+              </li>
79
+              <li class="postbody">
80
+                This is the body of the second post
81
+              </li>
82
+            </ul>
83
+          </div>
84
+        </div>
85
+      HTML
86
+    end
87
+
88
+    it 'parses posts' do
89
+      expect(subject.posts({ id: 666 }, html)).to match_array([
90
+        {
91
+          remote_id: 69,
92
+          creator: 'User1',
93
+          thread_id: 666,
94
+          created_at: Time.new(2021, 10, 26, 0, 34, 56),
95
+          body: 'This is the body of the first post'
96
+        },
97
+        {
98
+          remote_id: 420,
99
+          creator: 'User2',
100
+          thread_id: 666,
101
+          created_at: Time.new(2021, 10, 27, 0, 34, 56),
102
+          body: 'This is the body of the second post'
103
+        }
104
+      ])
105
+    end
106
+  end
107
+
108
+  describe '#thread_created_at' do
109
+    let(:html) do
110
+      Nokogiri::HTML(<<~HTML)
111
+      <div class="postinfo">
112
+        <a class="memberlink" href="/member/view/User1">User1</a>
113
+        posted this October 27th, 2021 @ 12:34:56 am
39 114
       </div>
40
-    HTML
115
+      HTML
116
+    end
41 117
 
42
-    expect(Parse.threads(html)).to eq([
43
-      {remote_id: '12345', title: 'Sticky: Thread title 1', creator: 'creator1', is_sticky: true},
44
-      {remote_id: '123456', title: 'Thread title 2', creator: 'creator2', is_sticky: false},
45
-    ])
118
+    it 'parses the timestamp of the first post' do
119
+      expect(subject.thread_created_at(html))
120
+        .to eq(Time.new(2021, 10, 27, 00, 34, 56))
121
+    end
46 122
   end
47 123
 end

+ 30
- 0
spec/scraper_spec.rb ファイルの表示

@@ -0,0 +1,30 @@
1
+require 'dotenv/load'
2
+require 'spec_helper'
3
+
4
+require_relative '../db/scraper'
5
+
6
+RSpec.describe Scraper do
7
+  around(:each) do |example|
8
+    DB.transaction(rollback: :always, auto_savepoint: true) { example.run }
9
+  end
10
+
11
+  subject { described_class.new(log: true) }
12
+
13
+  describe '#initialize' do
14
+    it 'authenticates' do
15
+      VCR.use_cassette "authentication" do
16
+        expect(subject.send(:cookie)).to_not be_nil
17
+      end
18
+    end
19
+  end
20
+
21
+  describe '#scrape' do
22
+    it 'creates new threads and posts' do
23
+      VCR.use_cassette "create_new_threads_and_posts" do
24
+        expect { subject.scrape }
25
+          .to  change { VLV::Thread.count }.by(109)
26
+          .and change { VLV::Post.count   }.by_at_least(5000)
27
+      end
28
+    end
29
+  end
30
+end

+ 48
- 0
spec/search_spec.rb ファイルの表示

@@ -0,0 +1,48 @@
1
+require 'dotenv/load'
2
+require 'spec_helper'
3
+
4
+require_relative '../db/connect'
5
+require_relative '../lib/search'
6
+require_relative '../lib/models/post'
7
+require_relative '../lib/models/thread'
8
+
9
+RSpec.describe 'Search' do
10
+  around(:each) do |example|
11
+    DB.transaction(rollback: :always, auto_savepoint: true) { example.run }
12
+  end
13
+
14
+  before do
15
+    @thread1 = VLV::Thread.create(title: "This is a thread with many words in the title")
16
+    @thread2 = VLV::Thread.create(title: "Thread words")
17
+    @post1   = VLV::Post.create(body: "This is a post with many words in the body", thread_id: @thread1.id)
18
+    @post2   = VLV::Post.create(body: "Post words", thread_id: @thread2.id)
19
+  end
20
+
21
+  describe '#search_threads' do
22
+    it "finds threads with matching words" do
23
+      threads = search_threads('thread words', '', nil, nil, nil, nil, false)
24
+      expect(threads.map(&:title)).to match_array([@thread1.title, @thread2.title])
25
+    end
26
+
27
+    context "with exact_match" do
28
+      it "only finds threads with exactly matching phrases" do
29
+        threads = search_threads('thread words', '', nil, nil, nil, nil, true)
30
+        expect(threads.map(&:title)).to match_array([@thread2.title])
31
+      end
32
+    end
33
+  end
34
+
35
+  describe '#search_posts' do
36
+    it "finds posts with matching words" do
37
+      posts = search_posts('post words', '', nil, nil, nil, false)
38
+      expect(posts.map(&:body)).to match_array([@post1.body, @post2.body])
39
+    end
40
+
41
+    context "with exact_match" do
42
+      it "only finds posts with exactly matching phrases" do
43
+        posts = search_posts('post words', '', nil, nil, nil, true)
44
+        expect(posts.map(&:body)).to match_array([@post2.body])
45
+      end
46
+    end
47
+  end
48
+end

+ 12
- 0
spec/spec_helper.rb ファイルの表示

@@ -0,0 +1,12 @@
1
+require 'vcr'
2
+require "webmock/rspec"
3
+
4
+ENV["DB_DATABASE"] = ENV["DB_DATABASE"] + '_test'
5
+
6
+WebMock.disable_net_connect!(allow_localhost: true)
7
+
8
+VCR.configure do |c|
9
+  c.cassette_library_dir = "spec/vcr"
10
+  c.hook_into :webmock
11
+  c.ignore_localhost = true
12
+end

+ 1
- 0
web/server.rb ファイルの表示

@@ -37,6 +37,7 @@ class VLVSearch < Sinatra::Base
37 37
     params[:username] = String.new unless params[:username]
38 38
     params[:from_date] = String.new unless params[:from_date]
39 39
     params[:to_date] = String.new unless params[:to_date]
40
+    params[:exact_match] = String.new unless params[:exact_match]
40 41
 
41 42
     results = search(params)
42 43
 

+ 15
- 2
web/views/partials/search.erb ファイルの表示

@@ -2,10 +2,21 @@
2 2
   <input name="q" type="search" value="<%= params[:q] %>" placeholder="Search for..." class="form__search" required>
3 3
   <div class="filters">
4 4
     <div class="filters__section">
5
+      <div class="filters__subsection">
6
+        <p>Exact match?</p>
7
+        <label class="form__label">
8
+          <input type="radio" name="exact_match" value="yes" <% if [nil,true, "yes"].include?(params[:exact_match]) %>checked<% end %>>
9
+          Yes
10
+        </label>
11
+        <label class="form__label">
12
+          <input type="radio" name="exact_match" value="no" <% if [false, "no"].include?(params[:exact_match]) %>checked<% end %>>
13
+          No
14
+        </label>
15
+      </div>
5 16
       <div class="filters__subsection">
6 17
         <p>Search in:</p>
7 18
         <label class="form__label">
8
-          <input type="radio" name="type" value="threads" <% if [nil, "threads"].include?(params[:type]) %>checked<% end%>>
19
+          <input type="radio" name="type" value="threads" <% if [nil, "threads"].include?(params[:type]) %>checked<% end %>>
9 20
           Threads
10 21
         </label>
11 22
         <label class="form__label">
@@ -13,6 +24,8 @@
13 24
           Posts
14 25
         </label>
15 26
       </div>
27
+    </div>
28
+    <div class="filters__section">
16 29
       <div class="filters__subsection">
17 30
         <p>From Date <em>(YYYY-MM-DD)</em>:</p>
18 31
         <label class="form__label">
@@ -48,7 +61,7 @@
48 61
       <div class="filters__subsection filters__subsection--sort <%= params[:type] == 'threads' ? 'open' : '' %>">
49 62
         <p>Sort by:</p>
50 63
         <label class="form__label">
51
-          <input type="radio" name="sort" value="thread" <% if [nil, "thread"].include? params[:sort] %>checked<% end%>>
64
+          <input type="radio" name="sort" value="thread" <% if [nil, "thread"].include? params[:sort] %>checked<% end %>>
52 65
           Thread creation
53 66
         </label>
54 67
         <label class="form__label">

読み込み中…
キャンセル
保存