4 Commits

Author SHA1 Message Date
  Dylan Baker e46a9fa449 Parse stickiness in parse_threads 4 years ago
  Dylan Baker 2c524af608 Log in development 4 years ago
  Dylan Baker e1bbe85032 Add option to log or not 4 years ago
  Dylan Baker 210c1c5667 Fix breaking/sticky logic 4 years ago
2 changed files with 15 additions and 10 deletions
  1. Rakefile: 1 addition, 1 deletion
  2. db/scrape.rb: 14 additions, 9 deletions

+ 1
- 1
Rakefile View File

9
 end
9
 end
10
 
10
 
11
 task 'scrape' do
11
 task 'scrape' do
12
-  scrape
12
+  scrape(log: ENV['APP_ENV'] == 'development')
13
 end
13
 end
14
 
14
 
15
 task 'build' do
15
 task 'build' do

+ 14
- 9
db/scrape.rb View File

16
 end
16
 end
17
 
17
 
18
 def fetch_thread(thread, cookie)
18
 def fetch_thread(thread, cookie)
19
+  sleep(0.5)
19
   url =
20
   url =
20
     URI(
21
     URI(
21
       "http://board.vivalavinyl.com/thread/view/#{thread[:remote_id]}&ajax=true"
22
       "http://board.vivalavinyl.com/thread/view/#{thread[:remote_id]}&ajax=true"
65
     thread_link = row.at_css('.subject > a')
66
     thread_link = row.at_css('.subject > a')
66
     next if thread_link.nil?
67
     next if thread_link.nil?
67
     thread[:remote_id] = thread_link['href'].split('/')[3]
68
     thread[:remote_id] = thread_link['href'].split('/')[3]
68
-    thread[:title] = thread_link.text
69
+    thread[:title] = thread_link.text.strip
69
     creator = row.at_css('.memberlink').text.strip
70
     creator = row.at_css('.memberlink').text.strip
70
     creator = creator[0..-2] if creator.match(/\+$/)
71
     creator = creator[0..-2] if creator.match(/\+$/)
71
     thread[:creator] = creator
72
     thread[:creator] = creator
73
+    thread[:is_sticky] = !!thread[:title].match(/^Sticky:/)
72
     threads << thread
74
     threads << thread
73
   end
75
   end
74
   threads
76
   threads
75
 end
77
 end
76
 
78
 
77
-def scrape(first: 0, last: 0)
79
+def scrape(first: 0, last: 0, log: false)
78
   cookie = login(ENV['VLV_USERNAME'], ENV['VLV_PASSWORD'])
80
   cookie = login(ENV['VLV_USERNAME'], ENV['VLV_PASSWORD'])
79
   db = connect
81
   db = connect
80
 
82
 
81
-  (first..last).each do |page_number|
83
+  (first..last).each_with_index do |page_number, index|
82
     page = fetch_page(page_number, cookie)
84
     page = fetch_page(page_number, cookie)
83
     threads = parse_threads(page)
85
     threads = parse_threads(page)
84
     threads.each do |t|
86
     threads.each do |t|
85
-      puts t[:title]
86
-      sleep(1)
87
+      next if index > 0 && t[:is_sticky]
88
+
89
+      puts t[:title] if log
90
+
87
       page = fetch_thread(t, cookie)
91
       page = fetch_thread(t, cookie)
88
       first_post = page.at_css('.postinfo:first-child')
92
       first_post = page.at_css('.postinfo:first-child')
89
 
93
 
96
       thread = db.from(:threads).first(remote_id: t[:remote_id])
100
       thread = db.from(:threads).first(remote_id: t[:remote_id])
97
       is_new_thread = thread.nil?
101
       is_new_thread = thread.nil?
98
       if is_new_thread
102
       if is_new_thread
99
-        puts '  Inserting thread'
103
+        puts '  Inserting thread' if log
100
         id =
104
         id =
101
           db.from(:threads).insert(
105
           db.from(:threads).insert(
102
             title: t[:title],
106
             title: t[:title],
112
 
116
 
113
       last_post = posts.last
117
       last_post = posts.last
114
       unless db.from(:posts).first(remote_id: last_post[:remote_id]).nil?
118
       unless db.from(:posts).first(remote_id: last_post[:remote_id]).nil?
119
+        next if t[:is_sticky]
115
         break
120
         break
116
       end
121
       end
117
 
122
 
118
       posts_count = posts.size
123
       posts_count = posts.size
119
       posts.each_with_index do |p, index|
124
       posts.each_with_index do |p, index|
120
         msg = "  Inserting post #{index + 1}/#{posts_count}"
125
         msg = "  Inserting post #{index + 1}/#{posts_count}"
121
-        print msg
126
+        print msg if log
122
         if is_new_thread || db.from(:posts).first(remote_id: p[:remote_id]).nil?
127
         if is_new_thread || db.from(:posts).first(remote_id: p[:remote_id]).nil?
123
           db.from(:posts).insert(
128
           db.from(:posts).insert(
124
             body: p[:body],
129
             body: p[:body],
128
             remote_id: p[:remote_id]
133
             remote_id: p[:remote_id]
129
           )
134
           )
130
         end
135
         end
131
-        print "\b" * msg.size unless index == posts_count - 1
136
+        print "\b" * msg.size unless index == posts_count - 1 if log
132
       end
137
       end
133
 
138
 
134
-      puts
139
+      puts if log
135
     end
140
     end
136
   end
141
   end
137
 end
142
 end

Loading…
Cancel
Save