require_relative '../lib/parser'

# Specs for Parser: each example feeds a Nokogiri document built from an
# inline HTML fixture to one Parser instance method and checks the plain Ruby
# values (hashes / Time) that come back.
#
# NOTE(review): this file never requires nokogiri itself; presumably
# ../lib/parser (or the spec helper) loads it -- verify.
RSpec.describe Parser do
  subject(:parser) { described_class.new }

  describe '#threads' do
    # Two thread rows; only the first has a "Sticky:" marker in its subject link.
    #
    # NOTE(review): the first subject link closes <strong> with </sticky>.
    # That looks like a typo, but it is kept verbatim because the parser's
    # sticky detection may depend on how Nokogiri recovers from it -- confirm
    # before "fixing" the fixture.
    let(:html) do
      Nokogiri::HTML(<<~HTML)
        <div class="even" id="thread_12345">
          <ul class="list read">
            <li class="member">
              <span>Thread By: </span>
              <a href="/member/view/creator1/" class="memberlink">creator1</a>
            </li>
            <li class="subject">
              <span>Subject: </span>
              <a href="/thread/view/12345/&p=999">
                <strong>Sticky:</sticky> Thread title 1
              </a>
            </li>
            <li class="posts"><span>Posts: </span>999</li>
            <li class="lastpost">
              <span>Last Post By:</span>
              <a href="/member/view/lastposter1/" class="memberlink">lastposter1</a> on Fri Apr 10 2020 01:23 am</li>
          </ul>
        </div>
        <div class="even" id="thread_123456">
          <ul class="list read">
            <li class="member">
              <span>Thread By: </span>
              <a href="/member/view/creator2/" class="memberlink">creator2</a>
            </li>
            <li class="subject">
              <span>Subject: </span>
              <a href="/thread/view/123456/&p=999">Thread title 2</a>
            </li>
            <li class="posts"><span>Posts: </span>999</li>
            <li class="lastpost">
              <span>Last Post By:</span>
              <a href="/member/view/lastposter2/" class="memberlink">lastposter2</a> on Fri Apr 10 2020 01:23 am</li>
          </ul>
        </div>
      HTML
    end

    it 'parses threads' do
      # eq (not match_array): the result must keep document order.
      expect(parser.threads(html)).to eq(
        [
          { remote_id: '12345', title: 'Sticky: Thread title 1', creator: 'creator1', is_sticky: true },
          { remote_id: '123456', title: 'Thread title 2', creator: 'creator2', is_sticky: false }
        ]
      )
    end
  end

  describe '#posts' do
    # Two posts (ids 69 and 420) wrapped in a single container div.
    let(:html) do
      Nokogiri::HTML(<<~HTML)
        <div>
          <div class="post">
            <ul class="view" id="post_69">
              <li class="info">
                <div class="postinfo">
                  <a class="memberlink" href="/member/view/User1">User1</a>
                  posted this October 26th, 2021 @ 12:34:56 am
                </div>
              </li>
              <li class="postbody">
                This is the body of the first post
              </li>
            </ul>
          </div>
          <div class="post">
            <ul class="view" id="post_420">
              <li class="info">
                <div class="postinfo">
                  <a class="memberlink" href="/member/view/User2">User2</a>
                  posted this October 27th, 2021 @ 12:34:56 am
                </div>
              </li>
              <li class="postbody">
                This is the body of the second post
              </li>
            </ul>
          </div>
        </div>
      HTML
    end

    it 'parses posts' do
      # "12:34:56 am" is 00:34:56 on a 24-hour clock. Time.new without an
      # explicit offset builds a local-time value, so this expectation assumes
      # the parser also yields local times -- TODO confirm.
      expect(parser.posts({ id: 666 }, html)).to match_array(
        [
          {
            remote_id: 69,
            creator: 'User1',
            thread_id: 666,
            created_at: Time.new(2021, 10, 26, 0, 34, 56),
            body: 'This is the body of the first post'
          },
          {
            remote_id: 420,
            creator: 'User2',
            thread_id: 666,
            created_at: Time.new(2021, 10, 27, 0, 34, 56),
            body: 'This is the body of the second post'
          }
        ]
      )
    end
  end

  describe '#thread_created_at' do
    # A lone "postinfo" block, i.e. the header of a single post.
    let(:html) do
      Nokogiri::HTML(<<~HTML)
        <div class="postinfo">
          <a class="memberlink" href="/member/view/User1">User1</a>
          posted this October 27th, 2021 @ 12:34:56 am
        </div>
      HTML
    end

    it 'parses the timestamp of the first post' do
      expect(parser.thread_created_at(html))
        .to eq(Time.new(2021, 10, 27, 0, 34, 56))
    end
  end
end
|