Fix Pleroma.HTML.extract_first_external_url/2
[akkoma] / test / html_test.exs
# Pleroma: A lightweight social networking server
# Copyright © 2017-2019 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
4
defmodule Pleroma.HTMLTest do
  # Tests for Pleroma.HTML:
  #   * `strip_tags/1` (the "StripTags scrubber" group),
  #   * `filter_tags/2` with the TwitterText and Default scrubbers,
  #   * `extract_first_external_url/2` on statuses created through CommonAPI.
  alias Pleroma.HTML
  alias Pleroma.Object
  alias Pleroma.Web.CommonAPI
  use Pleroma.DataCase

  import Pleroma.Factory

  # General-purpose fixture: formatting tags, links with an accepted and a
  # rejected "rel" value, an image and an inline <script>.
  @html_sample """
  <b>this is in bold</b>
  <p>this is a paragraph</p>
  this is a linebreak<br />
  this is a link with allowed "rel" attribute: <a href="http://example.com/" rel="tag">example.com</a>
  this is a link with not allowed "rel" attribute: <a href="http://example.com/" rel="tag noallowed">example.com</a>
  this is an image: <img src="http://example.com/image.jpg"><br />
  <script>alert('hacked')</script>
  """

  # Image carrying an event-handler attribute (attribute-based XSS vector).
  @html_onerror_sample """
  <img src="http://example.com/image.jpg" onerror="alert('hacked')">
  """

  # Span with a class the scrubbers are expected to drop.
  @html_span_class_sample """
  <span class="animate-spin">hi</span>
  """

  # Mention markup using only microformats classes.
  @html_span_microformats_sample """
  <span class="h-card"><a class="u-url mention">@<span>foo</span></a></span>
  """

  # Same mention markup, but the <a> also carries a non-microformats class.
  @html_span_invalid_microformats_sample """
  <span class="h-card"><a class="u-url mention animate-spin">@<span>foo</span></a></span>
  """

  describe "StripTags scrubber" do
    test "works as expected" do
      # Every tag is removed and the remaining text is entity-escaped.
      # NOTE: the "this is an image:" line ends with a space (the one that
      # preceded the stripped <img>); it is written as `\s` so that editors
      # trimming trailing whitespace cannot silently break the expectation.
      expected = """
      this is in bold
      this is a paragraph
      this is a linebreak
      this is a link with allowed &quot;rel&quot; attribute: example.com
      this is a link with not allowed &quot;rel&quot; attribute: example.com
      this is an image:\s
      alert(&#39;hacked&#39;)
      """

      assert HTML.strip_tags(@html_sample) == expected
    end

    test "does not allow attribute-based XSS" do
      # The <img> disappears entirely; only the fixture's newline is left.
      expected = "\n"

      assert HTML.strip_tags(@html_onerror_sample) == expected
    end
  end

  describe "TwitterText scrubber" do
    test "normalizes HTML as expected" do
      # <b> and <script> tags are dropped (their text is kept), void tags are
      # self-closed, and the rejected "rel" value is removed from the link.
      expected = """
      this is in bold
      <p>this is a paragraph</p>
      this is a linebreak<br/>
      this is a link with allowed &quot;rel&quot; attribute: <a href="http://example.com/" rel="tag">example.com</a>
      this is a link with not allowed &quot;rel&quot; attribute: <a href="http://example.com/">example.com</a>
      this is an image: <img src="http://example.com/image.jpg"/><br/>
      alert(&#39;hacked&#39;)
      """

      assert HTML.filter_tags(@html_sample, Pleroma.HTML.Scrubber.TwitterText) == expected
    end

    test "does not allow attribute-based XSS" do
      # The image survives, its `onerror` attribute does not.
      expected = """
      <img src="http://example.com/image.jpg"/>
      """

      assert HTML.filter_tags(@html_onerror_sample, Pleroma.HTML.Scrubber.TwitterText) ==
               expected
    end

    test "does not allow spans with invalid classes" do
      expected = """
      <span>hi</span>
      """

      assert HTML.filter_tags(@html_span_class_sample, Pleroma.HTML.Scrubber.TwitterText) ==
               expected
    end

    test "does allow microformats" do
      # Markup must pass through unchanged.
      expected = """
      <span class="h-card"><a class="u-url mention">@<span>foo</span></a></span>
      """

      assert HTML.filter_tags(
               @html_span_microformats_sample,
               Pleroma.HTML.Scrubber.TwitterText
             ) == expected
    end

    test "filters invalid microformats markup" do
      # The whole class attribute of the <a> is dropped, not just the bad class.
      expected = """
      <span class="h-card"><a>@<span>foo</span></a></span>
      """

      assert HTML.filter_tags(
               @html_span_invalid_microformats_sample,
               Pleroma.HTML.Scrubber.TwitterText
             ) == expected
    end
  end

  describe "default scrubber" do
    test "normalizes HTML as expected" do
      # Unlike the TwitterText expectation, <b> is preserved here.
      expected = """
      <b>this is in bold</b>
      <p>this is a paragraph</p>
      this is a linebreak<br/>
      this is a link with allowed &quot;rel&quot; attribute: <a href="http://example.com/" rel="tag">example.com</a>
      this is a link with not allowed &quot;rel&quot; attribute: <a href="http://example.com/">example.com</a>
      this is an image: <img src="http://example.com/image.jpg"/><br/>
      alert(&#39;hacked&#39;)
      """

      assert HTML.filter_tags(@html_sample, Pleroma.HTML.Scrubber.Default) == expected
    end

    test "does not allow attribute-based XSS" do
      # The image survives, its `onerror` attribute does not.
      expected = """
      <img src="http://example.com/image.jpg"/>
      """

      assert HTML.filter_tags(@html_onerror_sample, Pleroma.HTML.Scrubber.Default) == expected
    end

    test "does not allow spans with invalid classes" do
      expected = """
      <span>hi</span>
      """

      assert HTML.filter_tags(@html_span_class_sample, Pleroma.HTML.Scrubber.Default) == expected
    end

    test "does allow microformats" do
      # Markup must pass through unchanged.
      expected = """
      <span class="h-card"><a class="u-url mention">@<span>foo</span></a></span>
      """

      assert HTML.filter_tags(@html_span_microformats_sample, Pleroma.HTML.Scrubber.Default) ==
               expected
    end

    test "filters invalid microformats markup" do
      # The whole class attribute of the <a> is dropped, not just the bad class.
      expected = """
      <span class="h-card"><a>@<span>foo</span></a></span>
      """

      assert HTML.filter_tags(
               @html_span_invalid_microformats_sample,
               Pleroma.HTML.Scrubber.Default
             ) == expected
    end
  end

  describe "extract_first_external_url" do
    test "extracts the url" do
      user = insert(:user)

      result =
        extract_first_url_from_post(user, %{
          "status" =>
            "I think I just found the best github repo https://github.com/komeiji-satori/Dress"
        })

      assert result == {:ok, "https://github.com/komeiji-satori/Dress"}
    end

    test "skips mentions" do
      user = insert(:user)
      other_user = insert(:user)

      assert {:ok, url} =
               extract_first_url_from_post(user, %{
                 "status" =>
                   "@#{other_user.nickname} install misskey! https://github.com/syuilo/misskey/blob/develop/docs/setup.en.md"
               })

      assert url == "https://github.com/syuilo/misskey/blob/develop/docs/setup.en.md"

      # Spells out the intent of the test: the mention link must not be picked.
      refute url == other_user.ap_id
    end

    test "skips hashtags" do
      user = insert(:user)

      result =
        extract_first_url_from_post(user, %{
          "status" =>
            "#cofe https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140"
        })

      assert result ==
               {:ok, "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140"}
    end

    test "skips microformats hashtags" do
      user = insert(:user)

      # The hashtag link is supplied as raw HTML (rel="tag") instead of being
      # generated from a plain-text "#cofe".
      result =
        extract_first_url_from_post(user, %{
          "status" =>
            "<a href=\"https://pleroma.gov/tags/cofe\" rel=\"tag\">#cofe</a> https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140",
          "content_type" => "text/html"
        })

      assert result ==
               {:ok, "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140"}
    end

    test "does not crash when there is an HTML entity in a link" do
      user = insert(:user)

      # The URL is wrapped in literal double quotes and contains `&`; the
      # extractor is expected to answer `{:ok, nil}` instead of raising.
      assert {:ok, nil} =
               extract_first_url_from_post(user, %{
                 "status" => "\"http://cofe.com/?boomer=ok&foo=bar\""
               })
    end
  end

  # Posts `params` as `user` through CommonAPI, normalizes the resulting
  # activity to its object, and returns whatever
  # `HTML.extract_first_external_url/2` yields for that object's "content".
  # Crashes with a MatchError if the post itself does not return `{:ok, _}`.
  defp extract_first_url_from_post(user, params) do
    {:ok, activity} = CommonAPI.post(user, params)
    object = Object.normalize(activity)

    HTML.extract_first_external_url(object, object.data["content"])
  end
end