Merge branch 'develop' into feature/fast_sanitize
[akkoma] / lib / pleroma / html.ex
1 # Pleroma: A lightweight social networking server
2 # Copyright © 2017-2019 Pleroma Authors <https://pleroma.social/>
3 # SPDX-License-Identifier: AGPL-3.0-only
4
defmodule Pleroma.HTML do
  @moduledoc """
  HTML scrubbing helpers built on `FastSanitize.Sanitizer.scrub/2`.

  Scrubbers are modules (or a list of modules) that are applied in order.
  The configured policy is read from `[:markup, :scrub_policy]`.
  """

  @default_scrubbers [Pleroma.HTML.Scrubber.Default]

  # `nil` is an atom, so it has to be matched before the `is_atom/1` clause.
  # Otherwise an unset policy becomes `[nil]`, and `filter_tags(html, nil)`
  # ends up calling itself forever through the list clause.
  defp get_scrubbers(nil), do: @default_scrubbers
  defp get_scrubbers(scrubber) when is_atom(scrubber), do: [scrubber]
  defp get_scrubbers(scrubbers) when is_list(scrubbers), do: scrubbers
  defp get_scrubbers(_), do: @default_scrubbers

  @doc """
  Returns the configured scrub policy as a list of scrubbers.

  Falls back to `Pleroma.HTML.Scrubber.Default` when the configured value is
  `nil` or is neither an atom nor a list.
  """
  def get_scrubbers do
    [:markup, :scrub_policy]
    |> Pleroma.Config.get()
    |> get_scrubbers()
  end

  @doc """
  Scrubs `html` with the given scrubber(s).

    * `nil` - use the configured scrubbers (see `get_scrubbers/0`)
    * a list - apply every scrubber in order, feeding each the previous result
    * anything else - treated as a single scrubber

  Raises `MatchError` when the sanitizer does not return `{:ok, content}`.
  """
  def filter_tags(html, nil) do
    filter_tags(html, get_scrubbers())
  end

  def filter_tags(html, scrubbers) when is_list(scrubbers) do
    Enum.reduce(scrubbers, html, fn scrubber, scrubbed ->
      filter_tags(scrubbed, scrubber)
    end)
  end

  def filter_tags(html, scrubber) do
    {:ok, content} = FastSanitize.Sanitizer.scrub(html, scrubber)
    content
  end

  @doc "Scrubs `html` with the configured scrubbers."
  def filter_tags(html), do: filter_tags(html, nil)

  @doc "Scrubs `html` with `FastSanitize.Sanitizer.StripTags`."
  def strip_tags(html), do: filter_tags(html, FastSanitize.Sanitizer.StripTags)

  @doc """
  Returns the scrubbed `content` for `activity`, going through `:scrubber_cache`.

  The cache key is `key`, followed by the scrubber signature, `"|"` and
  `activity.id`. On a cache miss the activity is normalized to its object and
  the content is scrubbed via `ensure_scrubbed_html/4`; `callback` is applied
  to the scrubbed content before it is returned.
  """
  def get_cached_scrubbed_html_for_activity(
        content,
        scrubbers,
        activity,
        key \\ "",
        callback \\ fn x -> x end
      ) do
    key = "#{key}#{generate_scrubber_signature(scrubbers)}|#{activity.id}"

    Cachex.fetch!(:scrubber_cache, key, fn _key ->
      object = Pleroma.Object.normalize(activity)
      ensure_scrubbed_html(content, scrubbers, object.data["fake"] || false, callback)
    end)
  end

  @doc """
  Same as `get_cached_scrubbed_html_for_activity/5`, using
  `FastSanitize.Sanitizer.StripTags` as the scrubber and decoding HTML
  entities in the result.
  """
  def get_cached_stripped_html_for_activity(content, activity, key) do
    get_cached_scrubbed_html_for_activity(
      content,
      FastSanitize.Sanitizer.StripTags,
      activity,
      key,
      &HtmlEntities.decode/1
    )
  end

  @doc """
  Scrubs `content` with `scrubbers`, then applies `callback` to the result.

  Returns `{:ignore, content}` when `fake` is truthy and `{:commit, content}`
  otherwise. These tuples are consumed by the `Cachex.fetch!/3` fallback in
  `get_cached_scrubbed_html_for_activity/5`.
  """
  def ensure_scrubbed_html(
        content,
        scrubbers,
        fake,
        callback
      ) do
    content =
      content
      |> filter_tags(scrubbers)
      |> callback.()

    if fake do
      {:ignore, content}
    else
      {:commit, content}
    end
  end

  # Builds the cache-key fragment identifying the scrubber set: the string
  # forms of all scrubbers, concatenated without a separator.
  defp generate_scrubber_signature(scrubber) when is_atom(scrubber) do
    generate_scrubber_signature([scrubber])
  end

  defp generate_scrubber_signature(scrubbers) do
    Enum.map_join(scrubbers, "", &to_string/1)
  end

  @doc """
  Returns `{:ok, href}` for the first `<a>` in `content` that is not a mention,
  a hashtag or a `rel="tag"` link (`href` is `nil` when there is none).

  Returns `{:error, "No content"}` when `content` is `nil`. Results are cached
  in `:scrubber_cache` under `"URL|<object.id>"`.
  """
  def extract_first_external_url(_, nil), do: {:error, "No content"}

  def extract_first_external_url(object, content) do
    key = "URL|#{object.id}"

    Cachex.fetch!(:scrubber_cache, key, fn _key ->
      result =
        content
        |> Floki.filter_out("a.mention,a.hashtag,a[rel~=\"tag\"]")
        |> Floki.attribute("a", "href")
        |> Enum.at(0)

      {:commit, {:ok, result}}
    end)
  end
end
102
defmodule Pleroma.HTML.Scrubber.TwitterText do
  @moduledoc """
  Scrubbing policy for twitter-style text.

  Lets through links, paragraphs, line breaks and spans, plus inline images
  when `[:markup, :allow_inline_images]` is enabled. Everything else is
  stripped.
  """

  require FastSanitize.Sanitizer.Meta, as: Meta

  # Both settings are read while the module is being compiled.
  @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], [])
  @allow_inline_images Pleroma.Config.get([:markup, :allow_inline_images])

  Meta.strip_comments()

  # Anchors: URI-carrying attributes are limited to the configured schemes.
  Meta.allow_tag_with_uri_attributes(:a, ["href", "data-user", "data-tag"], @valid_schemes)

  Meta.allow_tag_with_this_attribute_values(:a, "class", [
    "hashtag",
    "u-url",
    "mention",
    "u-url mention",
    "mention u-url"
  ])

  Meta.allow_tag_with_this_attribute_values(:a, "rel", [
    "tag",
    "nofollow",
    "noopener",
    "noreferrer"
  ])

  Meta.allow_tag_with_these_attributes(:a, ["name", "title"])

  # Block structure: bare <br> and <p> only.
  Meta.allow_tag_with_these_attributes(:br, [])
  Meta.allow_tag_with_these_attributes(:p, [])

  # Microformats: <span class="h-card">.
  Meta.allow_tag_with_this_attribute_values(:span, "class", ["h-card"])
  Meta.allow_tag_with_these_attributes(:span, [])

  # Inline images (custom emoji) are opt-in.
  if @allow_inline_images do
    # Only http/https sources, because of MediaProxy.
    Meta.allow_tag_with_uri_attributes(:img, ["src"], ["http", "https"])

    Meta.allow_tag_with_these_attributes(:img, [
      "width",
      "height",
      "class",
      "title",
      "alt"
    ])
  end

  Meta.strip_everything_not_covered()
end
160
defmodule Pleroma.HTML.Scrubber.Default do
  @moduledoc """
  The default HTML scrubbing policy.

  Lets through links, basic inline and block formatting tags, lists and
  spans. Inline images, tables, headings and `<font>` are additionally allowed
  when the matching `[:markup, ...]` setting is enabled at compile time.
  Everything else is stripped.
  """

  require FastSanitize.Sanitizer.Meta
  alias FastSanitize.Sanitizer.Meta
  # credo:disable-for-previous-line
  # No idea how to fix this one…

  # Read while the module is being compiled.
  @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], [])

  Meta.strip_comments()

  # Links: URI-carrying attributes are limited to the configured schemes.
  Meta.allow_tag_with_uri_attributes(:a, ["href", "data-user", "data-tag"], @valid_schemes)

  Meta.allow_tag_with_this_attribute_values(:a, "class", [
    "hashtag",
    "u-url",
    "mention",
    "u-url mention",
    "mention u-url"
  ])

  Meta.allow_tag_with_this_attribute_values(:a, "rel", [
    "tag",
    "nofollow",
    "noopener",
    "noreferrer",
    "ugc"
  ])

  Meta.allow_tag_with_these_attributes(:a, ["name", "title"])

  Meta.allow_tag_with_these_attributes(:abbr, ["title"])

  # Formatting tags allowed without any attributes.
  Meta.allow_tag_with_these_attributes(:b, [])
  Meta.allow_tag_with_these_attributes(:blockquote, [])
  Meta.allow_tag_with_these_attributes(:br, [])
  Meta.allow_tag_with_these_attributes(:code, [])
  Meta.allow_tag_with_these_attributes(:del, [])
  Meta.allow_tag_with_these_attributes(:em, [])
  Meta.allow_tag_with_these_attributes(:i, [])
  Meta.allow_tag_with_these_attributes(:li, [])
  Meta.allow_tag_with_these_attributes(:ol, [])
  Meta.allow_tag_with_these_attributes(:p, [])
  Meta.allow_tag_with_these_attributes(:pre, [])
  Meta.allow_tag_with_these_attributes(:strong, [])
  Meta.allow_tag_with_these_attributes(:sub, [])
  Meta.allow_tag_with_these_attributes(:sup, [])
  Meta.allow_tag_with_these_attributes(:u, [])
  Meta.allow_tag_with_these_attributes(:ul, [])

  # Microformats: <span class="h-card">.
  Meta.allow_tag_with_this_attribute_values(:span, "class", ["h-card"])
  Meta.allow_tag_with_these_attributes(:span, [])

  @allow_inline_images Pleroma.Config.get([:markup, :allow_inline_images])

  if @allow_inline_images do
    # restrict img tags to http/https only, because of MediaProxy.
    Meta.allow_tag_with_uri_attributes(:img, ["src"], ["http", "https"])

    Meta.allow_tag_with_these_attributes(:img, [
      "width",
      "height",
      "class",
      "title",
      "alt"
    ])
  end

  if Pleroma.Config.get([:markup, :allow_tables]) do
    Meta.allow_tag_with_these_attributes(:table, [])
    Meta.allow_tag_with_these_attributes(:tbody, [])
    Meta.allow_tag_with_these_attributes(:td, [])
    Meta.allow_tag_with_these_attributes(:th, [])
    Meta.allow_tag_with_these_attributes(:thead, [])
    Meta.allow_tag_with_these_attributes(:tr, [])
  end

  if Pleroma.Config.get([:markup, :allow_headings]) do
    Meta.allow_tag_with_these_attributes(:h1, [])
    Meta.allow_tag_with_these_attributes(:h2, [])
    Meta.allow_tag_with_these_attributes(:h3, [])
    Meta.allow_tag_with_these_attributes(:h4, [])
    Meta.allow_tag_with_these_attributes(:h5, [])
  end

  if Pleroma.Config.get([:markup, :allow_fonts]) do
    Meta.allow_tag_with_these_attributes(:font, ["face"])
  end

  Meta.strip_everything_not_covered()
end
253
defmodule Pleroma.HTML.Transform.MediaProxy do
  @moduledoc "Rewrites the `src` of inline images so that it goes through MediaProxy."

  alias Pleroma.Web.MediaProxy

  # The HTML is passed on untouched before scrubbing.
  def before_scrub(html), do: html

  # An <img> `src` starting with "http" is replaced by its MediaProxy URL.
  def scrub_attribute(:img, {"src", "http" <> _rest = url}) do
    {"src", MediaProxy.url(url)}
  end

  # Every other attribute is returned unchanged.
  def scrub_attribute(_tag, attribute), do: attribute

  # Images: rewrite each attribute and drop any that come back as nil.
  def scrub({:img, attributes, children}) do
    rewritten =
      Enum.flat_map(attributes, fn attribute ->
        case scrub_attribute(:img, attribute) do
          nil -> []
          scrubbed -> [scrubbed]
        end
      end)

    {:img, rewritten, children}
  end

  # Comments are replaced with an empty string.
  def scrub({:comment, _text, _children}), do: ""

  # Any other three-element node is kept as it is.
  def scrub({tag, attributes, children}), do: {tag, attributes, children}

  # Two-element nodes collapse to their children.
  def scrub({_tag, children}), do: children

  # Anything else (e.g. text) is returned unchanged.
  def scrub(text), do: text
end
286
defmodule Pleroma.HTML.Scrubber.LinksOnly do
  @moduledoc """
  Scrubbing policy that keeps links and strips everything else.
  """

  require FastSanitize.Sanitizer.Meta, as: Meta

  # Read while the module is being compiled.
  @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], [])

  Meta.strip_comments()

  # Anchors: `href` is limited to the configured URI schemes.
  Meta.allow_tag_with_uri_attributes(:a, ["href"], @valid_schemes)

  Meta.allow_tag_with_this_attribute_values(:a, "rel", [
    "tag",
    "nofollow",
    "noopener",
    "noreferrer",
    "me",
    "ugc"
  ])

  Meta.allow_tag_with_these_attributes(:a, ["name", "title"])

  Meta.strip_everything_not_covered()
end