Merge branch 'develop' into issue/1342
[akkoma] / lib / pleroma / html.ex
1 # Pleroma: A lightweight social networking server
2 # Copyright © 2017-2019 Pleroma Authors <https://pleroma.social/>
3 # SPDX-License-Identifier: AGPL-3.0-only
4
defmodule Pleroma.HTML do
  @moduledoc """
  Helpers for sanitizing HTML with `FastSanitize` scrubber policies, caching
  scrubbed output per activity in the `:scrubber_cache` Cachex cache, and
  extracting the first non-mention, non-hashtag link from a piece of content.
  """

  @default_scrubbers [Pleroma.HTML.Scrubber.Default]

  # `nil` is an atom, so it has to be matched before the `is_atom/1` clause.
  # Without this clause an unset `:scrub_policy` produced `[nil]`, and
  # `filter_tags(html, nil)` then recursed forever via `get_scrubbers/0`.
  defp get_scrubbers(nil), do: @default_scrubbers
  defp get_scrubbers(scrubber) when is_atom(scrubber), do: [scrubber]
  defp get_scrubbers(scrubbers) when is_list(scrubbers), do: scrubbers
  defp get_scrubbers(_), do: @default_scrubbers

  @doc """
  Returns the scrubbers configured under `[:markup, :scrub_policy]` as a list.

  A single module is wrapped in a list and a list is returned unchanged. An
  unset (`nil`) or otherwise unsupported value falls back to
  `Pleroma.HTML.Scrubber.Default`.
  """
  def get_scrubbers do
    get_scrubbers(Pleroma.Config.get([:markup, :scrub_policy]))
  end

  @doc """
  Sanitizes `html` with the configured scrubbers (see `get_scrubbers/0`).
  """
  def filter_tags(html), do: filter_tags(html, nil)

  @doc """
  Sanitizes `html` with the given scrubber(s) and returns the scrubbed content.

  `scrubbers` may be:

    * `nil` - use the configured scrubbers from `get_scrubbers/0`;
    * a list - apply every scrubber in order, feeding each one the output of
      the previous one (an empty list returns `html` unchanged);
    * a single scrubber - passed to `FastSanitize.Sanitizer.scrub/2`.

  Raises a `MatchError` if `FastSanitize.Sanitizer.scrub/2` does not return
  `{:ok, content}`.
  """
  def filter_tags(html, nil), do: filter_tags(html, get_scrubbers())

  def filter_tags(html, scrubbers) when is_list(scrubbers) do
    Enum.reduce(scrubbers, html, fn scrubber, acc -> filter_tags(acc, scrubber) end)
  end

  def filter_tags(html, scrubber) do
    {:ok, content} = FastSanitize.Sanitizer.scrub(html, scrubber)
    content
  end

  @doc """
  Sanitizes `html` with the `FastSanitize.Sanitizer.StripTags` scrubber.
  """
  def strip_tags(html), do: filter_tags(html, FastSanitize.Sanitizer.StripTags)

  @doc """
  Returns `content` scrubbed with `scrubbers`, using `:scrubber_cache`.

  The cache key is `key`, followed by a signature of the scrubbers, `"|"` and
  `activity.id`. On a cache miss the activity is normalized to its object and
  the content is processed by `ensure_scrubbed_html/4`; `callback` is applied
  to the scrubbed content. When the object's `data["fake"]` is truthy the
  result is returned as `{:ignore, content}`, which Cachex does not store.
  """
  def get_cached_scrubbed_html_for_activity(
        content,
        scrubbers,
        activity,
        key \\ "",
        callback \\ fn x -> x end
      ) do
    key = "#{key}#{generate_scrubber_signature(scrubbers)}|#{activity.id}"

    Cachex.fetch!(:scrubber_cache, key, fn _key ->
      object = Pleroma.Object.normalize(activity)
      ensure_scrubbed_html(content, scrubbers, object.data["fake"] || false, callback)
    end)
  end

  @doc """
  Same as `get_cached_scrubbed_html_for_activity/5`, but uses the
  `FastSanitize.Sanitizer.StripTags` scrubber and decodes HTML entities in the
  result with `HtmlEntities.decode/1`.
  """
  def get_cached_stripped_html_for_activity(content, activity, key) do
    get_cached_scrubbed_html_for_activity(
      content,
      FastSanitize.Sanitizer.StripTags,
      activity,
      key,
      &HtmlEntities.decode/1
    )
  end

  @doc """
  Scrubs `content` with `scrubbers`, applies `callback` to the result and tags
  it for use as a Cachex fallback result.

  Returns `{:ignore, content}` when `fake` is truthy and `{:commit, content}`
  otherwise.
  """
  def ensure_scrubbed_html(
        content,
        scrubbers,
        fake,
        callback
      ) do
    content =
      content
      |> filter_tags(scrubbers)
      |> callback.()

    if fake do
      {:ignore, content}
    else
      {:commit, content}
    end
  end

  # Builds the cache-key fragment identifying a scrubber or a list of
  # scrubbers: their string representations concatenated without a separator.
  defp generate_scrubber_signature(scrubber) when is_atom(scrubber) do
    generate_scrubber_signature([scrubber])
  end

  defp generate_scrubber_signature(scrubbers) do
    Enum.map_join(scrubbers, "", &to_string/1)
  end

  @doc """
  Extracts the `href` of the first `a` element in `content` that is not a
  mention, a hashtag or a `rel~="tag"` link.

  Returns `{:error, "No content"}` when `content` is `nil`. Otherwise returns
  `{:ok, url}`, where `url` is `nil` if no matching link exists. The result is
  cached in `:scrubber_cache` under `"URL|<object.id>"`.
  """
  def extract_first_external_url(_, nil), do: {:error, "No content"}

  def extract_first_external_url(object, content) do
    key = "URL|#{object.id}"

    Cachex.fetch!(:scrubber_cache, key, fn _key ->
      result =
        content
        |> Floki.filter_out("a.mention,a.hashtag,a[rel~=\"tag\"]")
        |> Floki.attribute("a", "href")
        |> Enum.at(0)

      {:commit, {:ok, result}}
    end)
  end
end
102
defmodule Pleroma.HTML.Scrubber.TwitterText do
  @moduledoc """
  Scrubbing policy for twitter-style text.

  Lets through links, line breaks, paragraphs and `span` elements (used for
  `h-card` microformats), plus inline images when
  `[:markup, :allow_inline_images]` is enabled.
  """

  require FastSanitize.Sanitizer.Meta
  alias FastSanitize.Sanitizer.Meta

  # Both settings are read in the module body, i.e. when the module is compiled.
  @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], [])
  @allow_inline_images Pleroma.Config.get([:markup, :allow_inline_images])

  Meta.strip_comments()

  # Anchors: URI-bearing attributes are limited to the configured schemes.
  Meta.allow_tag_with_uri_attributes(:a, ["href", "data-user", "data-tag"], @valid_schemes)

  Meta.allow_tag_with_this_attribute_values(:a, "class", [
    "hashtag",
    "u-url",
    "mention",
    "u-url mention",
    "mention u-url"
  ])

  Meta.allow_tag_with_this_attribute_values(:a, "rel", [
    "tag",
    "nofollow",
    "noopener",
    "noreferrer"
  ])

  Meta.allow_tag_with_these_attributes(:a, ["name", "title"])

  # Line breaks and paragraphs, without any attributes.
  Meta.allow_tag_with_these_attributes(:br, [])
  Meta.allow_tag_with_these_attributes(:p, [])

  # Microformats.
  Meta.allow_tag_with_this_attribute_values(:span, "class", ["h-card"])
  Meta.allow_tag_with_these_attributes(:span, [])

  # Inline images (custom emoji) are opt-in.
  if @allow_inline_images do
    # Only http/https sources are accepted, because of MediaProxy.
    Meta.allow_tag_with_uri_attributes(:img, ["src"], ["http", "https"])

    Meta.allow_tag_with_these_attributes(:img, [
      "width",
      "height",
      "class",
      "title",
      "alt"
    ])
  end

  Meta.strip_everything_not_covered()
end
160
defmodule Pleroma.HTML.Scrubber.Default do
  @moduledoc """
  The default HTML scrubbing policy.

  Always allowed: links (with URI attributes limited to the configured
  `[:uri_schemes, :valid_schemes]`), `abbr`, basic inline and block formatting
  tags, lists, and `span` (optionally with the `h-card` class).

  Allowed only when the matching `:markup` setting is enabled:

    * `:allow_inline_images` - `img` with http/https sources;
    * `:allow_tables` - `table`, `tbody`, `td`, `th`, `thead`, `tr`;
    * `:allow_headings` - `h1` through `h5`;
    * `:allow_fonts` - `font` with the `face` attribute.

  All settings are read in the module body, i.e. when this module is compiled.
  """

  require FastSanitize.Sanitizer.Meta
  alias FastSanitize.Sanitizer.Meta

  # credo:disable-for-previous-line
  # No idea how to fix this one…

  @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], [])

  Meta.strip_comments()

  # links
  Meta.allow_tag_with_uri_attributes(:a, ["href", "data-user", "data-tag"], @valid_schemes)

  Meta.allow_tag_with_this_attribute_values(:a, "class", [
    "hashtag",
    "u-url",
    "mention",
    "u-url mention",
    "mention u-url"
  ])

  Meta.allow_tag_with_this_attribute_values(:a, "rel", [
    "tag",
    "nofollow",
    "noopener",
    "noreferrer",
    "ugc"
  ])

  Meta.allow_tag_with_these_attributes(:a, ["name", "title"])

  Meta.allow_tag_with_these_attributes(:abbr, ["title"])

  # basic formatting, lists and preformatted text; no attributes allowed
  Meta.allow_tag_with_these_attributes(:b, [])
  Meta.allow_tag_with_these_attributes(:blockquote, [])
  Meta.allow_tag_with_these_attributes(:br, [])
  Meta.allow_tag_with_these_attributes(:code, [])
  Meta.allow_tag_with_these_attributes(:del, [])
  Meta.allow_tag_with_these_attributes(:em, [])
  Meta.allow_tag_with_these_attributes(:i, [])
  Meta.allow_tag_with_these_attributes(:li, [])
  Meta.allow_tag_with_these_attributes(:ol, [])
  Meta.allow_tag_with_these_attributes(:p, [])
  Meta.allow_tag_with_these_attributes(:pre, [])
  Meta.allow_tag_with_these_attributes(:strong, [])
  Meta.allow_tag_with_these_attributes(:sub, [])
  Meta.allow_tag_with_these_attributes(:sup, [])
  Meta.allow_tag_with_these_attributes(:u, [])
  Meta.allow_tag_with_these_attributes(:ul, [])

  # microformats
  Meta.allow_tag_with_this_attribute_values(:span, "class", ["h-card"])
  Meta.allow_tag_with_these_attributes(:span, [])

  @allow_inline_images Pleroma.Config.get([:markup, :allow_inline_images])

  if @allow_inline_images do
    # restrict img tags to http/https only, because of MediaProxy.
    Meta.allow_tag_with_uri_attributes(:img, ["src"], ["http", "https"])

    Meta.allow_tag_with_these_attributes(:img, [
      "width",
      "height",
      "class",
      "title",
      "alt"
    ])
  end

  if Pleroma.Config.get([:markup, :allow_tables]) do
    Meta.allow_tag_with_these_attributes(:table, [])
    Meta.allow_tag_with_these_attributes(:tbody, [])
    Meta.allow_tag_with_these_attributes(:td, [])
    Meta.allow_tag_with_these_attributes(:th, [])
    Meta.allow_tag_with_these_attributes(:thead, [])
    Meta.allow_tag_with_these_attributes(:tr, [])
  end

  if Pleroma.Config.get([:markup, :allow_headings]) do
    Meta.allow_tag_with_these_attributes(:h1, [])
    Meta.allow_tag_with_these_attributes(:h2, [])
    Meta.allow_tag_with_these_attributes(:h3, [])
    Meta.allow_tag_with_these_attributes(:h4, [])
    Meta.allow_tag_with_these_attributes(:h5, [])
  end

  if Pleroma.Config.get([:markup, :allow_fonts]) do
    Meta.allow_tag_with_these_attributes(:font, ["face"])
  end

  Meta.strip_everything_not_covered()
end
254
defmodule Pleroma.HTML.Transform.MediaProxy do
  @moduledoc "Rewrites the `src` of inline images so that they go through MediaProxy."

  alias Pleroma.Web.MediaProxy

  # Nothing to prepare: the markup is handed on untouched.
  def before_scrub(html), do: html

  # Only `src` values starting with "http" are rewritten; every other
  # attribute (on any tag) is returned as-is.
  def scrub_attribute(:img, {"src", url = "http" <> _}), do: {"src", MediaProxy.url(url)}
  def scrub_attribute(_tag, attribute), do: attribute

  # Comment nodes are replaced by an empty string.
  def scrub({:comment, _text, _children}), do: ""

  # Images get each attribute passed through `scrub_attribute/2`; attributes
  # that come back as `nil` are dropped.
  def scrub({:img, attributes, children}) do
    kept =
      Enum.flat_map(attributes, fn attribute ->
        case scrub_attribute(:img, attribute) do
          nil -> []
          rewritten -> [rewritten]
        end
      end)

    {:img, kept, children}
  end

  # Any other three-element node is passed through unchanged.
  def scrub({_tag, _attributes, _children} = node), do: node

  # Two-element nodes collapse to their children; anything else is returned as-is.
  def scrub({_tag, children}), do: children
  def scrub(text), do: text
end
287
defmodule Pleroma.HTML.Scrubber.LinksOnly do
  @moduledoc """
  Scrubbing policy that lets nothing but links through.
  """

  require FastSanitize.Sanitizer.Meta
  alias FastSanitize.Sanitizer.Meta

  # Read in the module body, i.e. when the module is compiled.
  @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], [])

  Meta.strip_comments()

  # Anchors: `href` is limited to the configured URI schemes.
  Meta.allow_tag_with_uri_attributes(:a, ["href"], @valid_schemes)

  Meta.allow_tag_with_this_attribute_values(:a, "rel", [
    "tag",
    "nofollow",
    "noopener",
    "noreferrer",
    "me",
    "ugc"
  ])

  Meta.allow_tag_with_these_attributes(:a, ["name", "title"])

  Meta.strip_everything_not_covered()
end