Merge branch 'bugfix/web-notification-special-char' into 'develop'
[akkoma] / lib / pleroma / html.ex
1 # Pleroma: A lightweight social networking server
2 # Copyright © 2017-2019 Pleroma Authors <https://pleroma.social/>
3 # SPDX-License-Identifier: AGPL-3.0-only
4
defmodule Pleroma.HTML do
  @moduledoc """
  Helpers for scrubbing HTML with `HtmlSanitizeEx.Scrubber`, caching scrubbed
  activity content in the `:scrubber_cache` Cachex cache, and pulling the first
  non-mention link out of a piece of HTML.
  """

  alias HtmlSanitizeEx.Scrubber

  # Normalises the configured scrub policy into a list of scrubber modules.
  #
  # `nil` is an atom, so it has to be matched *before* the `is_atom/1` clause.
  # Otherwise an unset policy becomes `[nil]`, and `filter_tags/2` then bounces
  # between its `nil` and list clauses forever instead of using the default.
  defp get_scrubbers(nil), do: [Pleroma.HTML.Scrubber.Default]
  defp get_scrubbers(scrubber) when is_atom(scrubber), do: [scrubber]
  defp get_scrubbers(scrubbers) when is_list(scrubbers), do: scrubbers
  defp get_scrubbers(_), do: [Pleroma.HTML.Scrubber.Default]

  @doc """
  Returns the list of scrubbers configured under `[:markup, :scrub_policy]`.

  A single module is wrapped in a list; a missing or unrecognised value falls
  back to `[Pleroma.HTML.Scrubber.Default]`.
  """
  def get_scrubbers do
    Pleroma.Config.get([:markup, :scrub_policy])
    |> get_scrubbers()
  end

  @doc """
  Scrubs `html` with the given scrubber(s).

    * `nil` (or omitted) - use the scrubbers returned by `get_scrubbers/0`
    * a list - apply every scrubber in order, feeding each the previous output
    * anything else - passed to `HtmlSanitizeEx.Scrubber.scrub/2` as the scrubber
  """
  def filter_tags(html, scrubbers \\ nil)

  def filter_tags(html, nil), do: filter_tags(html, get_scrubbers())

  def filter_tags(html, scrubbers) when is_list(scrubbers) do
    Enum.reduce(scrubbers, html, fn scrubber, acc ->
      filter_tags(acc, scrubber)
    end)
  end

  def filter_tags(html, scrubber), do: Scrubber.scrub(html, scrubber)

  @doc "Scrubs `html` with `HtmlSanitizeEx.Scrubber.StripTags`."
  def strip_tags(html), do: Scrubber.scrub(html, Scrubber.StripTags)

  @doc """
  Returns `content` scrubbed with `scrubbers`, going through `:scrubber_cache`.

  The cache key is `key`, followed by the concatenated scrubber names, a `|`
  and `activity.id`. On a cache miss the activity is normalized to its object
  and the content is scrubbed via `ensure_scrubbed_html/4`; `callback` is
  applied to the scrubbed content before it is returned.
  """
  def get_cached_scrubbed_html_for_activity(
        content,
        scrubbers,
        activity,
        key \\ "",
        callback \\ fn x -> x end
      ) do
    key = "#{key}#{generate_scrubber_signature(scrubbers)}|#{activity.id}"

    Cachex.fetch!(:scrubber_cache, key, fn _key ->
      object = Pleroma.Object.normalize(activity)
      # Objects flagged as "fake" must not end up in the cache.
      ensure_scrubbed_html(content, scrubbers, object.data["fake"] || false, callback)
    end)
  end

  @doc """
  Like `get_cached_scrubbed_html_for_activity/5`, but scrubs with
  `HtmlSanitizeEx.Scrubber.StripTags` and then runs the result through
  `HtmlEntities.decode/1`.
  """
  def get_cached_stripped_html_for_activity(content, activity, key) do
    get_cached_scrubbed_html_for_activity(
      content,
      HtmlSanitizeEx.Scrubber.StripTags,
      activity,
      key,
      &HtmlEntities.decode/1
    )
  end

  @doc """
  Scrubs `content` with `scrubbers`, applies `callback` to the result and tags
  it for use as a Cachex fallback return value.

  Returns `{:ignore, content}` when `fake` is truthy and `{:commit, content}`
  otherwise. `callback` defaults to the identity function, so the three-argument
  form works for both `fake = true` and `fake = false`.
  """
  def ensure_scrubbed_html(content, scrubbers, fake, callback \\ fn x -> x end) do
    scrubbed = callback.(filter_tags(content, scrubbers))

    if fake do
      {:ignore, scrubbed}
    else
      {:commit, scrubbed}
    end
  end

  # Builds the scrubber part of a cache key by concatenating the string form of
  # every scrubber; a single scrubber (any atom, including `nil`) is wrapped
  # in a list first.
  defp generate_scrubber_signature(scrubber) when is_atom(scrubber) do
    generate_scrubber_signature([scrubber])
  end

  defp generate_scrubber_signature(scrubbers) do
    Enum.map_join(scrubbers, "", &to_string/1)
  end

  @doc """
  Returns `{:ok, href}` for the first `a` element in `content` that is not
  matched by the `a.mention` selector, caching the result under
  `"URL|<object.id>"` in `:scrubber_cache`.

  `href` is `nil` when no such link exists. Returns `{:error, "No content"}`
  when `content` is `nil`.
  """
  def extract_first_external_url(_, nil), do: {:error, "No content"}

  def extract_first_external_url(object, content) do
    key = "URL|#{object.id}"

    Cachex.fetch!(:scrubber_cache, key, fn _key ->
      result =
        content
        |> Floki.filter_out("a.mention")
        |> Floki.attribute("a", "href")
        |> Enum.at(0)

      {:commit, {:ok, result}}
    end)
  end
end
108
defmodule Pleroma.HTML.Scrubber.TwitterText do
  @moduledoc """
  Scrubbing policy for twitter-style text.

  Lets through links, paragraphs, line breaks and microformat spans, plus
  inline images when `:allow_inline_images` is set in the `:markup` config.
  """

  alias HtmlSanitizeEx.Scrubber.Meta
  require Meta

  # Both values are read while the module is being compiled.
  @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], [])
  @markup Application.get_env(:pleroma, :markup)

  Meta.remove_cdata_sections_before_scrub()
  Meta.strip_comments()

  # Links.
  Meta.allow_tag_with_uri_attributes("a", ["href", "data-user", "data-tag"], @valid_schemes)

  Meta.allow_tag_with_this_attribute_values(
    "a",
    "class",
    ["hashtag", "u-url", "mention", "u-url mention", "mention u-url"]
  )

  Meta.allow_tag_with_this_attribute_values(
    "a",
    "rel",
    ["tag", "nofollow", "noopener", "noreferrer"]
  )

  Meta.allow_tag_with_these_attributes("a", ["name", "title"])

  # Paragraphs and line breaks.
  Meta.allow_tag_with_these_attributes("br", [])
  Meta.allow_tag_with_these_attributes("p", [])

  # Microformats.
  Meta.allow_tag_with_this_attribute_values("span", "class", ["h-card"])
  Meta.allow_tag_with_these_attributes("span", [])

  # Inline images (used for custom emoji) are opt-in.
  if Keyword.get(@markup, :allow_inline_images) do
    # Only http/https sources are accepted, because of MediaProxy.
    Meta.allow_tag_with_uri_attributes("img", ["src"], ["http", "https"])
    Meta.allow_tag_with_these_attributes("img", ["width", "height", "title", "alt"])
  end

  Meta.strip_everything_not_covered()
end
169
defmodule Pleroma.HTML.Scrubber.Default do
  @moduledoc """
  The default HTML scrubbing policy.

  Always allows links, `abbr`, basic inline and block formatting tags, lists
  and microformat spans. Inline images, tables, headings and `font` tags are
  allowed only when the matching `:allow_inline_images`, `:allow_tables`,
  `:allow_headings` or `:allow_fonts` key of the `:markup` config is truthy.
  Anything not covered by one of these rules is left to
  `Meta.strip_everything_not_covered()`.
  """

  require HtmlSanitizeEx.Scrubber.Meta
  alias HtmlSanitizeEx.Scrubber.Meta
  # credo:disable-for-previous-line
  # No idea how to fix this one…

  # Module attributes are evaluated while the module is compiled, so the
  # policy reflects the configuration present at compile time.
  @markup Application.get_env(:pleroma, :markup)
  @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], [])

  Meta.remove_cdata_sections_before_scrub()
  Meta.strip_comments()

  # links
  Meta.allow_tag_with_uri_attributes("a", ["href", "data-user", "data-tag"], @valid_schemes)

  Meta.allow_tag_with_this_attribute_values("a", "class", [
    "hashtag",
    "u-url",
    "mention",
    "u-url mention",
    "mention u-url"
  ])

  Meta.allow_tag_with_this_attribute_values("a", "rel", [
    "tag",
    "nofollow",
    "noopener",
    "noreferrer"
  ])

  Meta.allow_tag_with_these_attributes("a", ["name", "title"])

  Meta.allow_tag_with_these_attributes("abbr", ["title"])

  # basic formatting, attribute-free
  Meta.allow_tag_with_these_attributes("b", [])
  Meta.allow_tag_with_these_attributes("blockquote", [])
  Meta.allow_tag_with_these_attributes("br", [])
  Meta.allow_tag_with_these_attributes("code", [])
  Meta.allow_tag_with_these_attributes("del", [])
  Meta.allow_tag_with_these_attributes("em", [])
  Meta.allow_tag_with_these_attributes("i", [])
  Meta.allow_tag_with_these_attributes("li", [])
  Meta.allow_tag_with_these_attributes("ol", [])
  Meta.allow_tag_with_these_attributes("p", [])
  Meta.allow_tag_with_these_attributes("pre", [])
  Meta.allow_tag_with_these_attributes("strong", [])
  Meta.allow_tag_with_these_attributes("u", [])
  Meta.allow_tag_with_these_attributes("ul", [])

  # microformats
  Meta.allow_tag_with_this_attribute_values("span", "class", ["h-card"])
  Meta.allow_tag_with_these_attributes("span", [])

  @allow_inline_images Keyword.get(@markup, :allow_inline_images)

  if @allow_inline_images do
    # restrict img tags to http/https only, because of MediaProxy.
    Meta.allow_tag_with_uri_attributes("img", ["src"], ["http", "https"])

    Meta.allow_tag_with_these_attributes("img", [
      "width",
      "height",
      "title",
      "alt"
    ])
  end

  @allow_tables Keyword.get(@markup, :allow_tables)

  if @allow_tables do
    Meta.allow_tag_with_these_attributes("table", [])
    Meta.allow_tag_with_these_attributes("tbody", [])
    Meta.allow_tag_with_these_attributes("td", [])
    Meta.allow_tag_with_these_attributes("th", [])
    Meta.allow_tag_with_these_attributes("thead", [])
    Meta.allow_tag_with_these_attributes("tr", [])
  end

  @allow_headings Keyword.get(@markup, :allow_headings)

  if @allow_headings do
    Meta.allow_tag_with_these_attributes("h1", [])
    Meta.allow_tag_with_these_attributes("h2", [])
    Meta.allow_tag_with_these_attributes("h3", [])
    Meta.allow_tag_with_these_attributes("h4", [])
    Meta.allow_tag_with_these_attributes("h5", [])
  end

  @allow_fonts Keyword.get(@markup, :allow_fonts)

  if @allow_fonts do
    Meta.allow_tag_with_these_attributes("font", ["face"])
  end

  Meta.strip_everything_not_covered()
end
266
defmodule Pleroma.HTML.Transform.MediaProxy do
  @moduledoc """
  Scrubber-shaped transform that rewrites the `src` of `img` tags through
  `Pleroma.Web.MediaProxy.url/1`, leaving all other markup as it is.
  """

  alias Pleroma.Web.MediaProxy

  @doc "Returns the HTML unchanged; no pre-processing is done."
  def before_scrub(html), do: html

  @doc """
  Rewrites an `img` `src` value starting with `"http"` via `MediaProxy.url/1`;
  any other attribute is returned untouched.
  """
  def scrub_attribute("img", {"src", "http" <> _rest = url}) do
    {"src", MediaProxy.url(url)}
  end

  def scrub_attribute(_tag, attribute), do: attribute

  @doc """
  Transforms a single parsed HTML node.

  `img` nodes get their attributes passed through `scrub_attribute/2` (dropping
  any `nil` results), comments become `""`, other three-element nodes and plain
  text are returned as-is, and two-element nodes are replaced by their children.
  """
  def scrub({"img", attributes, children}) do
    rewritten =
      Enum.flat_map(attributes, fn attribute ->
        case scrub_attribute("img", attribute) do
          nil -> []
          kept -> [kept]
        end
      end)

    {"img", rewritten, children}
  end

  def scrub({:comment, _children}), do: ""

  def scrub({_tag, _attributes, _children} = node), do: node
  def scrub({_tag, children}), do: children
  def scrub(text), do: text
end