1 # Pleroma: A lightweight social networking server
2 # Copyright © 2017-2019 Pleroma Authors <https://pleroma.social/>
3 # SPDX-License-Identifier: AGPL-3.0-only
5 defmodule Pleroma.HTML do
# Normalises a scrub-policy value into a list of scrubber modules.
#
#   * `nil` (no policy set)           -> the default scrubber
#   * a single scrubber module (atom) -> a one-element list
#   * a list of scrubbers             -> returned unchanged
#   * anything else                   -> the default scrubber
#
# `nil` is itself an atom, so it must be matched before the `is_atom/1` clause;
# otherwise it would come back as `[nil]` instead of falling through to the
# default, and `filter_tags/2` treats a `nil` scrubber as "look the policy up
# again", which would never terminate.
defp get_scrubbers(nil), do: [Pleroma.HTML.Scrubber.Default]
defp get_scrubbers(scrubber) when is_atom(scrubber), do: [scrubber]
defp get_scrubbers(scrubbers) when is_list(scrubbers), do: scrubbers
defp get_scrubbers(_), do: [Pleroma.HTML.Scrubber.Default]
# Reads the configured scrub policy stored under [:markup, :scrub_policy].
# NOTE(review): the enclosing `def` head and the rest of this body are not visible in this
# view; presumably the value is handed to get_scrubbers/1 for normalisation — confirm.
11 Pleroma.Config.get([:markup, :scrub_policy])
# filter_tags/2 — runs `html` through one or more scrubbers.
#
# With `nil` as the scrubber, the scrubbers returned by get_scrubbers/0 are used.
15 def filter_tags(html, nil) do
16 filter_tags(html, get_scrubbers())
# With a list, the scrubbers are applied in order, each one receiving the output of the
# previous one.
19 def filter_tags(html, scrubbers) when is_list(scrubbers) do
20 Enum.reduce(scrubbers, html, fn scrubber, html ->
21 filter_tags(html, scrubber)
# With a single scrubber, the work is delegated to FastSanitize. The `{:ok, content}` match
# is assertive: any other return value raises a MatchError.
25 def filter_tags(html, scrubber) do
26 {:ok, content} = FastSanitize.Sanitizer.scrub(html, scrubber)
@doc """
Scrubs `html` without naming a scrubber.

Delegates to `filter_tags/2` with `nil` as the scrubber argument.
"""
def filter_tags(html) do
  filter_tags(html, nil)
end
@doc """
Scrubs `html` with the `FastSanitize.Sanitizer.StripTags` scrubber.

Delegates to `filter_tags/2`.
"""
def strip_tags(html) do
  filter_tags(html, FastSanitize.Sanitizer.StripTags)
end
# Returns scrubbed HTML for an activity, going through the :scrubber_cache Cachex cache.
# NOTE(review): only the last parameter (`callback`, defaulting to the identity function) is
# visible in this view; `content`, `scrubbers`, `activity` and `key` are inferred from their
# use in the body — confirm against the full head.
33 def get_cached_scrubbed_html_for_activity(
38 callback \\ fn x -> x end
# The cache key combines the caller-supplied key, the scrubber signature and the activity id.
40 key = "#{key}#{generate_scrubber_signature(scrubbers)}|#{activity.id}"
# The fallback function normalizes the activity to its object and scrubs `content`; the
# object's "fake" flag (false when missing) and `callback` are forwarded to
# ensure_scrubbed_html/4.
42 Cachex.fetch!(:scrubber_cache, key, fn _key ->
43 object = Pleroma.Object.normalize(activity)
44 ensure_scrubbed_html(content, scrubbers, object.data["fake"] || false, callback)
# Cached tag-stripping variant: delegates to get_cached_scrubbed_html_for_activity, passing
# the FastSanitize StripTags scrubber and HtmlEntities.decode/1.
# NOTE(review): the remaining call arguments are not visible in this view; presumably
# HtmlEntities.decode/1 is the `callback` argument — confirm.
48 def get_cached_stripped_html_for_activity(content, activity, key) do
49 get_cached_scrubbed_html_for_activity(
51 FastSanitize.Sanitizer.StripTags,
54 &HtmlEntities.decode/1
# NOTE(review): only the opening of the head and one pipeline step are visible in this view,
# so the parameters and return value cannot be described from here.
# The visible step runs the piped value through filter_tags/2 with `scrubbers`.
58 def ensure_scrubbed_html(
66 |> filter_tags(scrubbers)
# Builds the string that identifies a set of scrubbers inside scrubber-cache keys.
#
# A single scrubber (atom) is wrapped in a list and handled by the list clause.
76 defp generate_scrubber_signature(scrubber) when is_atom(scrubber) do
77 generate_scrubber_signature([scrubber])
# For a list, the string form of every scrubber is concatenated in order, with no separator.
80 defp generate_scrubber_signature(scrubbers) do
81 Enum.reduce(scrubbers, "", fn scrubber, signature ->
82 "#{signature}#{to_string(scrubber)}"
# extract_first_external_url/2
# NOTE(review): several lines of the second clause are not visible in this view; the
# comments below only describe the visible steps.
#
# Without content there is nothing to scan, so an error tuple is returned.
86 def extract_first_external_url(_, nil), do: {:error, "No content"}
# With content, the work goes through :scrubber_cache under a key derived from the object id.
88 def extract_first_external_url(object, content) do
89 key = "URL|#{object.id}"
91 Cachex.fetch!(:scrubber_cache, key, fn _key ->
# Mention links, hashtag links and links whose `rel` contains "tag" are filtered out before
# the `href` values of the remaining <a> elements are collected.
94 |> Floki.filter_out("a.mention,a.hashtag,a[rel~=\"tag\"]")
95 |> Floki.attribute("a", "href")
# The fallback returns a `{:commit, value}` tuple with `{:ok, result}` as the value.
98 {:commit, {:ok, result}}
# Scrubber policy declared entirely through FastSanitize.Sanitizer.Meta macros.
# NOTE(review): several lines (moduledoc delimiters, list arguments, `end`s) are not visible
# in this view.
103 defmodule Pleroma.HTML.Scrubber.TwitterText do
105 An HTML scrubbing policy which limits to twitter-style text. Only
106 paragraphs, breaks and links are allowed through the filter.
# URI schemes accepted on links. As a module attribute, this config lookup is evaluated when
# the module is compiled, not on each call.
109 @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], [])
111 require FastSanitize.Sanitizer.Meta
112 alias FastSanitize.Sanitizer.Meta
# HTML comments are stripped.
114 Meta.strip_comments()
# Links: href, data-user and data-tag are URI attributes restricted to @valid_schemes;
# class and rel are restricted to listed values (the lists are not visible in this view);
# name and title are allowed.
117 Meta.allow_tag_with_uri_attributes(:a, ["href", "data-user", "data-tag"], @valid_schemes)
119 Meta.allow_tag_with_this_attribute_values(:a, "class", [
127 Meta.allow_tag_with_this_attribute_values(:a, "rel", [
134 Meta.allow_tag_with_these_attributes(:a, ["name", "title"])
136 # paragraphs and linebreaks
137 Meta.allow_tag_with_these_attributes(:br, [])
138 Meta.allow_tag_with_these_attributes(:p, [])
# Spans: the class attribute may only carry the value "h-card".
141 Meta.allow_tag_with_this_attribute_values(:span, "class", ["h-card"])
142 Meta.allow_tag_with_these_attributes(:span, [])
# Inline images are opt-in via the [:markup, :allow_inline_images] config flag.
144 # allow inline images for custom emoji
145 if Pleroma.Config.get([:markup, :allow_inline_images]) do
146 # restrict img tags to http/https only, because of MediaProxy.
147 Meta.allow_tag_with_uri_attributes(:img, ["src"], ["http", "https"])
149 Meta.allow_tag_with_these_attributes(:img, [
# Everything not explicitly allowed above is stripped.
158 Meta.strip_everything_not_covered()
# Default scrubber policy, declared through FastSanitize.Sanitizer.Meta macros.
# NOTE(review): several lines (list arguments, `end`s) are not visible in this view.
161 defmodule Pleroma.HTML.Scrubber.Default do
# NOTE(review): this is a module-level `@doc` with no function following it, and its text
# stops mid-sentence ("no "). It looks like it was meant to be a `@moduledoc` — confirm
# and complete the sentence.
162 @doc "The default HTML scrubbing policy: no "
164 require FastSanitize.Sanitizer.Meta
165 alias FastSanitize.Sanitizer.Meta
167 # credo:disable-for-previous-line
168 # No idea how to fix this one…
# URI schemes accepted on links. As a module attribute, this config lookup is evaluated when
# the module is compiled, not on each call.
170 @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], [])
# HTML comments are stripped.
172 Meta.strip_comments()
# Links: href, data-user and data-tag are URI attributes restricted to @valid_schemes;
# class and rel are restricted to listed values (the lists are not visible in this view);
# name and title are allowed.
174 Meta.allow_tag_with_uri_attributes(:a, ["href", "data-user", "data-tag"], @valid_schemes)
176 Meta.allow_tag_with_this_attribute_values(:a, "class", [
184 Meta.allow_tag_with_this_attribute_values(:a, "rel", [
192 Meta.allow_tag_with_these_attributes(:a, ["name", "title"])
# Basic formatting tags. Only <abbr> is given an allowed attribute (title); the rest are
# declared with an empty attribute list.
194 Meta.allow_tag_with_these_attributes(:abbr, ["title"])
196 Meta.allow_tag_with_these_attributes(:b, [])
197 Meta.allow_tag_with_these_attributes(:blockquote, [])
198 Meta.allow_tag_with_these_attributes(:br, [])
199 Meta.allow_tag_with_these_attributes(:code, [])
200 Meta.allow_tag_with_these_attributes(:del, [])
201 Meta.allow_tag_with_these_attributes(:em, [])
202 Meta.allow_tag_with_these_attributes(:i, [])
203 Meta.allow_tag_with_these_attributes(:li, [])
204 Meta.allow_tag_with_these_attributes(:ol, [])
205 Meta.allow_tag_with_these_attributes(:p, [])
206 Meta.allow_tag_with_these_attributes(:pre, [])
207 Meta.allow_tag_with_these_attributes(:strong, [])
208 Meta.allow_tag_with_these_attributes(:sub, [])
209 Meta.allow_tag_with_these_attributes(:sup, [])
210 Meta.allow_tag_with_these_attributes(:u, [])
211 Meta.allow_tag_with_these_attributes(:ul, [])
# Spans: the class attribute may only carry the value "h-card".
213 Meta.allow_tag_with_this_attribute_values(:span, "class", ["h-card"])
214 Meta.allow_tag_with_these_attributes(:span, [])
# Optional features, each switched on by a [:markup, ...] config flag: inline images,
# tables, headings (h1-h5) and <font face>. The inline-image flag is captured in a module
# attribute, so it is read when the module is compiled.
216 @allow_inline_images Pleroma.Config.get([:markup, :allow_inline_images])
218 if @allow_inline_images do
219 # restrict img tags to http/https only, because of MediaProxy.
220 Meta.allow_tag_with_uri_attributes(:img, ["src"], ["http", "https"])
222 Meta.allow_tag_with_these_attributes(:img, [
231 if Pleroma.Config.get([:markup, :allow_tables]) do
232 Meta.allow_tag_with_these_attributes(:table, [])
233 Meta.allow_tag_with_these_attributes(:tbody, [])
234 Meta.allow_tag_with_these_attributes(:td, [])
235 Meta.allow_tag_with_these_attributes(:th, [])
236 Meta.allow_tag_with_these_attributes(:thead, [])
237 Meta.allow_tag_with_these_attributes(:tr, [])
240 if Pleroma.Config.get([:markup, :allow_headings]) do
241 Meta.allow_tag_with_these_attributes(:h1, [])
242 Meta.allow_tag_with_these_attributes(:h2, [])
243 Meta.allow_tag_with_these_attributes(:h3, [])
244 Meta.allow_tag_with_these_attributes(:h4, [])
245 Meta.allow_tag_with_these_attributes(:h5, [])
248 if Pleroma.Config.get([:markup, :allow_fonts]) do
249 Meta.allow_tag_with_these_attributes(:font, ["face"])
# Everything not explicitly allowed above is stripped.
252 Meta.strip_everything_not_covered()
255 defmodule Pleroma.HTML.Transform.MediaProxy do
256 @moduledoc "Transforms inline image URIs to use MediaProxy."
258 alias Pleroma.Web.MediaProxy
@doc """
Pre-scrub hook for this transform.

Returns `html` exactly as it was given.
"""
def before_scrub(html) do
  html
end
# Attribute-level hook. The first clause matches an <img> `src` whose value starts with
# "http", binding the rest of the value to `target` (its body is not visible in this view —
# presumably it rewrites the URL through MediaProxy; confirm). Every other attribute is
# returned unchanged by the second clause.
262 def scrub_attribute(:img, {"src", "http" <> target}) do
270 def scrub_attribute(_tag, attribute), do: attribute
# Node-level hook.
# NOTE(review): some lines of the <img> clause are not visible in this view.
#
# <img> nodes: every attribute goes through scrub_attribute/2 and nil results are dropped;
# the clause then returns an :img node tuple.
272 def scrub({:img, attributes, children}) do
275 |> Enum.map(fn attr -> scrub_attribute(:img, attr) end)
276 |> Enum.reject(&is_nil(&1))
278 {:img, attributes, children}
# Comment nodes are replaced by an empty string.
281 def scrub({:comment, _text, _children}), do: ""
# Any other three-element node is passed through untouched, a two-element node collapses to
# its children, and anything else (e.g. text) is returned as-is.
283 def scrub({tag, attributes, children}), do: {tag, attributes, children}
284 def scrub({_tag, children}), do: children
285 def scrub(text), do: text
# Scrubber policy declared through FastSanitize.Sanitizer.Meta macros.
# NOTE(review): several lines (moduledoc delimiters, the `rel` value list, `end`) are not
# visible in this view.
288 defmodule Pleroma.HTML.Scrubber.LinksOnly do
290 An HTML scrubbing policy which limits to links only.
# URI schemes accepted on links. As a module attribute, this config lookup is evaluated when
# the module is compiled, not on each call.
293 @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], [])
295 require FastSanitize.Sanitizer.Meta
296 alias FastSanitize.Sanitizer.Meta
# HTML comments are stripped.
298 Meta.strip_comments()
# Links: href is a URI attribute restricted to @valid_schemes; rel is restricted to listed
# values (the list is not visible in this view); name and title are allowed.
301 Meta.allow_tag_with_uri_attributes(:a, ["href"], @valid_schemes)
303 Meta.allow_tag_with_this_attribute_values(:a, "rel", [
312 Meta.allow_tag_with_these_attributes(:a, ["name", "title"])
# Everything not explicitly allowed above is stripped.
313 Meta.strip_everything_not_covered()