git.squeep.com Git - akkoma/blob - lib/pleroma/html.ex

   1 # Pleroma: A lightweight social networking server
   2 # Copyright © 2017-2019 Pleroma Authors <https://pleroma.social/>
   3 # SPDX-License-Identifier: AGPL-3.0-only
   4
   5 defmodule Pleroma.HTML do
   6   alias HtmlSanitizeEx.Scrubber
   7
   8   defp get_scrubbers(scrubber) when is_atom(scrubber), do: [scrubber]
   9   defp get_scrubbers(scrubbers) when is_list(scrubbers), do: scrubbers
  10   defp get_scrubbers(_), do: [Pleroma.HTML.Scrubber.Default]
  11
  12   def get_scrubbers do
  13     Pleroma.Config.get([:markup, :scrub_policy])
  14     |> get_scrubbers
  15   end
  16
  17   def filter_tags(html, nil) do
  18     filter_tags(html, get_scrubbers())
  19   end
  20
  21   def filter_tags(html, scrubbers) when is_list(scrubbers) do
  22     Enum.reduce(scrubbers, html, fn scrubber, html ->
  23       filter_tags(html, scrubber)
  24     end)
  25   end
  26
  27   def filter_tags(html, scrubber), do: Scrubber.scrub(html, scrubber)
  28   def filter_tags(html), do: filter_tags(html, nil)
  29   def strip_tags(html), do: Scrubber.scrub(html, Scrubber.StripTags)
  30
  31   def get_cached_scrubbed_html_for_activity(
  32         content,
  33         scrubbers,
  34         activity,
  35         key \\ "",
  36         callback \\ fn x -> x end
  37       ) do
  38     key = "#{key}#{generate_scrubber_signature(scrubbers)}|#{activity.id}"
  39
  40     Cachex.fetch!(:scrubber_cache, key, fn _key ->
  41       object = Pleroma.Object.normalize(activity)
  42       ensure_scrubbed_html(content, scrubbers, object.data["fake"] || false, callback)
  43     end)
  44   end
  45
  46   def get_cached_stripped_html_for_activity(content, activity, key) do
  47     get_cached_scrubbed_html_for_activity(
  48       content,
  49       HtmlSanitizeEx.Scrubber.StripTags,
  50       activity,
  51       key,
  52       &HtmlEntities.decode/1
  53     )
  54   end
  55
  56   def ensure_scrubbed_html(
  57         content,
  58         scrubbers,
  59         fake,
  60         callback
  61       ) do
  62     content =
  63       content
  64       |> filter_tags(scrubbers)
  65       |> callback.()
  66
  67     if fake do
  68       {:ignore, content}
  69     else
  70       {:commit, content}
  71     end
  72   end
  73
  74   def ensure_scrubbed_html(
  75         content,
  76         scrubbers,
  77         true = _fake
  78       ) do
  79     {:ignore, filter_tags(content, scrubbers)}
  80   end
  81
  82   defp generate_scrubber_signature(scrubber) when is_atom(scrubber) do
  83     generate_scrubber_signature([scrubber])
  84   end
  85
  86   defp generate_scrubber_signature(scrubbers) do
  87     Enum.reduce(scrubbers, "", fn scrubber, signature ->
  88       "#{signature}#{to_string(scrubber)}"
  89     end)
  90   end
  91
  92   def extract_first_external_url(_, nil), do: {:error, "No content"}
  93
  94   def extract_first_external_url(object, content) do
  95     key = "URL|#{object.id}"
  96
  97     Cachex.fetch!(:scrubber_cache, key, fn _key ->
  98       result =
  99         content
 100         |> Floki.filter_out("a.mention")
 101         |> Floki.attribute("a", "href")
 102         |> Enum.at(0)
 103
 104       {:commit, {:ok, result}}
 105     end)
 106   end
 107 end
 108
defmodule Pleroma.HTML.Scrubber.TwitterText do
  @moduledoc """
  An HTML scrubbing policy which limits to twitter-style text.  Only
  paragraphs, breaks, links and spans are allowed through the filter, plus
  inline images when `:allow_inline_images` is enabled in the `:markup`
  configuration.
  """

  # Both attributes are module attributes and so are evaluated when this module
  # is compiled; changing the configuration afterwards has no effect until the
  # module is recompiled.
  @markup Application.get_env(:pleroma, :markup)
  @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], [])

  require HtmlSanitizeEx.Scrubber.Meta
  alias HtmlSanitizeEx.Scrubber.Meta

  # NOTE(review): the Meta macros below expand into the scrubber's function
  # clauses, so the order of these calls may matter - do not reorder without
  # checking HtmlSanitizeEx.Scrubber.Meta.
  Meta.remove_cdata_sections_before_scrub()
  Meta.strip_comments()

  # links
  # URI-valued attributes on <a> are restricted to the configured schemes.
  Meta.allow_tag_with_uri_attributes("a", ["href", "data-user", "data-tag"], @valid_schemes)

  # Only these exact class values are accepted on <a>.
  Meta.allow_tag_with_this_attribute_values("a", "class", [
    "hashtag",
    "u-url",
    "mention",
    "u-url mention",
    "mention u-url"
  ])

  # Only these exact rel values are accepted on <a>.
  Meta.allow_tag_with_this_attribute_values("a", "rel", [
    "tag",
    "nofollow",
    "noopener",
    "noreferrer"
  ])

  Meta.allow_tag_with_these_attributes("a", ["name", "title"])

  # paragraphs and linebreaks
  Meta.allow_tag_with_these_attributes("br", [])
  Meta.allow_tag_with_these_attributes("p", [])

  # microformats
  Meta.allow_tag_with_this_attribute_values("span", "class", ["h-card"])
  Meta.allow_tag_with_these_attributes("span", [])

  # allow inline images for custom emoji
  @allow_inline_images Keyword.get(@markup, :allow_inline_images)

  # The img rules are only generated when the flag was truthy at compile time.
  if @allow_inline_images do
    # restrict img tags to http/https only, because of MediaProxy.
    Meta.allow_tag_with_uri_attributes("img", ["src"], ["http", "https"])

    Meta.allow_tag_with_these_attributes("img", [
      "width",
      "height",
      "title",
      "alt"
    ])
  end

  # Everything not explicitly allowed above is stripped.
  Meta.strip_everything_not_covered()
end
 169
 170 defmodule Pleroma.HTML.Scrubber.Default do
 171   @doc "The default HTML scrubbing policy: no "
 172
 173   require HtmlSanitizeEx.Scrubber.Meta
 174   alias HtmlSanitizeEx.Scrubber.Meta
 175   # credo:disable-for-previous-line
 176   # No idea how to fix this one…
 177
 178   @markup Application.get_env(:pleroma, :markup)
 179   @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], [])
 180
 181   Meta.remove_cdata_sections_before_scrub()
 182   Meta.strip_comments()
 183
 184   Meta.allow_tag_with_uri_attributes("a", ["href", "data-user", "data-tag"], @valid_schemes)
 185
 186   Meta.allow_tag_with_this_attribute_values("a", "class", [
 187     "hashtag",
 188     "u-url",
 189     "mention",
 190     "u-url mention",
 191     "mention u-url"
 192   ])
 193
 194   Meta.allow_tag_with_this_attribute_values("a", "rel", [
 195     "tag",
 196     "nofollow",
 197     "noopener",
 198     "noreferrer"
 199   ])
 200
 201   Meta.allow_tag_with_these_attributes("a", ["name", "title"])
 202
 203   Meta.allow_tag_with_these_attributes("abbr", ["title"])
 204
 205   Meta.allow_tag_with_these_attributes("b", [])
 206   Meta.allow_tag_with_these_attributes("blockquote", [])
 207   Meta.allow_tag_with_these_attributes("br", [])
 208   Meta.allow_tag_with_these_attributes("code", [])
 209   Meta.allow_tag_with_these_attributes("del", [])
 210   Meta.allow_tag_with_these_attributes("em", [])
 211   Meta.allow_tag_with_these_attributes("i", [])
 212   Meta.allow_tag_with_these_attributes("li", [])
 213   Meta.allow_tag_with_these_attributes("ol", [])
 214   Meta.allow_tag_with_these_attributes("p", [])
 215   Meta.allow_tag_with_these_attributes("pre", [])
 216   Meta.allow_tag_with_these_attributes("strong", [])
 217   Meta.allow_tag_with_these_attributes("u", [])
 218   Meta.allow_tag_with_these_attributes("ul", [])
 219
 220   Meta.allow_tag_with_this_attribute_values("span", "class", ["h-card"])
 221   Meta.allow_tag_with_these_attributes("span", [])
 222
 223   @allow_inline_images Keyword.get(@markup, :allow_inline_images)
 224
 225   if @allow_inline_images do
 226     # restrict img tags to http/https only, because of MediaProxy.
 227     Meta.allow_tag_with_uri_attributes("img", ["src"], ["http", "https"])
 228
 229     Meta.allow_tag_with_these_attributes("img", [
 230       "width",
 231       "height",
 232       "title",
 233       "alt"
 234     ])
 235   end
 236
 237   @allow_tables Keyword.get(@markup, :allow_tables)
 238
 239   if @allow_tables do
 240     Meta.allow_tag_with_these_attributes("table", [])
 241     Meta.allow_tag_with_these_attributes("tbody", [])
 242     Meta.allow_tag_with_these_attributes("td", [])
 243     Meta.allow_tag_with_these_attributes("th", [])
 244     Meta.allow_tag_with_these_attributes("thead", [])
 245     Meta.allow_tag_with_these_attributes("tr", [])
 246   end
 247
 248   @allow_headings Keyword.get(@markup, :allow_headings)
 249
 250   if @allow_headings do
 251     Meta.allow_tag_with_these_attributes("h1", [])
 252     Meta.allow_tag_with_these_attributes("h2", [])
 253     Meta.allow_tag_with_these_attributes("h3", [])
 254     Meta.allow_tag_with_these_attributes("h4", [])
 255     Meta.allow_tag_with_these_attributes("h5", [])
 256   end
 257
 258   @allow_fonts Keyword.get(@markup, :allow_fonts)
 259
 260   if @allow_fonts do
 261     Meta.allow_tag_with_these_attributes("font", ["face"])
 262   end
 263
 264   Meta.strip_everything_not_covered()
 265 end
 266
 267 defmodule Pleroma.HTML.Transform.MediaProxy do
 268   @moduledoc "Transforms inline image URIs to use MediaProxy."
 269
 270   alias Pleroma.Web.MediaProxy
 271
 272   def before_scrub(html), do: html
 273
 274   def scrub_attribute("img", {"src", "http" <> target}) do
 275     media_url =
 276       ("http" <> target)
 277       |> MediaProxy.url()
 278
 279     {"src", media_url}
 280   end
 281
 282   def scrub_attribute(_tag, attribute), do: attribute
 283
 284   def scrub({"img", attributes, children}) do
 285     attributes =
 286       attributes
 287       |> Enum.map(fn attr -> scrub_attribute("img", attr) end)
 288       |> Enum.reject(&is_nil(&1))
 289
 290     {"img", attributes, children}
 291   end
 292
 293   def scrub({:comment, _children}), do: ""
 294
 295   def scrub({tag, attributes, children}), do: {tag, attributes, children}
 296   def scrub({_tag, children}), do: children
 297   def scrub(text), do: text
 298 end