git.squeep.com Git - akkoma/blob - lib/pleroma/html.ex

   1 # Pleroma: A lightweight social networking server
   2 # Copyright © 2017-2019 Pleroma Authors <https://pleroma.social/>
   3 # SPDX-License-Identifier: AGPL-3.0-only
   4
   5 defmodule Pleroma.HTML do
   6   alias HtmlSanitizeEx.Scrubber
   7
   8   defp get_scrubbers(scrubber) when is_atom(scrubber), do: [scrubber]
   9   defp get_scrubbers(scrubbers) when is_list(scrubbers), do: scrubbers
  10   defp get_scrubbers(_), do: [Pleroma.HTML.Scrubber.Default]
  11
  12   def get_scrubbers do
  13     Pleroma.Config.get([:markup, :scrub_policy])
  14     |> get_scrubbers
  15   end
  16
  17   def filter_tags(html, nil) do
  18     filter_tags(html, get_scrubbers())
  19   end
  20
  21   def filter_tags(html, scrubbers) when is_list(scrubbers) do
  22     Enum.reduce(scrubbers, html, fn scrubber, html ->
  23       filter_tags(html, scrubber)
  24     end)
  25   end
  26
  27   def filter_tags(html, scrubber), do: Scrubber.scrub(html, scrubber)
  28   def filter_tags(html), do: filter_tags(html, nil)
  29   def strip_tags(html), do: Scrubber.scrub(html, Scrubber.StripTags)
  30
  31   def get_cached_scrubbed_html_for_activity(
  32         content,
  33         scrubbers,
  34         activity,
  35         key \\ "",
  36         callback \\ fn x -> x end
  37       ) do
  38     key = "#{key}#{generate_scrubber_signature(scrubbers)}|#{activity.id}"
  39
  40     Cachex.fetch!(:scrubber_cache, key, fn _key ->
  41       object = Pleroma.Object.normalize(activity)
  42       ensure_scrubbed_html(content, scrubbers, object.data["fake"] || false, callback)
  43     end)
  44   end
  45
  46   def get_cached_stripped_html_for_activity(content, activity, key) do
  47     get_cached_scrubbed_html_for_activity(
  48       content,
  49       HtmlSanitizeEx.Scrubber.StripTags,
  50       activity,
  51       key,
  52       &HtmlEntities.decode/1
  53     )
  54   end
  55
  56   def ensure_scrubbed_html(
  57         content,
  58         scrubbers,
  59         fake,
  60         callback
  61       ) do
  62     content =
  63       content
  64       |> filter_tags(scrubbers)
  65       |> callback.()
  66
  67     if fake do
  68       {:ignore, content}
  69     else
  70       {:commit, content}
  71     end
  72   end
  73
  74   defp generate_scrubber_signature(scrubber) when is_atom(scrubber) do
  75     generate_scrubber_signature([scrubber])
  76   end
  77
  78   defp generate_scrubber_signature(scrubbers) do
  79     Enum.reduce(scrubbers, "", fn scrubber, signature ->
  80       "#{signature}#{to_string(scrubber)}"
  81     end)
  82   end
  83
  84   def extract_first_external_url(_, nil), do: {:error, "No content"}
  85
  86   def extract_first_external_url(object, content) do
  87     key = "URL|#{object.id}"
  88
  89     Cachex.fetch!(:scrubber_cache, key, fn _key ->
  90       result =
  91         content
  92         |> Floki.filter_out("a.mention,a.hashtag,a[rel~=\"tag\"]")
  93         |> Floki.attribute("a", "href")
  94         |> Enum.at(0)
  95
  96       {:commit, {:ok, result}}
  97     end)
  98   end
  99 end
 100
 101 defmodule Pleroma.HTML.Scrubber.TwitterText do
 102   @moduledoc """
 103   An HTML scrubbing policy which limits to twitter-style text.  Only
 104   paragraphs, breaks and links are allowed through the filter.
 105   """
 106
 107   @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], [])
 108
 109   require HtmlSanitizeEx.Scrubber.Meta
 110   alias HtmlSanitizeEx.Scrubber.Meta
 111
 112   Meta.remove_cdata_sections_before_scrub()
 113   Meta.strip_comments()
 114
 115   # links
 116   Meta.allow_tag_with_uri_attributes("a", ["href", "data-user", "data-tag"], @valid_schemes)
 117
 118   Meta.allow_tag_with_this_attribute_values("a", "class", [
 119     "hashtag",
 120     "u-url",
 121     "mention",
 122     "u-url mention",
 123     "mention u-url"
 124   ])
 125
 126   Meta.allow_tag_with_this_attribute_values("a", "rel", [
 127     "tag",
 128     "nofollow",
 129     "noopener",
 130     "noreferrer"
 131   ])
 132
 133   Meta.allow_tag_with_these_attributes("a", ["name", "title"])
 134
 135   # paragraphs and linebreaks
 136   Meta.allow_tag_with_these_attributes("br", [])
 137   Meta.allow_tag_with_these_attributes("p", [])
 138
 139   # microformats
 140   Meta.allow_tag_with_this_attribute_values("span", "class", ["h-card"])
 141   Meta.allow_tag_with_these_attributes("span", [])
 142
 143   # allow inline images for custom emoji
 144   if Pleroma.Config.get([:markup, :allow_inline_images]) do
 145     # restrict img tags to http/https only, because of MediaProxy.
 146     Meta.allow_tag_with_uri_attributes("img", ["src"], ["http", "https"])
 147
 148     Meta.allow_tag_with_these_attributes("img", [
 149       "width",
 150       "height",
 151       "class",
 152       "title",
 153       "alt"
 154     ])
 155   end
 156
 157   Meta.strip_everything_not_covered()
 158 end
 159
 160 defmodule Pleroma.HTML.Scrubber.Default do
 161   @doc "The default HTML scrubbing policy: no "
 162
 163   require HtmlSanitizeEx.Scrubber.Meta
 164   alias HtmlSanitizeEx.Scrubber.Meta
 165   # credo:disable-for-previous-line
 166   # No idea how to fix this one…
 167
 168   @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], [])
 169
 170   Meta.remove_cdata_sections_before_scrub()
 171   Meta.strip_comments()
 172
 173   Meta.allow_tag_with_uri_attributes("a", ["href", "data-user", "data-tag"], @valid_schemes)
 174
 175   Meta.allow_tag_with_this_attribute_values("a", "class", [
 176     "hashtag",
 177     "u-url",
 178     "mention",
 179     "u-url mention",
 180     "mention u-url"
 181   ])
 182
 183   Meta.allow_tag_with_this_attribute_values("a", "rel", [
 184     "tag",
 185     "nofollow",
 186     "noopener",
 187     "noreferrer",
 188     "ugc"
 189   ])
 190
 191   Meta.allow_tag_with_these_attributes("a", ["name", "title"])
 192
 193   Meta.allow_tag_with_these_attributes("abbr", ["title"])
 194
 195   Meta.allow_tag_with_these_attributes("b", [])
 196   Meta.allow_tag_with_these_attributes("blockquote", [])
 197   Meta.allow_tag_with_these_attributes("br", [])
 198   Meta.allow_tag_with_these_attributes("code", [])
 199   Meta.allow_tag_with_these_attributes("del", [])
 200   Meta.allow_tag_with_these_attributes("em", [])
 201   Meta.allow_tag_with_these_attributes("i", [])
 202   Meta.allow_tag_with_these_attributes("li", [])
 203   Meta.allow_tag_with_these_attributes("ol", [])
 204   Meta.allow_tag_with_these_attributes("p", [])
 205   Meta.allow_tag_with_these_attributes("pre", [])
 206   Meta.allow_tag_with_these_attributes("strong", [])
 207   Meta.allow_tag_with_these_attributes("sub", [])
 208   Meta.allow_tag_with_these_attributes("sup", [])
 209   Meta.allow_tag_with_these_attributes("u", [])
 210   Meta.allow_tag_with_these_attributes("ul", [])
 211
 212   Meta.allow_tag_with_this_attribute_values("span", "class", ["h-card"])
 213   Meta.allow_tag_with_these_attributes("span", [])
 214
 215   @allow_inline_images Pleroma.Config.get([:markup, :allow_inline_images])
 216
 217   if @allow_inline_images do
 218     # restrict img tags to http/https only, because of MediaProxy.
 219     Meta.allow_tag_with_uri_attributes("img", ["src"], ["http", "https"])
 220
 221     Meta.allow_tag_with_these_attributes("img", [
 222       "width",
 223       "height",
 224       "class",
 225       "title",
 226       "alt"
 227     ])
 228   end
 229
 230   if Pleroma.Config.get([:markup, :allow_tables]) do
 231     Meta.allow_tag_with_these_attributes("table", [])
 232     Meta.allow_tag_with_these_attributes("tbody", [])
 233     Meta.allow_tag_with_these_attributes("td", [])
 234     Meta.allow_tag_with_these_attributes("th", [])
 235     Meta.allow_tag_with_these_attributes("thead", [])
 236     Meta.allow_tag_with_these_attributes("tr", [])
 237   end
 238
 239   if Pleroma.Config.get([:markup, :allow_headings]) do
 240     Meta.allow_tag_with_these_attributes("h1", [])
 241     Meta.allow_tag_with_these_attributes("h2", [])
 242     Meta.allow_tag_with_these_attributes("h3", [])
 243     Meta.allow_tag_with_these_attributes("h4", [])
 244     Meta.allow_tag_with_these_attributes("h5", [])
 245   end
 246
 247   if Pleroma.Config.get([:markup, :allow_fonts]) do
 248     Meta.allow_tag_with_these_attributes("font", ["face"])
 249   end
 250
 251   Meta.strip_everything_not_covered()
 252 end
 253
 254 defmodule Pleroma.HTML.Transform.MediaProxy do
 255   @moduledoc "Transforms inline image URIs to use MediaProxy."
 256
 257   alias Pleroma.Web.MediaProxy
 258
 259   def before_scrub(html), do: html
 260
 261   def scrub_attribute("img", {"src", "http" <> target}) do
 262     media_url =
 263       ("http" <> target)
 264       |> MediaProxy.url()
 265
 266     {"src", media_url}
 267   end
 268
 269   def scrub_attribute(_tag, attribute), do: attribute
 270
 271   def scrub({"img", attributes, children}) do
 272     attributes =
 273       attributes
 274       |> Enum.map(fn attr -> scrub_attribute("img", attr) end)
 275       |> Enum.reject(&is_nil(&1))
 276
 277     {"img", attributes, children}
 278   end
 279
 280   def scrub({:comment, _children}), do: ""
 281
 282   def scrub({tag, attributes, children}), do: {tag, attributes, children}
 283   def scrub({_tag, children}), do: children
 284   def scrub(text), do: text
 285 end
 286
 287 defmodule Pleroma.HTML.Scrubber.LinksOnly do
 288   @moduledoc """
 289   An HTML scrubbing policy which limits to links only.
 290   """
 291
 292   @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], [])
 293
 294   require HtmlSanitizeEx.Scrubber.Meta
 295   alias HtmlSanitizeEx.Scrubber.Meta
 296
 297   Meta.remove_cdata_sections_before_scrub()
 298   Meta.strip_comments()
 299
 300   # links
 301   Meta.allow_tag_with_uri_attributes("a", ["href"], @valid_schemes)
 302
 303   Meta.allow_tag_with_this_attribute_values("a", "rel", [
 304     "tag",
 305     "nofollow",
 306     "noopener",
 307     "noreferrer",
 308     "me",
 309     "ugc"
 310   ])
 311
 312   Meta.allow_tag_with_these_attributes("a", ["name", "title"])
 313   Meta.strip_everything_not_covered()
 314 end