git.squeep.com Git - akkoma/blob - lib/pleroma/html.ex

   1 # Pleroma: A lightweight social networking server
   2 # Copyright © 2017-2019 Pleroma Authors <https://pleroma.social/>
   3 # SPDX-License-Identifier: AGPL-3.0-only
   4
   5 defmodule Pleroma.HTML do
   6   defp get_scrubbers(scrubber) when is_atom(scrubber), do: [scrubber]
   7   defp get_scrubbers(scrubbers) when is_list(scrubbers), do: scrubbers
   8   defp get_scrubbers(_), do: [Pleroma.HTML.Scrubber.Default]
   9
  10   def get_scrubbers do
  11     Pleroma.Config.get([:markup, :scrub_policy])
  12     |> get_scrubbers
  13   end
  14
  15   def filter_tags(html, nil) do
  16     filter_tags(html, get_scrubbers())
  17   end
  18
  19   def filter_tags(html, scrubbers) when is_list(scrubbers) do
  20     Enum.reduce(scrubbers, html, fn scrubber, html ->
  21       filter_tags(html, scrubber)
  22     end)
  23   end
  24
  25   def filter_tags(html, scrubber) do
  26     {:ok, content} = FastSanitize.Sanitizer.scrub(html, scrubber)
  27     content
  28   end
  29
  30   def filter_tags(html), do: filter_tags(html, nil)
  31   def strip_tags(html), do: filter_tags(html, FastSanitize.Sanitizer.StripTags)
  32
  33   def get_cached_scrubbed_html_for_activity(
  34         content,
  35         scrubbers,
  36         activity,
  37         key \\ "",
  38         callback \\ fn x -> x end
  39       ) do
  40     key = "#{key}#{generate_scrubber_signature(scrubbers)}|#{activity.id}"
  41
  42     Cachex.fetch!(:scrubber_cache, key, fn _key ->
  43       object = Pleroma.Object.normalize(activity)
  44       ensure_scrubbed_html(content, scrubbers, object.data["fake"] || false, callback)
  45     end)
  46   end
  47
  48   def get_cached_stripped_html_for_activity(content, activity, key) do
  49     get_cached_scrubbed_html_for_activity(
  50       content,
  51       FastSanitize.Sanitizer.StripTags,
  52       activity,
  53       key,
  54       &HtmlEntities.decode/1
  55     )
  56   end
  57
  58   def ensure_scrubbed_html(
  59         content,
  60         scrubbers,
  61         fake,
  62         callback
  63       ) do
  64     content =
  65       content
  66       |> filter_tags(scrubbers)
  67       |> callback.()
  68
  69     if fake do
  70       {:ignore, content}
  71     else
  72       {:commit, content}
  73     end
  74   end
  75
  76   defp generate_scrubber_signature(scrubber) when is_atom(scrubber) do
  77     generate_scrubber_signature([scrubber])
  78   end
  79
  80   defp generate_scrubber_signature(scrubbers) do
  81     Enum.reduce(scrubbers, "", fn scrubber, signature ->
  82       "#{signature}#{to_string(scrubber)}"
  83     end)
  84   end
  85
  86   def extract_first_external_url(_, nil), do: {:error, "No content"}
  87
  88   def extract_first_external_url(object, content) do
  89     key = "URL|#{object.id}"
  90
  91     Cachex.fetch!(:scrubber_cache, key, fn _key ->
  92       result =
  93         content
  94         |> Floki.filter_out("a.mention,a.hashtag,a[rel~=\"tag\"]")
  95         |> Floki.attribute("a", "href")
  96         |> Enum.at(0)
  97
  98       {:commit, {:ok, result}}
  99     end)
 100   end
 101 end
 102
 103 defmodule Pleroma.HTML.Scrubber.TwitterText do
 104   @moduledoc """
 105   An HTML scrubbing policy which limits to twitter-style text.  Only
 106   paragraphs, breaks and links are allowed through the filter.
 107   """
 108
 109   @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], [])
 110
 111   require FastSanitize.Sanitizer.Meta
 112   alias FastSanitize.Sanitizer.Meta
 113
 114   Meta.strip_comments()
 115
 116   # links
 117   Meta.allow_tag_with_uri_attributes(:a, ["href", "data-user", "data-tag"], @valid_schemes)
 118
 119   Meta.allow_tag_with_this_attribute_values(:a, "class", [
 120     "hashtag",
 121     "u-url",
 122     "mention",
 123     "u-url mention",
 124     "mention u-url"
 125   ])
 126
 127   Meta.allow_tag_with_this_attribute_values(:a, "rel", [
 128     "tag",
 129     "nofollow",
 130     "noopener",
 131     "noreferrer"
 132   ])
 133
 134   Meta.allow_tag_with_these_attributes(:a, ["name", "title"])
 135
 136   # paragraphs and linebreaks
 137   Meta.allow_tag_with_these_attributes(:br, [])
 138   Meta.allow_tag_with_these_attributes(:p, [])
 139
 140   # microformats
 141   Meta.allow_tag_with_this_attribute_values(:span, "class", ["h-card"])
 142   Meta.allow_tag_with_these_attributes(:span, [])
 143
 144   # allow inline images for custom emoji
 145   if Pleroma.Config.get([:markup, :allow_inline_images]) do
 146     # restrict img tags to http/https only, because of MediaProxy.
 147     Meta.allow_tag_with_uri_attributes(:img, ["src"], ["http", "https"])
 148
 149     Meta.allow_tag_with_these_attributes(:img, [
 150       "width",
 151       "height",
 152       "class",
 153       "title",
 154       "alt"
 155     ])
 156   end
 157
 158   Meta.strip_everything_not_covered()
 159 end
 160
 161 defmodule Pleroma.HTML.Scrubber.Default do
 162   @doc "The default HTML scrubbing policy: no "
 163
 164   require FastSanitize.Sanitizer.Meta
 165   alias FastSanitize.Sanitizer.Meta
 166
 167   # credo:disable-for-previous-line
 168   # No idea how to fix this one…
 169
 170   @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], [])
 171
 172   Meta.strip_comments()
 173
 174   Meta.allow_tag_with_uri_attributes(:a, ["href", "data-user", "data-tag"], @valid_schemes)
 175
 176   Meta.allow_tag_with_this_attribute_values(:a, "class", [
 177     "hashtag",
 178     "u-url",
 179     "mention",
 180     "u-url mention",
 181     "mention u-url"
 182   ])
 183
 184   Meta.allow_tag_with_this_attribute_values(:a, "rel", [
 185     "tag",
 186     "nofollow",
 187     "noopener",
 188     "noreferrer",
 189     "ugc"
 190   ])
 191
 192   Meta.allow_tag_with_these_attributes(:a, ["name", "title"])
 193
 194   Meta.allow_tag_with_these_attributes(:abbr, ["title"])
 195
 196   Meta.allow_tag_with_these_attributes(:b, [])
 197   Meta.allow_tag_with_these_attributes(:blockquote, [])
 198   Meta.allow_tag_with_these_attributes(:br, [])
 199   Meta.allow_tag_with_these_attributes(:code, [])
 200   Meta.allow_tag_with_these_attributes(:del, [])
 201   Meta.allow_tag_with_these_attributes(:em, [])
 202   Meta.allow_tag_with_these_attributes(:i, [])
 203   Meta.allow_tag_with_these_attributes(:li, [])
 204   Meta.allow_tag_with_these_attributes(:ol, [])
 205   Meta.allow_tag_with_these_attributes(:p, [])
 206   Meta.allow_tag_with_these_attributes(:pre, [])
 207   Meta.allow_tag_with_these_attributes(:strong, [])
 208   Meta.allow_tag_with_these_attributes(:sub, [])
 209   Meta.allow_tag_with_these_attributes(:sup, [])
 210   Meta.allow_tag_with_these_attributes(:u, [])
 211   Meta.allow_tag_with_these_attributes(:ul, [])
 212
 213   Meta.allow_tag_with_this_attribute_values(:span, "class", ["h-card"])
 214   Meta.allow_tag_with_these_attributes(:span, [])
 215
 216   @allow_inline_images Pleroma.Config.get([:markup, :allow_inline_images])
 217
 218   if @allow_inline_images do
 219     # restrict img tags to http/https only, because of MediaProxy.
 220     Meta.allow_tag_with_uri_attributes(:img, ["src"], ["http", "https"])
 221
 222     Meta.allow_tag_with_these_attributes(:img, [
 223       "width",
 224       "height",
 225       "class",
 226       "title",
 227       "alt"
 228     ])
 229   end
 230
 231   if Pleroma.Config.get([:markup, :allow_tables]) do
 232     Meta.allow_tag_with_these_attributes(:table, [])
 233     Meta.allow_tag_with_these_attributes(:tbody, [])
 234     Meta.allow_tag_with_these_attributes(:td, [])
 235     Meta.allow_tag_with_these_attributes(:th, [])
 236     Meta.allow_tag_with_these_attributes(:thead, [])
 237     Meta.allow_tag_with_these_attributes(:tr, [])
 238   end
 239
 240   if Pleroma.Config.get([:markup, :allow_headings]) do
 241     Meta.allow_tag_with_these_attributes(:h1, [])
 242     Meta.allow_tag_with_these_attributes(:h2, [])
 243     Meta.allow_tag_with_these_attributes(:h3, [])
 244     Meta.allow_tag_with_these_attributes(:h4, [])
 245     Meta.allow_tag_with_these_attributes(:h5, [])
 246   end
 247
 248   if Pleroma.Config.get([:markup, :allow_fonts]) do
 249     Meta.allow_tag_with_these_attributes(:font, ["face"])
 250   end
 251
 252   Meta.strip_everything_not_covered()
 253 end
 254
 255 defmodule Pleroma.HTML.Transform.MediaProxy do
 256   @moduledoc "Transforms inline image URIs to use MediaProxy."
 257
 258   alias Pleroma.Web.MediaProxy
 259
 260   def before_scrub(html), do: html
 261
 262   def scrub_attribute(:img, {"src", "http" <> target}) do
 263     media_url =
 264       ("http" <> target)
 265       |> MediaProxy.url()
 266
 267     {"src", media_url}
 268   end
 269
 270   def scrub_attribute(_tag, attribute), do: attribute
 271
 272   def scrub({:img, attributes, children}) do
 273     attributes =
 274       attributes
 275       |> Enum.map(fn attr -> scrub_attribute(:img, attr) end)
 276       |> Enum.reject(&is_nil(&1))
 277
 278     {:img, attributes, children}
 279   end
 280
 281   def scrub({:comment, _text, _children}), do: ""
 282
 283   def scrub({tag, attributes, children}), do: {tag, attributes, children}
 284   def scrub({_tag, children}), do: children
 285   def scrub(text), do: text
 286 end
 287
 288 defmodule Pleroma.HTML.Scrubber.LinksOnly do
 289   @moduledoc """
 290   An HTML scrubbing policy which limits to links only.
 291   """
 292
 293   @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], [])
 294
 295   require FastSanitize.Sanitizer.Meta
 296   alias FastSanitize.Sanitizer.Meta
 297
 298   Meta.strip_comments()
 299
 300   # links
 301   Meta.allow_tag_with_uri_attributes(:a, ["href"], @valid_schemes)
 302
 303   Meta.allow_tag_with_this_attribute_values(:a, "rel", [
 304     "tag",
 305     "nofollow",
 306     "noopener",
 307     "noreferrer",
 308     "me",
 309     "ugc"
 310   ])
 311
 312   Meta.allow_tag_with_these_attributes(:a, ["name", "title"])
 313   Meta.strip_everything_not_covered()
 314 end