# git.squeep.com Git - akkoma/blob - lib/pleroma/html.ex

# Pleroma: A lightweight social networking server
# Copyright © 2017-2019 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
   4
   5 defmodule Pleroma.HTML do
   6   alias HtmlSanitizeEx.Scrubber
   7
   8   defp get_scrubbers(scrubber) when is_atom(scrubber), do: [scrubber]
   9   defp get_scrubbers(scrubbers) when is_list(scrubbers), do: scrubbers
  10   defp get_scrubbers(_), do: [Pleroma.HTML.Scrubber.Default]
  11
  12   def get_scrubbers() do
  13     Pleroma.Config.get([:markup, :scrub_policy])
  14     |> get_scrubbers
  15   end
  16
  17   def filter_tags(html, nil) do
  18     filter_tags(html, get_scrubbers())
  19   end
  20
  21   def filter_tags(html, scrubbers) when is_list(scrubbers) do
  22     Enum.reduce(scrubbers, html, fn scrubber, html ->
  23       filter_tags(html, scrubber)
  24     end)
  25   end
  26
  27   def filter_tags(html, scrubber), do: Scrubber.scrub(html, scrubber)
  28   def filter_tags(html), do: filter_tags(html, nil)
  29   def strip_tags(html), do: Scrubber.scrub(html, Scrubber.StripTags)
  30
  31   def get_cached_scrubbed_html_for_object(content, scrubbers, object) do
  32     key = "#{generate_scrubber_signature(scrubbers)}|#{object.id}"
  33     Cachex.fetch!(:scrubber_cache, key, fn _key -> ensure_scrubbed_html(content, scrubbers) end)
  34   end
  35
  36   def get_cached_stripped_html_for_object(content, object) do
  37     get_cached_scrubbed_html_for_object(content, HtmlSanitizeEx.Scrubber.StripTags, object)
  38   end
  39
  40   def ensure_scrubbed_html(
  41         content,
  42         scrubbers
  43       ) do
  44     {:commit, filter_tags(content, scrubbers)}
  45   end
  46
  47   defp generate_scrubber_signature(scrubber) when is_atom(scrubber) do
  48     generate_scrubber_signature([scrubber])
  49   end
  50
  51   defp generate_scrubber_signature(scrubbers) do
  52     Enum.reduce(scrubbers, "", fn scrubber, signature ->
  53       # If a scrubber does not have a version(e.g HtmlSanitizeEx.Scrubber.StripTags) it is assumed it is always 0)
  54       version =
  55         if Kernel.function_exported?(scrubber, :version, 0) do
  56           scrubber.version
  57         else
  58           0
  59         end
  60
  61       "#{signature}#{to_string(scrubber)}#{version}"
  62     end)
  63   end
  64 end
  65
  66 defmodule Pleroma.HTML.Scrubber.TwitterText do
  67   @moduledoc """
  68   An HTML scrubbing policy which limits to twitter-style text.  Only
  69   paragraphs, breaks and links are allowed through the filter.
  70   """
  71
  72   @markup Application.get_env(:pleroma, :markup)
  73   @uri_schemes Application.get_env(:pleroma, :uri_schemes, [])
  74   @valid_schemes Keyword.get(@uri_schemes, :valid_schemes, [])
  75
  76   require HtmlSanitizeEx.Scrubber.Meta
  77   alias HtmlSanitizeEx.Scrubber.Meta
  78
  79   def version do
  80     0
  81   end
  82
  83   Meta.remove_cdata_sections_before_scrub()
  84   Meta.strip_comments()
  85
  86   # links
  87   Meta.allow_tag_with_uri_attributes("a", ["href", "data-user", "data-tag"], @valid_schemes)
  88   Meta.allow_tag_with_these_attributes("a", ["name", "title"])
  89
  90   # paragraphs and linebreaks
  91   Meta.allow_tag_with_these_attributes("br", [])
  92   Meta.allow_tag_with_these_attributes("p", [])
  93
  94   # microformats
  95   Meta.allow_tag_with_these_attributes("span", [])
  96
  97   # allow inline images for custom emoji
  98   @allow_inline_images Keyword.get(@markup, :allow_inline_images)
  99
 100   if @allow_inline_images do
 101     # restrict img tags to http/https only, because of MediaProxy.
 102     Meta.allow_tag_with_uri_attributes("img", ["src"], ["http", "https"])
 103
 104     Meta.allow_tag_with_these_attributes("img", [
 105       "width",
 106       "height",
 107       "title",
 108       "alt"
 109     ])
 110   end
 111
 112   Meta.strip_everything_not_covered()
 113 end
 114
 115 defmodule Pleroma.HTML.Scrubber.Default do
 116   @doc "The default HTML scrubbing policy: no "
 117
 118   require HtmlSanitizeEx.Scrubber.Meta
 119   alias HtmlSanitizeEx.Scrubber.Meta
 120
 121   def version do
 122     0
 123   end
 124
 125   @markup Application.get_env(:pleroma, :markup)
 126   @uri_schemes Application.get_env(:pleroma, :uri_schemes, [])
 127   @valid_schemes Keyword.get(@uri_schemes, :valid_schemes, [])
 128
 129   Meta.remove_cdata_sections_before_scrub()
 130   Meta.strip_comments()
 131
 132   Meta.allow_tag_with_uri_attributes("a", ["href", "data-user", "data-tag"], @valid_schemes)
 133   Meta.allow_tag_with_these_attributes("a", ["name", "title"])
 134
 135   Meta.allow_tag_with_these_attributes("abbr", ["title"])
 136
 137   Meta.allow_tag_with_these_attributes("b", [])
 138   Meta.allow_tag_with_these_attributes("blockquote", [])
 139   Meta.allow_tag_with_these_attributes("br", [])
 140   Meta.allow_tag_with_these_attributes("code", [])
 141   Meta.allow_tag_with_these_attributes("del", [])
 142   Meta.allow_tag_with_these_attributes("em", [])
 143   Meta.allow_tag_with_these_attributes("i", [])
 144   Meta.allow_tag_with_these_attributes("li", [])
 145   Meta.allow_tag_with_these_attributes("ol", [])
 146   Meta.allow_tag_with_these_attributes("p", [])
 147   Meta.allow_tag_with_these_attributes("pre", [])
 148   Meta.allow_tag_with_these_attributes("span", [])
 149   Meta.allow_tag_with_these_attributes("strong", [])
 150   Meta.allow_tag_with_these_attributes("u", [])
 151   Meta.allow_tag_with_these_attributes("ul", [])
 152
 153   @allow_inline_images Keyword.get(@markup, :allow_inline_images)
 154
 155   if @allow_inline_images do
 156     # restrict img tags to http/https only, because of MediaProxy.
 157     Meta.allow_tag_with_uri_attributes("img", ["src"], ["http", "https"])
 158
 159     Meta.allow_tag_with_these_attributes("img", [
 160       "width",
 161       "height",
 162       "title",
 163       "alt"
 164     ])
 165   end
 166
 167   @allow_tables Keyword.get(@markup, :allow_tables)
 168
 169   if @allow_tables do
 170     Meta.allow_tag_with_these_attributes("table", [])
 171     Meta.allow_tag_with_these_attributes("tbody", [])
 172     Meta.allow_tag_with_these_attributes("td", [])
 173     Meta.allow_tag_with_these_attributes("th", [])
 174     Meta.allow_tag_with_these_attributes("thead", [])
 175     Meta.allow_tag_with_these_attributes("tr", [])
 176   end
 177
 178   @allow_headings Keyword.get(@markup, :allow_headings)
 179
 180   if @allow_headings do
 181     Meta.allow_tag_with_these_attributes("h1", [])
 182     Meta.allow_tag_with_these_attributes("h2", [])
 183     Meta.allow_tag_with_these_attributes("h3", [])
 184     Meta.allow_tag_with_these_attributes("h4", [])
 185     Meta.allow_tag_with_these_attributes("h5", [])
 186   end
 187
 188   @allow_fonts Keyword.get(@markup, :allow_fonts)
 189
 190   if @allow_fonts do
 191     Meta.allow_tag_with_these_attributes("font", ["face"])
 192   end
 193
 194   Meta.strip_everything_not_covered()
 195 end
 196
 197 defmodule Pleroma.HTML.Transform.MediaProxy do
 198   @moduledoc "Transforms inline image URIs to use MediaProxy."
 199
 200   alias Pleroma.Web.MediaProxy
 201
 202   def version do
 203     0
 204   end
 205
 206   def before_scrub(html), do: html
 207
 208   def scrub_attribute("img", {"src", "http" <> target}) do
 209     media_url =
 210       ("http" <> target)
 211       |> MediaProxy.url()
 212
 213     {"src", media_url}
 214   end
 215
 216   def scrub_attribute(_tag, attribute), do: attribute
 217
 218   def scrub({"img", attributes, children}) do
 219     attributes =
 220       attributes
 221       |> Enum.map(fn attr -> scrub_attribute("img", attr) end)
 222       |> Enum.reject(&is_nil(&1))
 223
 224     {"img", attributes, children}
 225   end
 226
 227   def scrub({:comment, _children}), do: ""
 228
 229   def scrub({tag, attributes, children}), do: {tag, attributes, children}
 230   def scrub({_tag, children}), do: children
 231   def scrub(text), do: text
 232 end