From: William Pitcock <nenolod@dereferenced.org>
Date: Mon, 10 Sep 2018 00:05:26 +0000 (+0000)
Subject: html: add default scrubbing profile and configuration knobs
X-Git-Url: https://git.squeep.com/?a=commitdiff_plain;h=40e2f6e50034e81c3bf509e9dc9f2c938d86445d;p=akkoma

html: add default scrubbing profile and configuration knobs
---

diff --git a/config/config.exs b/config/config.exs
index 559a12a91..b3889ba12 100644
--- a/config/config.exs
+++ b/config/config.exs
@@ -77,7 +77,11 @@ config :pleroma, :instance,
   managed_config: true
 
 config :pleroma, :markup,
-  scrub_policy: HtmlSanitizeEx.Scrubber.BasicHTML
+  allow_inline_images: false,
+  allow_headings: false,
+  allow_tables: false,
+  allow_fonts: false,
+  scrub_policy: Pleroma.HTML.Scrubber.Default
 
 config :pleroma, :fe,
   theme: "pleroma-dark",
diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex
index 0ec73a91d..1c62f2ccc 100644
--- a/lib/pleroma/html.ex
+++ b/lib/pleroma/html.ex
@@ -12,3 +12,103 @@ defmodule Pleroma.HTML do
     html |> Scrubber.scrub(Scrubber.StripTags)
   end
 end
+
+defmodule Pleroma.HTML.Scrubber.TwitterText do
+  @moduledoc """
+  An HTML scrubbing policy for twitter-style text: only links, paragraphs, breaks and spans.
+  """
+
+  require HtmlSanitizeEx.Scrubber.Meta
+  alias HtmlSanitizeEx.Scrubber.Meta
+
+  @valid_schemes ["http", "https"]
+
+  Meta.remove_cdata_sections_before_scrub()
+  Meta.strip_comments()
+
+  # links: href is limited to @valid_schemes; name and title are the only other attributes
+  Meta.allow_tag_with_uri_attributes("a", ["href"], @valid_schemes)
+  Meta.allow_tag_with_these_attributes("a", ["name", "title"])
+
+  # paragraphs, linebreaks and spans (microformats), each with an empty attribute list
+  Meta.allow_tag_with_these_attributes("br", [])
+  Meta.allow_tag_with_these_attributes("p", [])
+  Meta.allow_tag_with_these_attributes("span", [])
+
+  # fallback clauses for everything not whitelisted above (incl. plain text); must stay last
+  Meta.strip_everything_not_covered()
+end
+
+defmodule Pleroma.HTML.Scrubber.Default do
+  @moduledoc """
+  The default HTML scrubbing policy: no inline images, tables, headings or
+  fonts unless the matching `allow_*` flag is set under `config :pleroma, :markup`.
+  """
+
+  require HtmlSanitizeEx.Scrubber.Meta
+  alias HtmlSanitizeEx.Scrubber.Meta
+
+  @valid_schemes ["http", "https"]
+
+  Meta.remove_cdata_sections_before_scrub()
+  Meta.strip_comments()
+
+  Meta.allow_tag_with_uri_attributes("a", ["href"], @valid_schemes)
+  Meta.allow_tag_with_these_attributes("a", ["name", "title"])
+
+  Meta.allow_tag_with_these_attributes("b", [])
+  Meta.allow_tag_with_these_attributes("blockquote", [])
+  Meta.allow_tag_with_these_attributes("br", [])
+  Meta.allow_tag_with_these_attributes("code", [])
+  Meta.allow_tag_with_these_attributes("del", [])
+  Meta.allow_tag_with_these_attributes("em", [])
+  Meta.allow_tag_with_these_attributes("i", [])
+  Meta.allow_tag_with_these_attributes("li", [])
+  Meta.allow_tag_with_these_attributes("ol", [])
+  Meta.allow_tag_with_these_attributes("p", [])
+  Meta.allow_tag_with_these_attributes("pre", [])
+  Meta.allow_tag_with_these_attributes("span", [])
+  Meta.allow_tag_with_these_attributes("strong", [])
+  Meta.allow_tag_with_these_attributes("u", [])
+  Meta.allow_tag_with_these_attributes("ul", [])
+
+  # The flags below are read while this module is compiled, so changing them
+  # in the application config only takes effect after a recompile.
+  @markup Application.get_env(:pleroma, :markup)
+  @allow_inline_images Keyword.get(@markup, :allow_inline_images)
+
+  if @allow_inline_images do
+    Meta.allow_tag_with_uri_attributes("img", ["src"], @valid_schemes)
+
+    Meta.allow_tag_with_these_attributes("img", ["width", "height", "title", "alt"])
+  end
+
+  @allow_tables Keyword.get(@markup, :allow_tables)
+
+  if @allow_tables do
+    Meta.allow_tag_with_these_attributes("table", [])
+    Meta.allow_tag_with_these_attributes("tbody", [])
+    Meta.allow_tag_with_these_attributes("td", [])
+    Meta.allow_tag_with_these_attributes("th", [])
+    Meta.allow_tag_with_these_attributes("thead", [])
+    Meta.allow_tag_with_these_attributes("tr", [])
+  end
+
+  @allow_headings Keyword.get(@markup, :allow_headings)
+
+  if @allow_headings do
+    Meta.allow_tag_with_these_attributes("h1", [])
+    Meta.allow_tag_with_these_attributes("h2", [])
+    Meta.allow_tag_with_these_attributes("h3", [])
+    Meta.allow_tag_with_these_attributes("h4", [])
+    Meta.allow_tag_with_these_attributes("h5", [])
+  end
+
+  @allow_fonts Keyword.get(@markup, :allow_fonts)
+
+  if @allow_fonts do
+    Meta.allow_tag_with_these_attributes("font", ["face"])
+  end
+
+  Meta.strip_everything_not_covered()
+end