X-Git-Url: http://git.squeep.com/?a=blobdiff_plain;f=lib%2Fpleroma%2Fhtml.ex;h=2cae29f35170fe5b952551b405db677c8baba9f2;hb=92b4a1aa1bc750bb077ae45c422967f9712e247d;hp=e5e78ee4f50124a1d1c2d0568b5d7c14f6c0fa67;hpb=ad5263c647aea65dbeb4c329825671895e0a8863;p=akkoma diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex index e5e78ee4f..2cae29f35 100644 --- a/lib/pleroma/html.ex +++ b/lib/pleroma/html.ex @@ -3,7 +3,24 @@ # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.HTML do - alias HtmlSanitizeEx.Scrubber + # Scrubbers are compiled on boot so they can be configured in OTP releases + # @on_load :compile_scrubbers + + def compile_scrubbers do + dir = Path.join(:code.priv_dir(:pleroma), "scrubbers") + + dir + |> File.ls!() + |> Enum.map(&Path.join(dir, &1)) + |> Kernel.ParallelCompiler.compile() + |> case do + {:error, _errors, _warnings} -> + raise "Compiling scrubbers failed" + + {:ok, _modules, _warnings} -> + :ok + end + end defp get_scrubbers(scrubber) when is_atom(scrubber), do: [scrubber] defp get_scrubbers(scrubbers) when is_list(scrubbers), do: scrubbers @@ -24,9 +41,13 @@ defmodule Pleroma.HTML do end) end - def filter_tags(html, scrubber), do: Scrubber.scrub(html, scrubber) + def filter_tags(html, scrubber) do + {:ok, content} = FastSanitize.Sanitizer.scrub(html, scrubber) + content + end + def filter_tags(html), do: filter_tags(html, nil) - def strip_tags(html), do: Scrubber.scrub(html, Scrubber.StripTags) + def strip_tags(html), do: filter_tags(html, FastSanitize.Sanitizer.StripTags) def get_cached_scrubbed_html_for_activity( content, @@ -46,7 +67,7 @@ defmodule Pleroma.HTML do def get_cached_stripped_html_for_activity(content, activity, key) do get_cached_scrubbed_html_for_activity( content, - HtmlSanitizeEx.Scrubber.StripTags, + FastSanitize.Sanitizer.StripTags, activity, key, &HtmlEntities.decode/1 @@ -89,7 +110,7 @@ defmodule Pleroma.HTML do Cachex.fetch!(:scrubber_cache, key, fn _key -> result = content - |> Floki.filter_out("a.mention") + |> Floki.filter_out("a.mention,a.hashtag,a[rel~=\"tag\"]") |> Floki.attribute("a", "href") |> Enum.at(0) @@ -97,186 +118,3 @@ defmodule Pleroma.HTML do end) end end - -defmodule Pleroma.HTML.Scrubber.TwitterText do - @moduledoc """ - An HTML scrubbing policy which limits to twitter-style text. Only - paragraphs, breaks and links are allowed through the filter. - """ - - @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], []) - - require HtmlSanitizeEx.Scrubber.Meta - alias HtmlSanitizeEx.Scrubber.Meta - - Meta.remove_cdata_sections_before_scrub() - Meta.strip_comments() - - # links - Meta.allow_tag_with_uri_attributes("a", ["href", "data-user", "data-tag"], @valid_schemes) - - Meta.allow_tag_with_this_attribute_values("a", "class", [ - "hashtag", - "u-url", - "mention", - "u-url mention", - "mention u-url" - ]) - - Meta.allow_tag_with_this_attribute_values("a", "rel", [ - "tag", - "nofollow", - "noopener", - "noreferrer" - ]) - - Meta.allow_tag_with_these_attributes("a", ["name", "title"]) - - # paragraphs and linebreaks - Meta.allow_tag_with_these_attributes("br", []) - Meta.allow_tag_with_these_attributes("p", []) - - # microformats - Meta.allow_tag_with_this_attribute_values("span", "class", ["h-card"]) - Meta.allow_tag_with_these_attributes("span", []) - - # allow inline images for custom emoji - if Pleroma.Config.get([:markup, :allow_inline_images]) do - # restrict img tags to http/https only, because of MediaProxy. - Meta.allow_tag_with_uri_attributes("img", ["src"], ["http", "https"]) - - Meta.allow_tag_with_these_attributes("img", [ - "width", - "height", - "class", - "title", - "alt" - ]) - end - - Meta.strip_everything_not_covered() -end - -defmodule Pleroma.HTML.Scrubber.Default do - @doc "The default HTML scrubbing policy: no " - - require HtmlSanitizeEx.Scrubber.Meta - alias HtmlSanitizeEx.Scrubber.Meta - # credo:disable-for-previous-line - # No idea how to fix this one… - - @valid_schemes Pleroma.Config.get([:uri_schemes, :valid_schemes], []) - - Meta.remove_cdata_sections_before_scrub() - Meta.strip_comments() - - Meta.allow_tag_with_uri_attributes("a", ["href", "data-user", "data-tag"], @valid_schemes) - - Meta.allow_tag_with_this_attribute_values("a", "class", [ - "hashtag", - "u-url", - "mention", - "u-url mention", - "mention u-url" - ]) - - Meta.allow_tag_with_this_attribute_values("a", "rel", [ - "tag", - "nofollow", - "noopener", - "noreferrer" - ]) - - Meta.allow_tag_with_these_attributes("a", ["name", "title"]) - - Meta.allow_tag_with_these_attributes("abbr", ["title"]) - - Meta.allow_tag_with_these_attributes("b", []) - Meta.allow_tag_with_these_attributes("blockquote", []) - Meta.allow_tag_with_these_attributes("br", []) - Meta.allow_tag_with_these_attributes("code", []) - Meta.allow_tag_with_these_attributes("del", []) - Meta.allow_tag_with_these_attributes("em", []) - Meta.allow_tag_with_these_attributes("i", []) - Meta.allow_tag_with_these_attributes("li", []) - Meta.allow_tag_with_these_attributes("ol", []) - Meta.allow_tag_with_these_attributes("p", []) - Meta.allow_tag_with_these_attributes("pre", []) - Meta.allow_tag_with_these_attributes("strong", []) - Meta.allow_tag_with_these_attributes("u", []) - Meta.allow_tag_with_these_attributes("ul", []) - - Meta.allow_tag_with_this_attribute_values("span", "class", ["h-card"]) - Meta.allow_tag_with_these_attributes("span", []) - - @allow_inline_images Pleroma.Config.get([:markup, :allow_inline_images]) - - if @allow_inline_images do - # restrict img tags to http/https only, because of MediaProxy. - Meta.allow_tag_with_uri_attributes("img", ["src"], ["http", "https"]) - - Meta.allow_tag_with_these_attributes("img", [ - "width", - "height", - "class", - "title", - "alt" - ]) - end - - if Pleroma.Config.get([:markup, :allow_tables]) do - Meta.allow_tag_with_these_attributes("table", []) - Meta.allow_tag_with_these_attributes("tbody", []) - Meta.allow_tag_with_these_attributes("td", []) - Meta.allow_tag_with_these_attributes("th", []) - Meta.allow_tag_with_these_attributes("thead", []) - Meta.allow_tag_with_these_attributes("tr", []) - end - - if Pleroma.Config.get([:markup, :allow_headings]) do - Meta.allow_tag_with_these_attributes("h1", []) - Meta.allow_tag_with_these_attributes("h2", []) - Meta.allow_tag_with_these_attributes("h3", []) - Meta.allow_tag_with_these_attributes("h4", []) - Meta.allow_tag_with_these_attributes("h5", []) - end - - if Pleroma.Config.get([:markup, :allow_fonts]) do - Meta.allow_tag_with_these_attributes("font", ["face"]) - end - - Meta.strip_everything_not_covered() -end - -defmodule Pleroma.HTML.Transform.MediaProxy do - @moduledoc "Transforms inline image URIs to use MediaProxy." - - alias Pleroma.Web.MediaProxy - - def before_scrub(html), do: html - - def scrub_attribute("img", {"src", "http" <> target}) do - media_url = - ("http" <> target) - |> MediaProxy.url() - - {"src", media_url} - end - - def scrub_attribute(_tag, attribute), do: attribute - - def scrub({"img", attributes, children}) do - attributes = - attributes - |> Enum.map(fn attr -> scrub_attribute("img", attr) end) - |> Enum.reject(&is_nil(&1)) - - {"img", attributes, children} - end - - def scrub({:comment, _children}), do: "" - - def scrub({tag, attributes, children}), do: {tag, attributes, children} - def scrub({_tag, children}), do: children - def scrub(text), do: text -end