Scrub html from activity.content or user.bio for opengraph meta
author raeno <just.raeno@gmail.com>
Fri, 14 Dec 2018 20:07:06 +0000 (21:07 +0100)
committer raeno <just.raeno@gmail.com>
Fri, 14 Dec 2018 20:07:06 +0000 (21:07 +0100)
lib/pleroma/web/ostatus/metadata.ex
mix.exs
mix.lock

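diff --git a/lib/pleroma/web/ostatus/metadata.ex b/lib/pleroma/web/ostatus/metadata.ex
index 120a89a8b931f1a12dda0b311647490febdbf225..9935726fc5008a58551234a86df2015d14564c38 100644 (file)
--- a/lib/pleroma/web/ostatus/metadata.ex
+++ b/lib/pleroma/web/ostatus/metadata.ex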
@@ -16,7 +16,7 @@ defmodule Pleroma.Web.Metadata do
 
   # opengraph for single status
   defp opengraph_tags(%{activity: activity, user: user}) do
-    with truncated_content = Formatter.truncate(activity.data["object"]["content"]) do
+    with truncated_content = scrub_html_and_truncate(activity.data["object"]["content"]) do
       [
         {:meta,
          [
@@ -35,7 +35,7 @@ defmodule Pleroma.Web.Metadata do
 
   # opengraph for user card
   defp opengraph_tags(%{user: user}) do
-    with truncated_bio = Formatter.truncate(user.bio) do
+    with truncated_bio = scrub_html_and_truncate(user.bio) do
       [
         {:meta,
          [
@@ -64,6 +64,14 @@ defmodule Pleroma.Web.Metadata do
     end
   end
 
+  defp scrub_html_and_truncate(content) do
+    content
+    # HTML from the DB is already entity-encoded: decode entities first, then strip the tags
+    |> HtmlEntities.decode()
+    |> Pleroma.HTML.strip_tags()
+    |> Formatter.truncate()
+  end
+
   defp user_avatar_url(user) do
     User.avatar_url(user) |> MediaProxy.url()
   end
diff --git a/mix.exs b/mix.exs
index 0fb40e07b11387b98ae161f5d83ddb939d6d2d5d..e5fb654dd52ccf082b985cec75fa2df500932ad8 100644 (file)
--- a/mix.exs
+++ b/mix.exs
@@ -58,6 +58,7 @@ defmodule Pleroma.Mixfile do
       {:pbkdf2_elixir, "~> 0.12.3"},
       {:trailing_format_plug, "~> 0.0.7"},
       {:html_sanitize_ex, "~> 1.3.0"},
+      {:html_entities, "~> 0.4"},
       {:phoenix_html, "~> 2.10"},
       {:calendar, "~> 0.17.4"},
       {:cachex, "~> 3.0.2"},
diff --git a/mix.lock b/mix.lock
index d9ae9a83b22ddf0ae6050244f0dd7ed03684fc4d..007079652577c53345b79eef4afd925e337e319a 100644 (file)
--- a/mix.lock
+++ b/mix.lock
@@ -23,6 +23,7 @@
   "gen_smtp": {:hex, :gen_smtp, "0.13.0", "11f08504c4bdd831dc520b8f84a1dce5ce624474a797394e7aafd3c29f5dcd25", [:rebar3], [], "hexpm"},
   "gettext": {:hex, :gettext, "0.15.0", "40a2b8ce33a80ced7727e36768499fc9286881c43ebafccae6bab731e2b2b8ce", [:mix], [], "hexpm"},
   "hackney": {:hex, :hackney, "1.13.0", "24edc8cd2b28e1c652593833862435c80661834f6c9344e84b6a2255e7aeef03", [:rebar3], [{:certifi, "2.3.1", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "5.1.2", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "1.0.2", [hex: :mimerl, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.1", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm"},
+  "html_entities": {:hex, :html_entities, "0.4.0", "f2fee876858cf6aaa9db608820a3209e45a087c5177332799592142b50e89a6b", [:mix], [], "hexpm"},
   "html_sanitize_ex": {:hex, :html_sanitize_ex, "1.3.0", "f005ad692b717691203f940c686208aa3d8ffd9dd4bb3699240096a51fa9564e", [:mix], [{:mochiweb, "~> 2.15", [hex: :mochiweb, repo: "hexpm", optional: false]}], "hexpm"},
   "httpoison": {:hex, :httpoison, "1.2.0", "2702ed3da5fd7a8130fc34b11965c8cfa21ade2f232c00b42d96d4967c39a3a3", [:mix], [{:hackney, "~> 1.8", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm"},
   "idna": {:hex, :idna, "5.1.2", "e21cb58a09f0228a9e0b95eaa1217f1bcfc31a1aaa6e1fdf2f53a33f7dbd9494", [:rebar3], [{:unicode_util_compat, "0.3.1", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm"},