rich media: don't crawl bogus URIs
author William Pitcock <nenolod@dereferenced.org>
Sun, 24 Feb 2019 19:13:46 +0000 (19:13 +0000)
committer William Pitcock <nenolod@dereferenced.org>
Mon, 4 Mar 2019 18:31:49 +0000 (18:31 +0000)
lib/pleroma/web/rich_media/helpers.ex

index abb1cf7f2b481617fc18236328a397abf2032b65..fc9cbc868d8f038eaef8f22b25369de146d30f90 100644 (file)
@@ -8,10 +8,17 @@ defmodule Pleroma.Web.RichMedia.Helpers do
   alias Pleroma.HTML
   alias Pleroma.Web.RichMedia.Parser
 
+  # Decides whether an extracted page URL should be handed to the rich media
+  # parser. Accepts a binary or an already-parsed %URI{}.
+  #
+  # Returns :ok when the URI has both a scheme and an authority, and :error
+  # for nil, for URIs missing either part, and for any other term.
+  defp validate_page_url(nil), do: :error
+  defp validate_page_url(%URI{authority: nil}), do: :error
+  defp validate_page_url(%URI{scheme: nil}), do: :error
+  defp validate_page_url(%URI{}), do: :ok
+
+  # Only binaries are parsed: URI.parse/1 raises FunctionClauseError on other
+  # terms, which would bypass the caller's `with ... else` handling.
+  defp validate_page_url(page_url) when is_binary(page_url) do
+    page_url |> URI.parse() |> validate_page_url()
+  end
+
+  # Anything else cannot be a crawlable URL.
+  defp validate_page_url(_), do: :error
+
   def fetch_data_for_activity(%Activity{} = activity) do
     with true <- Pleroma.Config.get([:rich_media, :enabled]),
          %Object{} = object <- Object.normalize(activity.data["object"]),
          {:ok, page_url} <- HTML.extract_first_external_url(object, object.data["content"]),
+         :ok <- validate_page_url(page_url),
          {:ok, rich_media} <- Parser.parse(page_url) do
       %{page_url: page_url, rich_media: rich_media}
     else