rich media: don't crawl bogus URIs
[akkoma] / lib / pleroma / web / rich_media / helpers.ex
1 # Pleroma: A lightweight social networking server
2 # Copyright © 2017-2019 Pleroma Authors <https://pleroma.social/>
3 # SPDX-License-Identifier: AGPL-3.0-only
4
defmodule Pleroma.Web.RichMedia.Helpers do
  @moduledoc """
  Helpers for fetching rich media data for the first external URL found in
  the content of an activity's object.
  """

  alias Pleroma.Activity
  alias Pleroma.HTML
  alias Pleroma.Object
  alias Pleroma.Web.RichMedia.Parser

  # Decides whether `page_url` should be handed to the rich media parser.
  #
  # Accepts either a URL string or an already parsed `%URI{}` and returns `:ok`
  # only when the URI carries a scheme and a non-empty authority/host.
  # Everything else (including `nil` and non-binary terms) yields `:error`, so
  # the caller's `with` falls through to its `else` branch instead of crashing.
  defp validate_page_url(nil), do: :error

  # Empty strings are rejected alongside `nil`: depending on the Elixir version,
  # `URI.parse/1` reports an empty authority (e.g. "http://") as "" rather than
  # nil, and such a URI has nothing to crawl.
  defp validate_page_url(%URI{scheme: scheme, authority: authority, host: host})
       when scheme in [nil, ""] or authority in [nil, ""] or host in [nil, ""],
       do: :error

  defp validate_page_url(%URI{}), do: :ok

  defp validate_page_url(page_url) when is_binary(page_url) do
    page_url
    |> URI.parse()
    |> validate_page_url()
  end

  # `URI.parse/1` only accepts binaries; treat any other term as invalid rather
  # than letting it raise out of `fetch_data_for_activity/1`.
  defp validate_page_url(_page_url), do: :error

  @doc """
  Fetches rich media data for the given activity.

  Returns `%{page_url: page_url, rich_media: rich_media}` when all of the
  following hold:

    * `[:rich_media, :enabled]` is `true` in `Pleroma.Config`,
    * the activity's `"object"` normalizes to an `Object`,
    * `HTML.extract_first_external_url/2` returns `{:ok, page_url}` for the
      object's `"content"`,
    * `page_url` has both a scheme and a non-empty host, and
    * `Parser.parse/1` returns `{:ok, rich_media}` for that URL.

  If any step does not match, an empty map is returned.
  """
  @spec fetch_data_for_activity(%Activity{}) :: map()
  def fetch_data_for_activity(%Activity{} = activity) do
    with true <- Pleroma.Config.get([:rich_media, :enabled]),
         %Object{} = object <- Object.normalize(activity.data["object"]),
         {:ok, page_url} <- HTML.extract_first_external_url(object, object.data["content"]),
         :ok <- validate_page_url(page_url),
         {:ok, rich_media} <- Parser.parse(page_url) do
      %{page_url: page_url, rich_media: rich_media}
    else
      # Any failed step (disabled, no object, no URL, bogus URL, parse error)
      # simply means there is no rich media to attach.
      _ -> %{}
    end
  end
end