instance.reachable?: Limit to binary input
[akkoma] / lib / pleroma / instances / instance.ex
index 74458c09aa5043ed0e491fb51eb64101f106c95f..2e1696fe217a1a401d3c649071fed8328df184ad 100644 (file)
@@ -14,9 +14,13 @@ defmodule Pleroma.Instances.Instance do
   import Ecto.Query
   import Ecto.Changeset
 
+  require Logger
+
   schema "instances" do
     field(:host, :string)
     field(:unreachable_since, :naive_datetime_usec)
+    field(:favicon, :string)
+    field(:favicon_updated_at, :naive_datetime)
 
     timestamps()
   end
@@ -25,7 +29,7 @@ defmodule Pleroma.Instances.Instance do
 
   def changeset(struct, params \\ %{}) do
     struct
-    |> cast(params, [:host, :unreachable_since])
+    |> cast(params, [:host, :unreachable_since, :favicon, :favicon_updated_at])
     |> validate_required([:host])
     |> unique_constraint(:host)
   end
@@ -73,7 +77,7 @@ defmodule Pleroma.Instances.Instance do
     )
   end
 
-  def reachable?(_), do: true
+  def reachable?(url_or_host) when is_binary(url_or_host), do: true
 
   def set_reachable(url_or_host) when is_binary(url_or_host) do
     with host <- host(url_or_host),
@@ -115,9 +119,80 @@ defmodule Pleroma.Instances.Instance do
 
   def set_unreachable(_, _), do: {:error, nil}
 
+  def get_consistently_unreachable do
+    reachability_datetime_threshold = Instances.reachability_datetime_threshold()
+
+    from(i in Instance,
+      where: ^reachability_datetime_threshold > i.unreachable_since,
+      order_by: i.unreachable_since,
+      select: {i.host, i.unreachable_since}
+    )
+    |> Repo.all()
+  end
+
   defp parse_datetime(datetime) when is_binary(datetime) do
     NaiveDateTime.from_iso8601(datetime)
   end
 
   defp parse_datetime(datetime), do: datetime
+
+  def get_or_update_favicon(%URI{host: host} = instance_uri) do
+    existing_record = Repo.get_by(Instance, %{host: host})
+    now = NaiveDateTime.utc_now()
+
+    if existing_record && existing_record.favicon_updated_at &&
+         NaiveDateTime.diff(now, existing_record.favicon_updated_at) < 86_400 do
+      existing_record.favicon
+    else
+      favicon = scrape_favicon(instance_uri)
+
+      if existing_record do
+        existing_record
+        |> changeset(%{favicon: favicon, favicon_updated_at: now})
+        |> Repo.update()
+      else
+        %Instance{}
+        |> changeset(%{host: host, favicon: favicon, favicon_updated_at: now})
+        |> Repo.insert()
+      end
+
+      favicon
+    end
+  rescue
+    e ->
+      Logger.warn("Instance.get_or_update_favicon(\"#{host}\") error: #{inspect(e)}")
+      nil
+  end
+
+  defp scrape_favicon(%URI{} = instance_uri) do
+    try do
+      with {_, true} <- {:reachable, reachable?(instance_uri.host)},
+           {:ok, %Tesla.Env{body: html}} <-
+             Pleroma.HTTP.get(to_string(instance_uri), [{"accept", "text/html"}], pool: :media),
+           {_, [favicon_rel | _]} when is_binary(favicon_rel) <-
+             {:parse,
+              html |> Floki.parse_document!() |> Floki.attribute("link[rel=icon]", "href")},
+           {_, favicon} when is_binary(favicon) <-
+             {:merge, URI.merge(instance_uri, favicon_rel) |> to_string()} do
+        favicon
+      else
+        {:reachable, false} ->
+          Logger.debug(
+            "Instance.scrape_favicon(\"#{to_string(instance_uri)}\") ignored unreachable host"
+          )
+
+          nil
+
+        _ ->
+          nil
+      end
+    rescue
+      e ->
+        Logger.warn(
+          "Instance.scrape_favicon(\"#{to_string(instance_uri)}\") error: #{inspect(e)}"
+        )
+
+        nil
+    end
+  end
 end