Scrape instance nodeinfo (#251)
[akkoma] / lib / pleroma / instances / instance.ex
index 533dbbb825789ef72936b0131b821a54a4529a24..fcf3181bf530550cb18120a6b873b0772f6b67db 100644 (file)
@@ -5,6 +5,8 @@
 defmodule Pleroma.Instances.Instance do
   @moduledoc "Instance."
 
+  @cachex Pleroma.Config.get([:cachex, :provider], Cachex)
+
   alias Pleroma.Instances
   alias Pleroma.Instances.Instance
   alias Pleroma.Repo
@@ -22,7 +24,8 @@ defmodule Pleroma.Instances.Instance do
     field(:host, :string)
     field(:unreachable_since, :naive_datetime_usec)
     field(:favicon, :string)
-    field(:favicon_updated_at, :naive_datetime)
+    field(:metadata_updated_at, :naive_datetime)
+    field(:nodeinfo, :map, default: %{})
 
     timestamps()
   end
@@ -31,7 +34,7 @@ defmodule Pleroma.Instances.Instance do
 
   def changeset(struct, params \\ %{}) do
     struct
-    |> cast(params, [:host, :unreachable_since, :favicon, :favicon_updated_at])
+    |> cast(params, [:host, :unreachable_since, :favicon, :nodeinfo, :metadata_updated_at])
     |> validate_required([:host])
     |> unique_constraint(:host)
   end
@@ -138,63 +141,144 @@ defmodule Pleroma.Instances.Instance do
 
   defp parse_datetime(datetime), do: datetime
 
-  def get_or_update_favicon(%URI{host: host} = instance_uri) do
-    existing_record = Repo.get_by(Instance, %{host: host})
+  def needs_update(nil), do: true
+
+  def needs_update(%Instance{metadata_updated_at: nil}), do: true
+
+  def needs_update(%Instance{metadata_updated_at: metadata_updated_at}) do
     now = NaiveDateTime.utc_now()
+    NaiveDateTime.diff(now, metadata_updated_at) > 86_400
+  end
 
-    if existing_record && existing_record.favicon_updated_at &&
-         NaiveDateTime.diff(now, existing_record.favicon_updated_at) < 86_400 do
-      existing_record.favicon
+  def local do
+    %Instance{
+      host: Pleroma.Web.Endpoint.host(),
+      favicon: Pleroma.Web.Endpoint.url() <> "/favicon.png",
+      nodeinfo: Pleroma.Web.Nodeinfo.NodeinfoController.raw_nodeinfo()
+    }
+  end
+
+  def update_metadata(%URI{host: host} = uri) do
+    Logger.info("Checking metadata for #{host}")
+    existing_record = Repo.get_by(Instance, %{host: host})
+
+    if reachable?(host) do
+      do_update_metadata(uri, existing_record)
     else
-      favicon = scrape_favicon(instance_uri)
+      {:discard, :unreachable}
+    end
+  end
 
-      if existing_record do
-        existing_record
-        |> changeset(%{favicon: favicon, favicon_updated_at: now})
-        |> Repo.update()
+  defp do_update_metadata(%URI{host: host} = uri, existing_record) do
+    if existing_record do
+      if needs_update(existing_record) do
+        Logger.info("Updating metadata for #{host}")
+        favicon = scrape_favicon(uri)
+        nodeinfo = scrape_nodeinfo(uri)
+
+        {:ok, instance} =
+          existing_record
+          |> changeset(%{
+            host: host,
+            favicon: favicon,
+            nodeinfo: nodeinfo,
+            metadata_updated_at: NaiveDateTime.utc_now()
+          })
+          |> Repo.update()
+
+        @cachex.put(:instances_cache, "instances:#{host}", instance)
       else
+        {:discard, "Does not require update"}
+      end
+    else
+      favicon = scrape_favicon(uri)
+      nodeinfo = scrape_nodeinfo(uri)
+
+      Logger.info("Creating metadata for #{host}")
+
+      {:ok, instance} =
         %Instance{}
-        |> changeset(%{host: host, favicon: favicon, favicon_updated_at: now})
+        |> changeset(%{
+          host: host,
+          favicon: favicon,
+          nodeinfo: nodeinfo,
+          metadata_updated_at: NaiveDateTime.utc_now()
+        })
         |> Repo.insert()
-      end
 
-      favicon
+      @cachex.put(:instances_cache, "instances:#{host}", instance)
     end
-  rescue
-    e ->
-      Logger.warn("Instance.get_or_update_favicon(\"#{host}\") error: #{inspect(e)}")
+  end
+
+  def get_favicon(%URI{host: host}) do
+    existing_record = Repo.get_by(Instance, %{host: host})
+
+    if existing_record do
+      existing_record.favicon
+    else
       nil
+    end
   end
 
-  defp scrape_favicon(%URI{} = instance_uri) do
-    try do
-      with {_, true} <- {:reachable, reachable?(instance_uri.host)},
-           {:ok, %Tesla.Env{body: html}} <-
-             Pleroma.HTTP.get(to_string(instance_uri), [{"accept", "text/html"}], []),
-           {_, [favicon_rel | _]} when is_binary(favicon_rel) <-
-             {:parse,
-              html |> Floki.parse_document!() |> Floki.attribute("link[rel=icon]", "href")},
-           {_, favicon} when is_binary(favicon) <-
-             {:merge, URI.merge(instance_uri, favicon_rel) |> to_string()} do
-        favicon
-      else
-        {:reachable, false} ->
-          Logger.debug(
-            "Instance.scrape_favicon(\"#{to_string(instance_uri)}\") ignored unreachable host"
-          )
+  defp scrape_nodeinfo(%URI{} = instance_uri) do
+    with true <- Pleroma.Config.get([:instances_nodeinfo, :enabled]),
+         {_, true} <- {:reachable, reachable?(instance_uri.host)},
+         {:ok, %Tesla.Env{status: 200, body: body}} <-
+           Tesla.get(
+             "https://#{instance_uri.host}/.well-known/nodeinfo",
+             headers: [{"Accept", "application/json"}]
+           ),
+         {:ok, json} <- Jason.decode(body),
+         {:ok, %{"links" => links}} <- {:ok, json},
+         {:ok, %{"href" => href}} <-
+           {:ok,
+            Enum.find(links, &(&1["rel"] == "http://nodeinfo.diaspora.software/ns/schema/2.0"))},
+         {:ok, %Tesla.Env{body: data}} <-
+           Pleroma.HTTP.get(href, [{"accept", "application/json"}], []),
+         {:length, true} <- {:length, String.length(data) < 50_000},
+         {:ok, nodeinfo} <- Jason.decode(data) do
+      nodeinfo
+    else
+      {:reachable, false} ->
+        Logger.debug(
+          "Instance.scrape_nodeinfo(\"#{to_string(instance_uri)}\") ignored unreachable host"
+        )
 
-          nil
+        nil
 
-        _ ->
-          nil
-      end
-    rescue
-      e ->
-        Logger.warn(
-          "Instance.scrape_favicon(\"#{to_string(instance_uri)}\") error: #{inspect(e)}"
+      {:length, false} ->
+        Logger.debug(
+          "Instance.scrape_nodeinfo(\"#{to_string(instance_uri)}\") ignored too long body"
         )
 
         nil
+
+      _ ->
+        nil
+    end
+  end
+
+  defp scrape_favicon(%URI{} = instance_uri) do
+    with true <- Pleroma.Config.get([:instances_favicons, :enabled]),
+         {_, true} <- {:reachable, reachable?(instance_uri.host)},
+         {:ok, %Tesla.Env{body: html}} <-
+           Pleroma.HTTP.get(to_string(instance_uri), [{"accept", "text/html"}], []),
+         {_, [favicon_rel | _]} when is_binary(favicon_rel) <-
+           {:parse, html |> Floki.parse_document!() |> Floki.attribute("link[rel=icon]", "href")},
+         {_, favicon} when is_binary(favicon) <-
+           {:merge, URI.merge(instance_uri, favicon_rel) |> to_string()},
+         {:length, true} <- {:length, String.length(favicon) < 255} do
+      favicon
+    else
+      {:reachable, false} ->
+        Logger.debug(
+          "Instance.scrape_favicon(\"#{to_string(instance_uri)}\") ignored unreachable host"
+        )
+
+        nil
+
+      _ ->
+        nil
     end
   end
 
@@ -217,4 +301,25 @@ defmodule Pleroma.Instances.Instance do
     end)
     |> Stream.run()
   end
+
+  def get_by_url(url_or_host) do
+    url = host(url_or_host)
+    Repo.get_by(Instance, host: url)
+  end
+
+  def get_cached_by_url(url_or_host) do
+    url = host(url_or_host)
+
+    if url == Pleroma.Web.Endpoint.host() do
+      {:ok, local()}
+    else
+      @cachex.fetch!(:instances_cache, "instances:#{url}", fn _ ->
+        with %Instance{} = instance <- get_by_url(url) do
+          {:commit, {:ok, instance}}
+        else
+          _ -> {:ignore, nil}
+        end
+      end)
+    end
+  end
 end