Add collection fetching module
authorFloatingGhost <hannah@coffee-and-dreams.uk>
Sun, 3 Jul 2022 18:20:59 +0000 (19:20 +0100)
committerFloatingGhost <hannah@coffee-and-dreams.uk>
Sun, 3 Jul 2022 18:20:59 +0000 (19:20 +0100)
config/config.exs
lib/pleroma/collections/fetcher.ex [new file with mode: 0644]
test/fixtures/collections/ordered_array.json [new file with mode: 0644]
test/fixtures/collections/unordered_array.json [new file with mode: 0644]
test/fixtures/collections/unordered_page_embedded.json [new file with mode: 0644]
test/fixtures/collections/unordered_page_first.json [new file with mode: 0644]
test/fixtures/collections/unordered_page_reference.json [new file with mode: 0644]
test/fixtures/collections/unordered_page_second.json [new file with mode: 0644]
test/pleroma/collections/collections_fetcher_test.exs [new file with mode: 0644]

index eb39155df4b66ac3f222f079ff993e5a30a514ab..914b5db618381f2fb43b933a806819b2f6d2557d 100644 (file)
@@ -363,7 +363,8 @@ config :pleroma, :activitypub,
   follow_handshake_timeout: 500,
   note_replies_output_limit: 5,
   sign_object_fetches: true,
-  authorized_fetch_mode: false
+  authorized_fetch_mode: false,
+  max_collection_objects: 50
 
 config :pleroma, :streamer,
   workers: 3,
diff --git a/lib/pleroma/collections/fetcher.ex b/lib/pleroma/collections/fetcher.ex
new file mode 100644 (file)
index 0000000..205c62b
--- /dev/null
@@ -0,0 +1,68 @@
+# Akkoma: The cooler fediverse server
+# Copyright © 2022- Akkoma Authors <https://akkoma.dev/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Akkoma.Collections.Fetcher do
+  @moduledoc """
+  Activitypub Collections fetching functions
+  see: https://www.w3.org/TR/activitystreams-core/#paging
+  """
+  alias Pleroma.Object.Fetcher
+  alias Pleroma.Config
+
+  @collection_types ["Collection", "OrderedCollection"]
+
+  @doc """
+  Fetches the collection document at `ap_id` and returns the items it holds.
+
+  Items embedded directly in the collection (`items` / `orderedItems`) are
+  returned as they are. Otherwise the pages are walked, starting at `first` and
+  following `next` links, until a page has no items, there is no further `next`
+  link, or `[:activitypub, :max_collection_objects]` items have been gathered.
+  A paged result is cut down to that limit; a non-integer limit means no limit.
+
+  Returns `{:ok, items}` on success. If the collection or one of its pages
+  cannot be fetched, whatever the object fetcher returned is passed through
+  unchanged. A document that is not a `Collection` or `OrderedCollection`
+  yields `{:error, :not_a_collection}`.
+  """
+  def fetch_collection_by_ap_id(ap_id) when is_binary(ap_id) do
+    fetch_collection(ap_id)
+  end
+
+  defp fetch_collection(ap_id) do
+    with {:ok, collection} <- Fetcher.fetch_and_contain_remote_object_from_id(ap_id) do
+      objects_from_collection(collection)
+    end
+  end
+
+  # Items of a single page; a page without a matching item list counts as empty.
+  defp items_in_page(%{"type" => type, "orderedItems" => items})
+       when is_list(items) and type in ["OrderedCollection", "OrderedCollectionPage"],
+       do: items
+
+  defp items_in_page(%{"type" => type, "items" => items})
+       when is_list(items) and type in ["Collection", "CollectionPage"],
+       do: items
+
+  defp items_in_page(_page), do: []
+
+  # Items embedded directly in the collection take precedence over paging links.
+  defp objects_from_collection(%{"type" => "OrderedCollection", "orderedItems" => items})
+       when is_list(items),
+       do: {:ok, items}
+
+  defp objects_from_collection(%{"type" => "Collection", "items" => items}) when is_list(items),
+    do: {:ok, items}
+
+  defp objects_from_collection(%{"type" => type, "first" => first})
+       when is_binary(first) and type in @collection_types do
+    fetch_page_items(first)
+  end
+
+  # An embedded first page is fetched by its id; its inline items are not used.
+  defp objects_from_collection(%{"type" => type, "first" => %{"id" => id}})
+       when is_binary(id) and type in @collection_types do
+    fetch_page_items(id)
+  end
+
+  # A collection with neither embedded items nor a usable first page is empty.
+  defp objects_from_collection(%{"type" => type}) when type in @collection_types,
+    do: {:ok, []}
+
+  defp objects_from_collection(_other), do: {:error, :not_a_collection}
+
+  # Walks the page chain starting at `first_page_id`. Pages are accumulated
+  # newest-first together with a running item count, so neither appending nor
+  # counting has to re-traverse everything gathered so far.
+  defp fetch_page_items(first_page_id) do
+    limit = Config.get([:activitypub, :max_collection_objects])
+    fetch_page_items(first_page_id, [], 0, limit)
+  end
+
+  # Limit reached: stop without requesting the next page.
+  defp fetch_page_items(_id, pages, count, limit) when is_integer(limit) and count >= limit,
+    do: {:ok, collected(pages, limit)}
+
+  defp fetch_page_items(id, pages, count, limit) do
+    # A failed page fetch falls out of the `with` and is returned to the caller
+    # instead of raising a MatchError.
+    with {:ok, page} <- Fetcher.fetch_and_contain_remote_object_from_id(id) do
+      case items_in_page(page) do
+        [] -> {:ok, collected(pages, limit)}
+        objects -> maybe_next_page(page, [objects | pages], count + length(objects), limit)
+      end
+    end
+  end
+
+  defp maybe_next_page(%{"next" => id}, pages, count, limit) when is_binary(id) do
+    fetch_page_items(id, pages, count, limit)
+  end
+
+  defp maybe_next_page(_page, pages, _count, limit), do: {:ok, collected(pages, limit)}
+
+  # Restores page order and drops whatever the last page added beyond the limit.
+  defp collected(pages, limit) do
+    items = pages |> Enum.reverse() |> Enum.concat()
+    if is_integer(limit), do: Enum.take(items, limit), else: items
+  end
+end
diff --git a/test/fixtures/collections/ordered_array.json b/test/fixtures/collections/ordered_array.json
new file mode 100644 (file)
index 0000000..1f196c6
--- /dev/null
@@ -0,0 +1,19 @@
+{
+  "@context": "https://www.w3.org/ns/activitystreams",
+  "id": "https://example.com/collection/ordered_array",
+  "summary": "Object history",
+  "type": "OrderedCollection",
+  "totalItems": 2,
+  "orderedItems": [
+    {
+      "type": "Create",
+      "actor": "http://www.test.example/sally",
+      "object": "http://example.org/foo"
+    },
+    {
+      "type": "Like",
+      "actor": "http://www.test.example/joe",
+      "object": "http://example.org/foo"
+    }
+  ]
+}
diff --git a/test/fixtures/collections/unordered_array.json b/test/fixtures/collections/unordered_array.json
new file mode 100644 (file)
index 0000000..05d9f81
--- /dev/null
@@ -0,0 +1,19 @@
+{
+  "@context": "https://www.w3.org/ns/activitystreams",
+  "id": "https://example.com/collection/unordered_array",
+  "summary": "Object history",
+  "type": "Collection",
+  "totalItems": 2,
+  "items": [
+    {
+      "type": "Create",
+      "actor": "http://www.test.example/sally",
+      "object": "http://example.org/foo"
+    },
+    {
+      "type": "Like",
+      "actor": "http://www.test.example/joe",
+      "object": "http://example.org/foo"
+    }
+  ]
+}
diff --git a/test/fixtures/collections/unordered_page_embedded.json b/test/fixtures/collections/unordered_page_embedded.json
new file mode 100644 (file)
index 0000000..01f9230
--- /dev/null
@@ -0,0 +1,20 @@
+{
+  "@context": "https://www.w3.org/ns/activitystreams",
+  "summary": "Sally's recent activities",
+  "type": "Collection",
+  "id": "http://example.org/foo",
+  "totalItems": 10,
+  "first": {
+    "type": "CollectionPage",
+    "id": "http://example.org/foo?page=1",
+    "partOf": "http://example.org/foo",
+    "next": "http://example.org/foo?page=2",
+    "items": [
+      {
+        "type": "Create",
+        "actor": "http://www.test.example/sally",
+        "object": "http://example.org/foo"
+      }
+    ]
+  }
+}
diff --git a/test/fixtures/collections/unordered_page_first.json b/test/fixtures/collections/unordered_page_first.json
new file mode 100644 (file)
index 0000000..f6d54f3
--- /dev/null
@@ -0,0 +1,13 @@
+{
+  "type": "CollectionPage",
+  "id": "https://example.com/collection/unordered_page_reference?page=1",
+  "partOf": "https://example.com/collection/unordered_page_reference",
+  "next": "https://example.com/collection/unordered_page_reference?page=2",
+  "items": [
+    {
+      "type": "Create",
+      "actor": "http://www.test.example/sally",
+      "object": "http://example.org/foo"
+    }
+  ]
+}
diff --git a/test/fixtures/collections/unordered_page_reference.json b/test/fixtures/collections/unordered_page_reference.json
new file mode 100644 (file)
index 0000000..7376e4f
--- /dev/null
@@ -0,0 +1,8 @@
+{
+  "@context": "https://www.w3.org/ns/activitystreams",
+  "summary": "Sally's recent activities",
+  "type": "Collection",
+  "id": "https://example.com/collection/unordered_page_reference",
+  "totalItems": 10,
+  "first": "https://example.com/collection/unordered_page_reference?page=1"
+}
diff --git a/test/fixtures/collections/unordered_page_second.json b/test/fixtures/collections/unordered_page_second.json
new file mode 100644 (file)
index 0000000..ee557cb
--- /dev/null
@@ -0,0 +1,12 @@
+{
+  "type": "CollectionPage",
+  "id": "https://example.com/collection/unordered_page_reference?page=2",
+  "partOf": "https://example.com/collection/unordered_page_reference",
+  "items": [
+    {
+      "type": "Like",
+      "actor": "http://www.test.example/sally",
+      "object": "http://example.org/foo"
+    }
+  ]
+}
diff --git a/test/pleroma/collections/collections_fetcher_test.exs b/test/pleroma/collections/collections_fetcher_test.exs
new file mode 100644 (file)
index 0000000..b9f84f5
--- /dev/null
@@ -0,0 +1,167 @@
+defmodule Akkoma.Collections.FetcherTest do
+  use Pleroma.DataCase
+  use Oban.Testing, repo: Pleroma.Repo
+
+  alias Akkoma.Collections.Fetcher
+
+  import Tesla.Mock
+
+  @fixture_dir "test/fixtures/collections"
+
+  setup do
+    mock(fn env -> apply(HttpRequestMock, :request, [env]) end)
+    :ok
+  end
+
+  test "it should extract items from an embedded array in a Collection" do
+    # Request the id that the fixture declares for itself.
+    ap_id = "https://example.com/collection/unordered_array"
+    mock_fixtures(%{ap_id => "unordered_array.json"})
+
+    {:ok, objects} = Fetcher.fetch_collection_by_ap_id(ap_id)
+    assert [%{"type" => "Create"}, %{"type" => "Like"}] = objects
+  end
+
+  test "it should extract items from an embedded array in an OrderedCollection" do
+    ap_id = "https://example.com/collection/ordered_array"
+    mock_fixtures(%{ap_id => "ordered_array.json"})
+
+    {:ok, objects} = Fetcher.fetch_collection_by_ap_id(ap_id)
+    assert [%{"type" => "Create"}, %{"type" => "Like"}] = objects
+  end
+
+  test "it should extract items from an referenced first page in a Collection" do
+    ap_id = "https://example.com/collection/unordered_page_reference"
+    first_page_id = "https://example.com/collection/unordered_page_reference?page=1"
+    second_page_id = "https://example.com/collection/unordered_page_reference?page=2"
+
+    mock_fixtures(%{
+      ap_id => "unordered_page_reference.json",
+      first_page_id => "unordered_page_first.json",
+      second_page_id => "unordered_page_second.json"
+    })
+
+    {:ok, objects} = Fetcher.fetch_collection_by_ap_id(ap_id)
+    assert [%{"type" => "Create"}, %{"type" => "Like"}] = objects
+  end
+
+  test "it should stop fetching when we hit :max_collection_objects" do
+    clear_config([:activitypub, :max_collection_objects], 1)
+
+    ap_id = "https://example.com/collection/unordered_page_reference"
+    first_page_id = "https://example.com/collection/unordered_page_reference?page=1"
+
+    # The second page is deliberately left unmocked: the first page already
+    # fills the limit, so requesting the second one would fail the test.
+    mock_fixtures(%{
+      ap_id => "unordered_page_reference.json",
+      first_page_id => "unordered_page_first.json"
+    })
+
+    {:ok, objects} = Fetcher.fetch_collection_by_ap_id(ap_id)
+    assert [%{"type" => "Create"}] = objects
+  end
+
+  # Serves each URL in `fixtures_by_url` with the contents of the named file
+  # from the collections fixture directory, as an ActivityPub JSON response.
+  # A GET for any other URL raises inside the mock.
+  defp mock_fixtures(fixtures_by_url) do
+    bodies =
+      Map.new(fixtures_by_url, fn {url, file} ->
+        {url, File.read!(Path.join(@fixture_dir, file))}
+      end)
+
+    mock(fn %{method: :get, url: url} ->
+      %Tesla.Env{
+        status: 200,
+        body: Map.fetch!(bodies, url),
+        headers: [{"content-type", "application/activity+json"}]
+      }
+    end)
+  end
+end