Merge pull request 'Update development docs' (#43) from norm/akkoma:docs-development...
author floatingghost <hannah@coffee-and-dreams.uk>
Mon, 4 Jul 2022 15:49:08 +0000 (15:49 +0000)
committer floatingghost <hannah@coffee-and-dreams.uk>
Mon, 4 Jul 2022 15:49:08 +0000 (15:49 +0000)
Reviewed-on: https://akkoma.dev/AkkomaGang/akkoma/pulls/43

15 files changed:
config/config.exs
config/description.exs
docs/configuration/cheatsheet.md
lib/pleroma/collections/fetcher.ex [new file with mode: 0644]
lib/pleroma/web/activity_pub/activity_pub.ex
test/fixtures/collections/ordered_array.json [new file with mode: 0644]
test/fixtures/collections/unordered_array.json [new file with mode: 0644]
test/fixtures/collections/unordered_page_embedded.json [new file with mode: 0644]
test/fixtures/collections/unordered_page_first.json [new file with mode: 0644]
test/fixtures/collections/unordered_page_reference.json [new file with mode: 0644]
test/fixtures/collections/unordered_page_second.json [new file with mode: 0644]
test/fixtures/friendica/friendica_featured_collection.json [new file with mode: 0644]
test/fixtures/friendica/friendica_featured_collection_first.json [new file with mode: 0644]
test/pleroma/collections/collections_fetcher_test.exs [new file with mode: 0644]
test/pleroma/web/activity_pub/activity_pub_test.exs

index eb39155df4b66ac3f222f079ff993e5a30a514ab..914b5db618381f2fb43b933a806819b2f6d2557d 100644 (file)
@@ -363,7 +363,8 @@ config :pleroma, :activitypub,
   follow_handshake_timeout: 500,
   note_replies_output_limit: 5,
   sign_object_fetches: true,
-  authorized_fetch_mode: false
+  authorized_fetch_mode: false,
+  max_collection_objects: 50
 
 config :pleroma, :streamer,
   workers: 3,
index 9401bed5c506cb60990f3b091b9310d8deeab405..3777905a36ddc0af2b412faaf8fcd63a278e7b4c 100644 (file)
@@ -1689,6 +1689,13 @@ config :pleroma, :config_description, [
         type: :integer,
         description: "Following handshake timeout",
         suggestions: [500]
+      },
+      %{
+        key: :max_collection_objects,
+        type: :integer,
+        description:
+          "The maximum number of items to fetch from a remote collections. Setting this too low can lead to only getting partial collections, but too high and you can end up fetching far too many objects.",
+        suggestions: [50]
       }
     ]
   },
index 3097f1190323d99d7f97879a1060b0c35811c846..11083e831527b0ae07b4705f77622f0b7f8e217a 100644 (file)
@@ -236,6 +236,7 @@ Notes:
 * `deny_follow_blocked`: Whether to disallow following an account that has blocked the user in question
 * `sign_object_fetches`: Sign object fetches with HTTP signatures
 * `authorized_fetch_mode`: Require HTTP signatures for AP fetches
+* `max_collection_objects`: The maximum number of objects to fetch from a remote AP collection.
 
 ## Pleroma.User
 
diff --git a/lib/pleroma/collections/fetcher.ex b/lib/pleroma/collections/fetcher.ex
new file mode 100644 (file)
index 0000000..382deff
--- /dev/null
@@ -0,0 +1,108 @@
+# Akkoma: The cooler fediverse server
+# Copyright © 2022- Akkoma Authors <https://akkoma.dev/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Akkoma.Collections.Fetcher do
+  @moduledoc """
+  ActivityPub collection fetching functions.
+
+  Collections may carry their items inline or split them over pages, see
+  https://www.w3.org/TR/activitystreams-core/#paging
+  """
+  alias Pleroma.Object.Fetcher
+  alias Pleroma.Config
+
+  # Logger.error/1 is a macro, so Logger has to be required before it is used.
+  require Logger
+
+  @doc """
+  Fetches the collection at `ap_id` and returns its items.
+
+  Thin wrapper around `fetch_collection/1` for callers that only hold an id.
+  """
+  def fetch_collection_by_ap_id(ap_id) when is_binary(ap_id) do
+    fetch_collection(ap_id)
+  end
+
+  @doc """
+  Returns `{:ok, items}` for a collection.
+
+  Accepts either the id of a remote collection, which is fetched first, or an
+  already decoded `Collection`/`OrderedCollection` map. When the initial fetch
+  does not return `{:ok, page}` the failure is logged and returned unchanged.
+  """
+  def fetch_collection(ap_id) when is_binary(ap_id) do
+    with {:ok, page} <- Fetcher.fetch_and_contain_remote_object_from_id(ap_id) do
+      {:ok, objects_from_collection(page)}
+    else
+      e ->
+        Logger.error("Could not fetch collection #{ap_id} - #{inspect(e)}")
+        e
+    end
+  end
+
+  def fetch_collection(%{"type" => type} = page)
+      when type in ["Collection", "OrderedCollection"] do
+    {:ok, objects_from_collection(page)}
+  end
+
+  # Items carried by a single page; pages without a usable item list are empty.
+  defp items_in_page(%{"type" => type, "orderedItems" => items})
+       when is_list(items) and type in ["OrderedCollection", "OrderedCollectionPage"],
+       do: items
+
+  defp items_in_page(%{"type" => type, "items" => items})
+       when is_list(items) and type in ["Collection", "CollectionPage"],
+       do: items
+
+  defp items_in_page(_page), do: []
+
+  # Inline items win; otherwise paging starts at the page referenced by "first".
+  defp objects_from_collection(%{"type" => "OrderedCollection", "orderedItems" => items})
+       when is_list(items),
+       do: items
+
+  defp objects_from_collection(%{"type" => "Collection", "items" => items}) when is_list(items),
+    do: items
+
+  defp objects_from_collection(%{"type" => type, "first" => first})
+       when is_binary(first) and type in ["Collection", "OrderedCollection"] do
+    fetch_page_items(first)
+  end
+
+  defp objects_from_collection(%{"type" => type, "first" => %{"id" => id}})
+       when is_binary(id) and type in ["Collection", "OrderedCollection"] do
+    fetch_page_items(id)
+  end
+
+  # Anything without inline items or a usable first page yields no items.
+  defp objects_from_collection(_collection), do: []
+
+  # Walks the pages starting at `id`. No further page is requested once at least
+  # :max_collection_objects items were collected, a page is empty or there is no
+  # next page. A page that cannot be fetched ends the walk with what was
+  # collected so far instead of raising a MatchError.
+  defp fetch_page_items(id, items \\ []) do
+    if Enum.count(items) >= Config.get([:activitypub, :max_collection_objects]) do
+      items
+    else
+      case Fetcher.fetch_and_contain_remote_object_from_id(id) do
+        {:ok, page} ->
+          case items_in_page(page) do
+            [] -> items
+            objects -> maybe_next_page(page, items ++ objects)
+          end
+
+        e ->
+          Logger.error("Could not fetch collection page #{id} - #{inspect(e)}")
+          items
+      end
+    end
+  end
+
+  defp maybe_next_page(%{"next" => id}, items) when is_binary(id) do
+    fetch_page_items(id, items)
+  end
+
+  defp maybe_next_page(_, items), do: items
+end
index e6548a8188a535d35cdfc29763bcce96cd63e94b..77f38f9f19bafb086bb2b4921a743d408c55f743 100644 (file)
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: AGPL-3.0-only
 
 defmodule Pleroma.Web.ActivityPub.ActivityPub do
+  alias Akkoma.Collections
   alias Pleroma.Activity
   alias Pleroma.Activity.Ir.Topics
   alias Pleroma.Config
@@ -1663,10 +1664,33 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do
   end
 
+  # Builds the pinned-objects map (object id => current UTC time) from a featured
+  # collection. A collection that only references its first page by URL has that
+  # page fetched here; any other collection is resolved via Collections.Fetcher.
+  # Every clause returns a bare map.
   def pin_data_from_featured_collection(%{
-        "type" => type,
-        "orderedItems" => objects
-      })
+        "type" => "OrderedCollection",
+        "first" => first
+      })
+      when is_binary(first) do
+    with {:ok, page} <- Fetcher.fetch_and_contain_remote_object_from_id(first) do
+      page
+      |> Map.get("orderedItems")
+      |> Map.new(fn %{"id" => object_ap_id} -> {object_ap_id, NaiveDateTime.utc_now()} end)
+    else
+      e ->
+        Logger.error("Could not decode featured collection at fetch #{first}, #{inspect(e)}")
+        # Same shape as the success branch: a map, not an {:ok, map} tuple.
+        %{}
+    end
+  end
+
+  def pin_data_from_featured_collection(
+        %{
+          "type" => type
+        } = collection
+      )
       when type in ["OrderedCollection", "Collection"] do
+    {:ok, objects} = Collections.Fetcher.fetch_collection(collection)
     Map.new(objects, fn %{"id" => object_ap_id} -> {object_ap_id, NaiveDateTime.utc_now()} end)
   end
 
diff --git a/test/fixtures/collections/ordered_array.json b/test/fixtures/collections/ordered_array.json
new file mode 100644 (file)
index 0000000..1f196c6
--- /dev/null
@@ -0,0 +1,19 @@
+{
+  "@context": "https://www.w3.org/ns/activitystreams",
+  "id": "https://example.com/collection/ordered_array",
+  "summary": "Object history",
+  "type": "OrderedCollection",
+  "totalItems": 2,
+  "orderedItems": [
+    {
+      "type": "Create",
+      "actor": "http://www.test.example/sally",
+      "object": "http://example.org/foo"
+    },
+    {
+      "type": "Like",
+      "actor": "http://www.test.example/joe",
+      "object": "http://example.org/foo"
+    }
+  ]
+}
diff --git a/test/fixtures/collections/unordered_array.json b/test/fixtures/collections/unordered_array.json
new file mode 100644 (file)
index 0000000..05d9f81
--- /dev/null
@@ -0,0 +1,19 @@
+{
+  "@context": "https://www.w3.org/ns/activitystreams",
+  "id": "https://example.com/collection/unordered_array",
+  "summary": "Object history",
+  "type": "Collection",
+  "totalItems": 2,
+  "items": [
+    {
+      "type": "Create",
+      "actor": "http://www.test.example/sally",
+      "object": "http://example.org/foo"
+    },
+    {
+      "type": "Like",
+      "actor": "http://www.test.example/joe",
+      "object": "http://example.org/foo"
+    }
+  ]
+}
diff --git a/test/fixtures/collections/unordered_page_embedded.json b/test/fixtures/collections/unordered_page_embedded.json
new file mode 100644 (file)
index 0000000..01f9230
--- /dev/null
@@ -0,0 +1,20 @@
+{
+  "@context": "https://www.w3.org/ns/activitystreams",
+  "summary": "Sally's recent activities",
+  "type": "Collection",
+  "id": "http://example.org/foo",
+  "totalItems": 10,
+  "first": {
+    "type": "CollectionPage",
+    "id": "http://example.org/foo?page=1",
+    "partOf": "http://example.org/foo",
+    "next": "http://example.org/foo?page=2",
+    "items": [
+      {
+        "type": "Create",
+        "actor": "http://www.test.example/sally",
+        "object": "http://example.org/foo"
+      }
+    ]
+  }
+}
diff --git a/test/fixtures/collections/unordered_page_first.json b/test/fixtures/collections/unordered_page_first.json
new file mode 100644 (file)
index 0000000..f6d54f3
--- /dev/null
@@ -0,0 +1,13 @@
+{
+  "type": "CollectionPage",
+  "id": "https://example.com/collection/unordered_page_reference?page=1",
+  "partOf": "https://example.com/collection/unordered_page_reference",
+  "next": "https://example.com/collection/unordered_page_reference?page=2",
+  "items": [
+    {
+      "type": "Create",
+      "actor": "http://www.test.example/sally",
+      "object": "http://example.org/foo"
+    }
+  ]
+}
diff --git a/test/fixtures/collections/unordered_page_reference.json b/test/fixtures/collections/unordered_page_reference.json
new file mode 100644 (file)
index 0000000..7376e4f
--- /dev/null
@@ -0,0 +1,8 @@
+{
+  "@context": "https://www.w3.org/ns/activitystreams",
+  "summary": "Sally's recent activities",
+  "type": "Collection",
+  "id": "https://example.com/collection/unordered_page_reference",
+  "totalItems": 10,
+  "first": "https://example.com/collection/unordered_page_reference?page=1"
+}
diff --git a/test/fixtures/collections/unordered_page_second.json b/test/fixtures/collections/unordered_page_second.json
new file mode 100644 (file)
index 0000000..ee557cb
--- /dev/null
@@ -0,0 +1,12 @@
+{
+  "type": "CollectionPage",
+  "id": "https://example.com/collection/unordered_page_reference?page=2",
+  "partOf": "https://example.com/collection/unordered_page_reference",
+  "items": [
+    {
+      "type": "Like",
+      "actor": "http://www.test.example/sally",
+      "object": "http://example.org/foo"
+    }
+  ]
+}
diff --git a/test/fixtures/friendica/friendica_featured_collection.json b/test/fixtures/friendica/friendica_featured_collection.json
new file mode 100644 (file)
index 0000000..f640975
--- /dev/null
@@ -0,0 +1,29 @@
+{
+    "@context": [
+        "https://www.w3.org/ns/activitystreams",
+        "https://w3id.org/security/v1",
+        {
+            "vcard": "http://www.w3.org/2006/vcard/ns#",
+            "dfrn": "http://purl.org/macgirvin/dfrn/1.0/",
+            "diaspora": "https://diasporafoundation.org/ns/",
+            "litepub": "http://litepub.social/ns#",
+            "toot": "http://joinmastodon.org/ns#",
+            "featured": {
+                "@id": "toot:featured",
+                "@type": "@id"
+            },
+            "schema": "http://schema.org#",
+            "manuallyApprovesFollowers": "as:manuallyApprovesFollowers",
+            "sensitive": "as:sensitive",
+            "Hashtag": "as:Hashtag",
+            "directMessage": "litepub:directMessage",
+            "discoverable": "toot:discoverable",
+            "PropertyValue": "schema:PropertyValue",
+            "value": "schema:value"
+        }
+    ],
+    "id": "https://friendica.example.com/featured/raha",
+    "type": "OrderedCollection",
+    "totalItems": 0,
+    "first": "https://friendica.example.com/featured/raha?page=1"
+}
diff --git a/test/fixtures/friendica/friendica_featured_collection_first.json b/test/fixtures/friendica/friendica_featured_collection_first.json
new file mode 100644 (file)
index 0000000..1f9dce4
--- /dev/null
@@ -0,0 +1,34 @@
+{
+    "@context": [
+        "https://www.w3.org/ns/activitystreams",
+        "https://w3id.org/security/v1",
+        {
+            "vcard": "http://www.w3.org/2006/vcard/ns#",
+            "dfrn": "http://purl.org/macgirvin/dfrn/1.0/",
+            "diaspora": "https://diasporafoundation.org/ns/",
+            "litepub": "http://litepub.social/ns#",
+            "toot": "http://joinmastodon.org/ns#",
+            "featured": {
+                "@id": "toot:featured",
+                "@type": "@id"
+            },
+            "schema": "http://schema.org#",
+            "manuallyApprovesFollowers": "as:manuallyApprovesFollowers",
+            "sensitive": "as:sensitive",
+            "Hashtag": "as:Hashtag",
+            "directMessage": "litepub:directMessage",
+            "discoverable": "toot:discoverable",
+            "PropertyValue": "schema:PropertyValue",
+            "value": "schema:value"
+        }
+    ],
+    "id": "https://friendica.example.com/featured/raha?page=1",
+    "type": "OrderedCollectionPage",
+    "totalItems": 0,
+    "partOf": "https://friendica.example.com/featured/raha",
+    "orderedItems": [
+        {
+            "id": "http://inserted"
+        }
+     ]
+}
diff --git a/test/pleroma/collections/collections_fetcher_test.exs b/test/pleroma/collections/collections_fetcher_test.exs
new file mode 100644 (file)
index 0000000..b9f84f5
--- /dev/null
@@ -0,0 +1,169 @@
+defmodule Akkoma.Collections.FetcherTest do
+  use Pleroma.DataCase
+  use Oban.Testing, repo: Pleroma.Repo
+
+  alias Akkoma.Collections.Fetcher
+
+  import Tesla.Mock
+
+  # Default mock; each test below replaces it with its own fixture-backed mock.
+  setup do
+    mock(fn env -> apply(HttpRequestMock, :request, [env]) end)
+    :ok
+  end
+
+  test "it should extract items from an embedded array in a Collection" do
+    unordered_collection =
+      "test/fixtures/collections/unordered_array.json"
+      |> File.read!()
+
+    ap_id = "https://example.com/collection/unordered_array"
+
+    Tesla.Mock.mock(fn
+      %{
+        method: :get,
+        url: ^ap_id
+      } ->
+        %Tesla.Env{
+          status: 200,
+          body: unordered_collection,
+          headers: [{"content-type", "application/activity+json"}]
+        }
+    end)
+
+    {:ok, objects} = Fetcher.fetch_collection_by_ap_id(ap_id)
+    assert [%{"type" => "Create"}, %{"type" => "Like"}] = objects
+  end
+
+  test "it should extract items from an embedded array in an OrderedCollection" do
+    ordered_collection =
+      "test/fixtures/collections/ordered_array.json"
+      |> File.read!()
+
+    ap_id = "https://example.com/collection/ordered_array"
+
+    Tesla.Mock.mock(fn
+      %{
+        method: :get,
+        url: ^ap_id
+      } ->
+        %Tesla.Env{
+          status: 200,
+          body: ordered_collection,
+          headers: [{"content-type", "application/activity+json"}]
+        }
+    end)
+
+    {:ok, objects} = Fetcher.fetch_collection_by_ap_id(ap_id)
+    assert [%{"type" => "Create"}, %{"type" => "Like"}] = objects
+  end
+
+  test "it should extract items from an referenced first page in a Collection" do
+    unordered_collection =
+      "test/fixtures/collections/unordered_page_reference.json"
+      |> File.read!()
+
+    first_page =
+      "test/fixtures/collections/unordered_page_first.json"
+      |> File.read!()
+
+    second_page =
+      "test/fixtures/collections/unordered_page_second.json"
+      |> File.read!()
+
+    ap_id = "https://example.com/collection/unordered_page_reference"
+    first_page_id = "https://example.com/collection/unordered_page_reference?page=1"
+    second_page_id = "https://example.com/collection/unordered_page_reference?page=2"
+
+    Tesla.Mock.mock(fn
+      %{
+        method: :get,
+        url: ^ap_id
+      } ->
+        %Tesla.Env{
+          status: 200,
+          body: unordered_collection,
+          headers: [{"content-type", "application/activity+json"}]
+        }
+
+      %{
+        method: :get,
+        url: ^first_page_id
+      } ->
+        %Tesla.Env{
+          status: 200,
+          body: first_page,
+          headers: [{"content-type", "application/activity+json"}]
+        }
+
+      %{
+        method: :get,
+        url: ^second_page_id
+      } ->
+        %Tesla.Env{
+          status: 200,
+          body: second_page,
+          headers: [{"content-type", "application/activity+json"}]
+        }
+    end)
+
+    {:ok, objects} = Fetcher.fetch_collection_by_ap_id(ap_id)
+    assert [%{"type" => "Create"}, %{"type" => "Like"}] = objects
+  end
+
+  test "it should stop fetching when we hit :max_collection_objects" do
+    # With a limit of 1 the first page already satisfies it, so paging stops there.
+    clear_config([:activitypub, :max_collection_objects], 1)
+
+    unordered_collection =
+      "test/fixtures/collections/unordered_page_reference.json"
+      |> File.read!()
+
+    first_page =
+      "test/fixtures/collections/unordered_page_first.json"
+      |> File.read!()
+
+    second_page =
+      "test/fixtures/collections/unordered_page_second.json"
+      |> File.read!()
+
+    ap_id = "https://example.com/collection/unordered_page_reference"
+    first_page_id = "https://example.com/collection/unordered_page_reference?page=1"
+    second_page_id = "https://example.com/collection/unordered_page_reference?page=2"
+
+    Tesla.Mock.mock(fn
+      %{
+        method: :get,
+        url: ^ap_id
+      } ->
+        %Tesla.Env{
+          status: 200,
+          body: unordered_collection,
+          headers: [{"content-type", "application/activity+json"}]
+        }
+
+      %{
+        method: :get,
+        url: ^first_page_id
+      } ->
+        %Tesla.Env{
+          status: 200,
+          body: first_page,
+          headers: [{"content-type", "application/activity+json"}]
+        }
+
+      %{
+        method: :get,
+        url: ^second_page_id
+      } ->
+        %Tesla.Env{
+          status: 200,
+          body: second_page,
+          headers: [{"content-type", "application/activity+json"}]
+        }
+    end)
+
+    {:ok, objects} = Fetcher.fetch_collection_by_ap_id(ap_id)
+    assert [%{"type" => "Create"}] = objects
+  end
+end
index 574ef0d7155da69623db2c7488495e5735a87e4a..2b65f59e0ac9335d54f83d6a6bafb04f79d69d82 100644 (file)
@@ -314,6 +314,44 @@ defmodule Pleroma.Web.ActivityPub.ActivityPubTest do
     end
   end
 
+  test "fetches user featured collection using the first property" do
+    featured_url = "https://friendica.example.com/raha/collections/featured"
+    first_url = "https://friendica.example.com/featured/raha?page=1"
+
+    featured_data =
+      "test/fixtures/friendica/friendica_featured_collection.json"
+      |> File.read!()
+
+    page_data =
+      "test/fixtures/friendica/friendica_featured_collection_first.json"
+      |> File.read!()
+
+    Tesla.Mock.mock(fn
+      %{
+        method: :get,
+        url: ^featured_url
+      } ->
+        %Tesla.Env{
+          status: 200,
+          body: featured_data,
+          headers: [{"content-type", "application/activity+json"}]
+        }
+
+      %{
+        method: :get,
+        url: ^first_url
+      } ->
+        %Tesla.Env{
+          status: 200,
+          body: page_data,
+          headers: [{"content-type", "application/activity+json"}]
+        }
+    end)
+
+    {:ok, data} = ActivityPub.fetch_and_prepare_featured_from_ap_id(featured_url)
+    assert Map.has_key?(data, "http://inserted")
+  end
+
   test "it fetches the appropriate tag-restricted posts" do
     user = insert(:user)