|> Repo.delete_all(timeout: :infinity)
if Keyword.get(options, :prune_orphaned_activities) do
+ # Prune remote activities that link to a single object which cannot be found
"""
delete from public.activities
where id in (
- select a.id from public.activities a
- left join public.objects o on a.data ->> 'object' = o.data ->> 'id'
- left join public.activities a2 on a.data ->> 'object' = a2.data ->> 'id'
- left join public.users u on a.data ->> 'object' = u.ap_id
- -- Only clean up remote activities
- where not a.local
- -- For now we only focus on activities with direct links to objects
- -- e.g. not json objects (in case of embedded objects) or json arrays (in case of multiple objects)
- and jsonb_typeof(a."data" -> 'object') = 'string'
- -- Find Activities that don't have existing objects
- and o.id is null
- and a2.id is null
- and u.id is null
+ select a.id from public.activities a
+ left join public.objects o on a.data ->> 'object' = o.data ->> 'id'
+ left join public.activities a2 on a.data ->> 'object' = a2.data ->> 'id'
+ left join public.users u on a.data ->> 'object' = u.ap_id
+ where not a.local
+ and jsonb_typeof(a."data" -> 'object') = 'string'
+ and o.id is null
+ and a2.id is null
+ and u.id is null
)
"""
- |> Repo.query()
+ |> Repo.query([], timeout: :infinity)
+
+ # Prune remote activities that link to an array of objects when none of the
+ # referenced ids can be found in the objects, activities or users tables.
+ # Grouping by activity id and checking max(...) is null keeps an activity as
+ # soon as at least one of its array entries still resolves.
+ # Local activities are excluded, matching the single-object query above.
+ """
+ delete from public.activities
+ where id in (
+ select a.id from public.activities a
+ join json_array_elements_text((a."data" -> 'object')::json) as j on jsonb_typeof(a."data" -> 'object') = 'array'
+ left join public.objects o on j.value = o.data ->> 'id'
+ left join public.activities a2 on j.value = a2.data ->> 'id'
+ left join public.users u on j.value = u.ap_id
+ where not a.local
+ group by a.id
+ having max(o.data ->> 'id') is null
+ and max(a2.data ->> 'id') is null
+ and max(u.ap_id) is null
+ )
+ """
+ |> Repo.query([], timeout: :infinity)
end
- prune_hashtags_query = """
+ """
DELETE FROM hashtags AS ht
WHERE NOT EXISTS (
SELECT 1 FROM hashtags_objects hto
WHERE ht.id = hto.hashtag_id)
"""
-
- Repo.query(prune_hashtags_query)
+ |> Repo.query()
if Keyword.get(options, :vacuum) do
Maintenance.vacuum("full")
assert length(Repo.all(Object)) == 1
end
- test "We don't have unexpected tables which can contain objects that are referenced by activities" do
+ test "We don't have unexpected tables which may contain objects that are referenced by activities" do
# We can delete orphaned activities. For that we look for the objects they reference in the 'objects', 'activities', and 'users' tables.
# If someone adds another table that can contain objects (e.g. one with separate relations or collections), then we need to make sure we
# add logic for that in the 'prune_objects' task so that we don't wrongly delete their corresponding activities.
assert length(activities) == 4
end
+
+ test "it prunes orphaned activities with the --prune-orphaned-activities when the objects are referenced from an array" do
+   # Records the array entries below can resolve to: one object and one actor.
+   # insert!/1 is used so a failing fixture aborts the test immediately instead
+   # of surfacing later as a confusing count mismatch.
+   %Object{} |> Map.merge(%{data: %{"id" => "existing_object"}}) |> Repo.insert!()
+   %User{} |> Map.merge(%{ap_id: "existing_actor"}) |> Repo.insert!()
+
+   # Inserts a remote activity whose "object" field is a list of references.
+   insert_remote_activity = fn id, object_refs ->
+     %Activity{}
+     |> Map.merge(%{local: false, data: %{"id" => id, "object" => object_refs}})
+     |> Repo.insert!()
+   end
+
+   # Multiple objects, one object exists (keep)
+   insert_remote_activity.(
+     "remote_activity_existing_object",
+     ["non_ existing_object", "existing_object"]
+   )
+
+   # Multiple objects, one actor exists (keep)
+   insert_remote_activity.(
+     "remote_activity_existing_actor",
+     ["non_ existing_object", "existing_actor"]
+   )
+
+   # Multiple objects, one activity exists (keep)
+   insert_remote_activity.(
+     "remote_activity_existing_activity",
+     ["non_ existing_object", "remote_activity_existing_actor"]
+   )
+
+   # Multiple objects none exist (prune)
+   insert_remote_activity.(
+     "remote_activity_without_existing_referenced_object",
+     ["owo", "whats_this"]
+   )
+
+   assert length(Repo.all(Activity)) == 4
+
+   # Without the flag, orphaned activities must be left alone.
+   Mix.Tasks.Pleroma.Database.run(["prune_objects"])
+   assert length(Repo.all(Activity)) == 4
+
+   Mix.Tasks.Pleroma.Database.run(["prune_objects", "--prune-orphaned-activities"])
+
+   # Compare the exact set of survivors: this checks both that the orphan is
+   # gone and that every activity with a resolvable reference was kept.
+   remaining_ids =
+     Activity
+     |> Repo.all()
+     |> Enum.map(fn a -> a.data["id"] end)
+     |> Enum.sort()
+
+   assert remaining_ids == [
+            "remote_activity_existing_activity",
+            "remote_activity_existing_actor",
+            "remote_activity_existing_object"
+          ]
+ end
end
describe "running update_users_following_followers_counts" do