OptionParser.parse(
args,
strict: [
- vacuum: :boolean
+ vacuum: :boolean,
+ keep_threads: :boolean,
+ keep_non_public: :boolean
]
)
start_pleroma()
deadline = Pleroma.Config.get([:instance, :remote_post_retention_days])
+ time_deadline = NaiveDateTime.utc_now() |> NaiveDateTime.add(-(deadline * 86_400))
- Logger.info("Pruning objects older than #{deadline} days")
+ log_message = "Pruning objects older than #{deadline} days"
- time_deadline =
- NaiveDateTime.utc_now()
- |> NaiveDateTime.add(-(deadline * 86_400))
+ log_message =
+ if Keyword.get(options, :keep_non_public) do
+ log_message <> ", keeping non public posts"
+ else
+ log_message
+ end
- from(o in Object,
- where:
- fragment(
- "?->'to' \\? ? OR ?->'cc' \\? ?",
- o.data,
- ^Pleroma.Constants.as_public(),
- o.data,
- ^Pleroma.Constants.as_public()
- ),
- where: o.inserted_at < ^time_deadline,
- where:
+ log_message =
+ if Keyword.get(options, :keep_threads) do
+ log_message <> ", keeping threads intact"
+ else
+ log_message
+ end
+
+ Logger.info(log_message)
+
+ if Keyword.get(options, :keep_threads) do
+ # We want to delete objects from threads where
+ # 1. the newest post is still old
+ # 2. none of the activities is local
+ # 3. none of the activities is bookmarked
+ # 4. optionally none of the posts is non-public
+ deletable_context =
+ if Keyword.get(options, :keep_non_public) do
+ Pleroma.Activity
+ |> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id)
+ |> group_by([a], fragment("? ->> 'context'::text", a.data))
+ |> having(
+ [a],
+ not fragment(
+ # True when any post (checked via its Create activity) is non-public
+ "bool_or((not(?->'to' \\? ? OR ?->'cc' \\? ?)) and ? ->> 'type' = 'Create')",
+ a.data,
+ ^Pleroma.Constants.as_public(),
+ a.data,
+ ^Pleroma.Constants.as_public(),
+ a.data
+ )
+ )
+ else
+ Pleroma.Activity
+ |> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id)
+ |> group_by([a], fragment("? ->> 'context'::text", a.data))
+ end
+ |> having([a], max(a.updated_at) < ^time_deadline)
+ |> having([a], not fragment("bool_or(?)", a.local))
+ |> having([_, b], fragment("max(?::text) is null", b.id))
+ |> select([a], fragment("? ->> 'context'::text", a.data))
+
+ Pleroma.Object
+ |> where([o], fragment("? ->> 'context'::text", o.data) in subquery(deletable_context))
+ else
+ if Keyword.get(options, :keep_non_public) do
+ Pleroma.Object
+ |> where(
+ [o],
+ fragment(
+ "?->'to' \\? ? OR ?->'cc' \\? ?",
+ o.data,
+ ^Pleroma.Constants.as_public(),
+ o.data,
+ ^Pleroma.Constants.as_public()
+ )
+ )
+ else
+ Pleroma.Object
+ end
+ |> where([o], o.updated_at < ^time_deadline)
+ |> where(
+ [o],
fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host())
- )
+ )
+ end
|> Repo.delete_all(timeout: :infinity)
prune_hashtags_query = """
describe "prune_objects" do
test "it prunes old objects from the database" do
+ # Runs the task without flags. The assertions below expect both backdated notes
+ # (the untouched one and the one whose "to" list was emptied) to be deleted and
+ # exactly one object to remain.
+ deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) + 1
+
+ # One day further back than the configured retention period, truncated to seconds.
+ date =
+ Timex.now()
+ |> Timex.shift(days: -deadline)
+ |> Timex.to_naive_datetime()
+ |> NaiveDateTime.truncate(:second)
+
+ # This note is never backdated in this test.
insert(:note)
+
+ # Backdate `updated_at` of a second note and keep only its id for the assertions.
+ %{id: note_remote_public_id} =
+ :note
+ |> insert()
+ |> Ecto.Changeset.change(%{updated_at: date})
+ |> Repo.update!()
+
+ # Bind the whole struct and, in the same match, its id and data.
+ note_remote_non_public =
+ %{id: note_remote_non_public_id, data: note_remote_non_public_data} =
+ :note
+ |> insert()
+
+ # Backdate the third note and replace its "to" list with an empty list.
+ # NOTE(review): presumably this makes the note non-public; that also depends on
+ # what the factory puts in "cc" — confirm.
+ note_remote_non_public
+ |> Ecto.Changeset.change(%{
+ updated_at: date,
+ data: note_remote_non_public_data |> update_in(["to"], fn _ -> [] end)
+ })
+ |> Repo.update!()
+
+ assert length(Repo.all(Object)) == 3
+
+ Mix.Tasks.Pleroma.Database.run(["prune_objects"])
+
+ assert length(Repo.all(Object)) == 1
+ refute Object.get_by_id(note_remote_public_id)
+ refute Object.get_by_id(note_remote_non_public_id)
+ end
+
+ test "with the --keep-non-public option it still keeps non-public posts even if they are not local" do
+ # Runs the task with --keep-non-public. The assertions below expect only the
+ # backdated note with an unmodified "to" list to be deleted (3 objects -> 2).
deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) + 1
date =
|> Timex.to_naive_datetime()
|> NaiveDateTime.truncate(:second)
- %{id: id} =
+ insert(:note)
+
+ %{id: note_remote_id} =
:note
|> insert()
- |> Ecto.Changeset.change(%{inserted_at: date})
+ |> Ecto.Changeset.change(%{updated_at: date})
|> Repo.update!()
+ # Second backdated note: bind the struct and its data, then empty its "to" list.
+ # NOTE(review): presumably this is what makes it non-public — confirm against the factory.
+ note_remote_non_public =
+ %{data: note_remote_non_public_data} =
+ :note
+ |> insert()
+
+ note_remote_non_public
+ |> Ecto.Changeset.change(%{
+ updated_at: date,
+ data: note_remote_non_public_data |> update_in(["to"], fn _ -> [] end)
+ })
+ |> Repo.update!()
+
+ assert length(Repo.all(Object)) == 3
+
+ Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-non-public"])
+
+ assert length(Repo.all(Object)) == 2
+ refute Object.get_by_id(note_remote_id)
+ end
- Mix.Tasks.Pleroma.Database.run(["prune_objects"])
+ test "with the --keep-threads and --keep-non-public option it keeps old threads with non-public replies even if the interaction is not local" do
+ # For non-public we only check Create Activities because only these are relevant for threads
+ # Flags are always non-public, Announces from relays can be non-public...
+ #
+ # Two old, fully remote threads are built. The assertions expect the thread whose
+ # reply (a Create) had its "to" emptied to survive, and the thread that only has
+ # an Announce with an emptied "to" to be pruned (3 objects -> 2).
+ deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) + 1
+
+ # One day further back than the configured retention period, truncated to seconds.
+ old_insert_date =
+ Timex.now()
+ |> Timex.shift(days: -deadline)
+ |> Timex.to_naive_datetime()
+ |> NaiveDateTime.truncate(:second)
+
+ remote_user1 = insert(:user, local: false)
+ remote_user2 = insert(:user, local: false)
+
+ # Thread 1: old remote post with an old remote reply whose "to" is emptied (should be kept)
+ {:ok, old_remote_post1_activity} =
+ CommonAPI.post(remote_user1, %{status: "some thing", local: false})
+
+ old_remote_post1_activity
+ |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
+ |> Repo.update!()
+
+ {:ok, old_remote_non_public_reply_activity} =
+ CommonAPI.post(remote_user2, %{
+ status: "some reply",
+ in_reply_to_status_id: old_remote_post1_activity.id
+ })
+
+ old_remote_non_public_reply_activity
+ |> Ecto.Changeset.change(%{
+ local: false,
+ updated_at: old_insert_date,
+ data: old_remote_non_public_reply_activity.data |> update_in(["to"], fn _ -> [] end)
+ })
+ |> Repo.update!()
+
+ # Thread 2: old remote post with an old remote Announce whose "to" is emptied (should be removed)
+ {:ok, old_remote_post2_activity = %{data: %{"object" => old_remote_post2_id}}} =
+ CommonAPI.post(remote_user1, %{status: "some thing", local: false})
+
+ old_remote_post2_activity
+ |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
+ |> Repo.update!()
+
+ {:ok, old_remote_non_public_repeat_activity} =
+ CommonAPI.repeat(old_remote_post2_activity.id, remote_user2)
+
+ old_remote_non_public_repeat_activity
+ |> Ecto.Changeset.change(%{
+ local: false,
+ updated_at: old_insert_date,
+ data: old_remote_non_public_repeat_activity.data |> update_in(["to"], fn _ -> [] end)
+ })
+ |> Repo.update!()
+
+ assert length(Repo.all(Object)) == 3
+
+ Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads", "--keep-non-public"])
+
+ assert length(Repo.all(Object)) == 2
+ refute Object.get_by_ap_id(old_remote_post2_id)
+ end
+
+ test "with the --keep-threads option it still keeps non-old threads even with no local interactions" do
+ # A two-post thread between remote users whose timestamps are left untouched;
+ # the object count is expected to be 2 both before and after the task runs.
+ author = insert(:user, local: false)
+ replier = insert(:user, local: false)
+
+ {:ok, thread_root} = CommonAPI.post(author, %{status: "some thing", local: false})
+
+ reply_params = %{status: "some reply", in_reply_to_status_id: thread_root.id}
+ {:ok, thread_reply} = CommonAPI.post(replier, reply_params)
+
+ # Flag both activities as non-local, root first and reply second.
+ Enum.each([thread_root, thread_reply], fn activity ->
+ activity
+ |> Ecto.Changeset.change(%{local: false})
+ |> Repo.update!()
+ end)
+
+ objects_before = Repo.all(Object)
+ assert length(objects_before) == 2
+
+ Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"])
+
+ objects_after = Repo.all(Object)
+ assert length(objects_after) == 2
+ end
+
+ test "with the --keep-threads option it deletes old threads with no local interaction" do
+ # One old thread (post, reply, Like, Announce) in which every activity is flagged
+ # non-local and backdated; the assertions expect every object to be pruned.
+ deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) + 1
+
+ # One day further back than the configured retention period, truncated to seconds.
+ old_insert_date =
+ Timex.now()
+ |> Timex.shift(days: -deadline)
+ |> Timex.to_naive_datetime()
+ |> NaiveDateTime.truncate(:second)
+
+ remote_user = insert(:user, local: false)
+ remote_user2 = insert(:user, local: false)
+
+ {:ok, old_remote_post_activity} =
+ CommonAPI.post(remote_user, %{status: "some thing", local: false})
+
+ old_remote_post_activity
+ |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
+ |> Repo.update!()
+
+ {:ok, old_remote_post_reply_activity} =
+ CommonAPI.post(remote_user2, %{
+ status: "some reply",
+ in_reply_to_status_id: old_remote_post_activity.id
+ })
+
+ old_remote_post_reply_activity
+ |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
+ |> Repo.update!()
+
+ {:ok, old_favourite_activity} =
+ CommonAPI.favorite(remote_user2, old_remote_post_activity.id)
+
+ old_favourite_activity
+ |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
+ |> Repo.update!()
+
+ {:ok, old_repeat_activity} = CommonAPI.repeat(old_remote_post_activity.id, remote_user2)
+
+ old_repeat_activity
+ |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
+ |> Repo.update!()
+
+ assert length(Repo.all(Object)) == 2
+
+ Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"])
+
+ # Compare against the empty list directly instead of `length(...) == 0`, so a
+ # failure shows which objects survived.
+ assert Repo.all(Object) == []
+ end
+
+ test "with the --keep-threads option it keeps old threads with local interaction" do
+ # Three old remote threads, each touched by a local user in a different way
+ # (reply, Like, Announce); the object count is expected to stay at 4.
+ retention_days = Pleroma.Config.get([:instance, :remote_post_retention_days])
+
+ old_insert_date =
+ Timex.now()
+ |> Timex.shift(days: -(retention_days + 1))
+ |> Timex.to_naive_datetime()
+ |> NaiveDateTime.truncate(:second)
+
+ # Sets an activity's `local` flag and backdates its `updated_at`.
+ backdate = fn activity, local_flag ->
+ activity
+ |> Ecto.Changeset.change(%{local: local_flag, updated_at: old_insert_date})
+ |> Repo.update!()
+ end
+
+ remote_user = insert(:user, local: false)
+ local_user = insert(:user, local: true)
+
+ # local reply
+ {:ok, replied_post} = CommonAPI.post(remote_user, %{status: "some thing", local: false})
+ backdate.(replied_post, false)
+
+ reply_params = %{status: "some reply", in_reply_to_status_id: replied_post.id}
+ {:ok, local_reply} = CommonAPI.post(local_user, reply_params)
+ backdate.(local_reply, true)
+
+ # local Like
+ {:ok, liked_post} = CommonAPI.post(remote_user, %{status: "some thing", local: false})
+ backdate.(liked_post, false)
+
+ {:ok, local_like} = CommonAPI.favorite(local_user, liked_post.id)
+ backdate.(local_like, true)
+
+ # local Announce
+ {:ok, repeated_post} = CommonAPI.post(remote_user, %{status: "some thing", local: false})
+ backdate.(repeated_post, false)
+
+ {:ok, local_repeat} = CommonAPI.repeat(repeated_post.id, local_user)
+ backdate.(local_repeat, true)
+
+ objects_before = Repo.all(Object)
+ assert length(objects_before) == 4
+
+ Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"])
+
+ objects_after = Repo.all(Object)
+ assert length(objects_after) == 4
+ end
+
+ test "with the --keep-threads option it keeps old threads with bookmarked posts" do
+ # A single old remote post that a local user has bookmarked; the object count is
+ # expected to be 1 both before and after the task runs.
+ deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) + 1
+
+ # One day further back than the configured retention period, truncated to seconds.
+ old_insert_date =
+ Timex.now()
+ |> Timex.shift(days: -deadline)
+ |> Timex.to_naive_datetime()
+ |> NaiveDateTime.truncate(:second)
+
+ remote_user = insert(:user, local: false)
+ local_user = insert(:user, local: true)
+
+ {:ok, old_remote_post_activity} =
+ CommonAPI.post(remote_user, %{status: "some thing", local: false})
+
+ # Flag the activity as non-local and backdate its `updated_at`.
+ old_remote_post_activity
+ |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
+ |> Repo.update!()
+
+ # The bookmark is the only local interaction with this thread.
+ Pleroma.Bookmark.create(local_user.id, old_remote_post_activity.id)
+
+ assert length(Repo.all(Object)) == 1
+
+ Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"])
assert length(Repo.all(Object)) == 1
- refute Object.get_by_id(id)
end
end