mix pleroma.database remove_embedded_objects [option ...]
```
-
### Options
- `--vacuum` - run `VACUUM FULL` after the embedded objects are replaced with their references
This will prune remote posts older than 90 days (configurable with [`config :pleroma, :instance, remote_post_retention_days`](../../configuration/cheatsheet.md#instance)) from the database. Pruned posts may be refetched in some cases.
+!!! note
+ The disk space will only be reclaimed after a proper vacuum. By default Postgresql does this for you on a regular basis, but if your instance has been running for a long time and there are many rows deleted, it may be advantageous to use `VACUUM FULL` (e.g. by using the `--vacuum` option).
+
!!! danger
- The disk space will only be reclaimed after `VACUUM FULL`. You may run out of disk space during the execution of the task or vacuuming if you don't have about 1/3rds of the database size free.
+ You may run out of disk space during the execution of the task or vacuuming if you don't have about 1/3rds of the database size free. Vacuum causes a substantial increase in I/O traffic, and may lead to a degraded experience while it is running.
=== "OTP"
### Options
-- `--keep-threads` - don't prune posts when they are part of a thread where at least one post has seen local interaction (e.g. one of the posts is a local post, or is favourited by a local user, or has been repeated by a local user...)
-- `--keep-non-public` - keep non-public posts like DM's and followers-only, even if they are remote
-- `--vacuum` - run `VACUUM FULL` after the objects are pruned
+- `--keep-threads` - Don't prune posts when they are part of a thread where at least one post has seen local interaction (e.g. one of the posts is a local post, or is favourited by a local user, or has been repeated by a local user...). It also wont delete posts when at least one of the posts in that thread is kept (e.g. because one of the posts has seen recent activity).
+- `--keep-non-public` - Keep non-public posts like DM's and followers-only, even if they are remote.
+- `--prune-orphaned-activities` - Also prune orphaned activities afterwards. Activities are things like Like, Create, Announce, Flag (aka reports)... They can significantly help reduce the database size.
+- `--vacuum` - Run `VACUUM FULL` after the objects are pruned. This should not be used on a regular basis, but is useful if your instance has been running for a long time before pruning.
## Create a conversation for all existing DMs
## Vacuum the database
+!!! note
+ By default Postgresql has an autovacuum deamon running. While the tasks described here can help in some cases, they shouldn't be needed on a regular basis. See [the Postgresql docs on vacuuming](https://www.postgresql.org/docs/current/sql-vacuum.html) for more information on this.
+
### Analyze
Running an `analyze` vacuum job can improve performance by updating statistics used by the query planner. **It is safe to cancel this.**
strict: [
vacuum: :boolean,
keep_threads: :boolean,
- keep_non_public: :boolean
+ keep_non_public: :boolean,
+ prune_orphaned_activities: :boolean
]
)
log_message
end
+ log_message =
+ if Keyword.get(options, :prune_orphaned_activities) do
+ log_message <> ", pruning orphaned activities"
+ else
+ log_message
+ end
+
+ log_message =
+ if Keyword.get(options, :vacuum) do
+ log_message <>
+ ", doing a full vacuum (you shouldn't do this as a recurring maintanance task)"
+ else
+ log_message
+ end
+
Logger.info(log_message)
if Keyword.get(options, :keep_threads) do
end
|> Repo.delete_all(timeout: :infinity)
- prune_hashtags_query = """
+ if Keyword.get(options, :prune_orphaned_activities) do
+ # Prune activities who link to a single object
+ """
+ delete from public.activities
+ where id in (
+ select a.id from public.activities a
+ left join public.objects o on a.data ->> 'object' = o.data ->> 'id'
+ left join public.activities a2 on a.data ->> 'object' = a2.data ->> 'id'
+ left join public.users u on a.data ->> 'object' = u.ap_id
+ where not a.local
+ and jsonb_typeof(a."data" -> 'object') = 'string'
+ and o.id is null
+ and a2.id is null
+ and u.id is null
+ )
+ """
+ |> Repo.query([], timeout: :infinity)
+
+ # Prune activities who link to an array of objects
+ """
+ delete from public.activities
+ where id in (
+ select a.id from public.activities a
+ join json_array_elements_text((a."data" -> 'object')::json) as j on jsonb_typeof(a."data" -> 'object') = 'array'
+ left join public.objects o on j.value = o.data ->> 'id'
+ left join public.activities a2 on j.value = a2.data ->> 'id'
+ left join public.users u on j.value = u.ap_id
+ group by a.id
+ having max(o.data ->> 'id') is null
+ and max(a2.data ->> 'id') is null
+ and max(u.ap_id) is null
+ )
+ """
+ |> Repo.query([], timeout: :infinity)
+ end
+
+ """
DELETE FROM hashtags AS ht
WHERE NOT EXISTS (
SELECT 1 FROM hashtags_objects hto
WHERE ht.id = hto.hashtag_id)
"""
-
- Repo.query(prune_hashtags_query)
+ |> Repo.query()
if Keyword.get(options, :vacuum) do
Maintenance.vacuum("full")
assert length(Repo.all(Object)) == 1
end
+
+ test "We don't have unexpected tables which may contain objects that are referenced by activities" do
+ # We can delete orphaned activities. For that we look for the objects they reference in the 'objects', 'activities', and 'users' table.
+ # If someone adds another table with objects (idk, maybe with separate relations, or collections or w/e), then we need to make sure we
+ # add logic for that in the 'prune_objects' task so that we don't wrongly delete their corresponding activities.
+ # So when someone adds (or removes) a table, this test will fail.
+ # Either the table contains objects which can be referenced from the activities table
+ # => in that case the prune_objects job should be adapted so we don't delete activities who still have the referenced object.
+ # Or it doesn't contain objects which can be referenced from the activities table
+ # => in that case you can add/remove the table to/from this (sorted) list.
+
+ assert Repo.query!(
+ "SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';"
+ ).rows
+ |> Enum.sort() == [
+ ["activities"],
+ ["announcement_read_relationships"],
+ ["announcements"],
+ ["apps"],
+ ["backups"],
+ ["bookmarks"],
+ ["chat_message_references"],
+ ["chats"],
+ ["config"],
+ ["conversation_participation_recipient_ships"],
+ ["conversation_participations"],
+ ["conversations"],
+ ["counter_cache"],
+ ["data_migration_failed_ids"],
+ ["data_migrations"],
+ ["deliveries"],
+ ["filters"],
+ ["following_relationships"],
+ ["hashtags"],
+ ["hashtags_objects"],
+ ["instances"],
+ ["lists"],
+ ["markers"],
+ ["mfa_tokens"],
+ ["moderation_log"],
+ ["notifications"],
+ ["oauth_authorizations"],
+ ["oauth_tokens"],
+ ["oban_jobs"],
+ ["oban_peers"],
+ ["objects"],
+ ["password_reset_tokens"],
+ ["push_subscriptions"],
+ ["registrations"],
+ ["report_notes"],
+ ["scheduled_activities"],
+ ["schema_migrations"],
+ ["thread_mutes"],
+ ["user_follows_hashtag"],
+ ["user_frontend_setting_profiles"],
+ ["user_invite_tokens"],
+ ["user_notes"],
+ ["user_relationships"],
+ ["users"]
+ ]
+ end
+
+ test "it prunes orphaned activities with the --prune-orphaned-activities" do
+ %Object{} |> Map.merge(%{data: %{"id" => "object_for_activity"}}) |> Repo.insert()
+
+ %Activity{}
+ |> Map.merge(%{
+ local: false,
+ data: %{"id" => "remote_activity_with_object", "object" => "object_for_activity"}
+ })
+ |> Repo.insert()
+
+ %Activity{}
+ |> Map.merge(%{
+ local: false,
+ data: %{
+ "id" => "remote_activity_with_activity",
+ "object" => "remote_activity_with_object"
+ }
+ })
+ |> Repo.insert()
+
+ %User{} |> Map.merge(%{ap_id: "actor"}) |> Repo.insert()
+
+ %Activity{}
+ |> Map.merge(%{
+ local: false,
+ data: %{"id" => "remote_activity_with_actor", "object" => "actor"}
+ })
+ |> Repo.insert()
+
+ %Activity{}
+ |> Map.merge(%{
+ local: false,
+ data: %{
+ "id" => "remote_activity_without_existing_referenced_object",
+ "object" => "non_existing"
+ }
+ })
+ |> Repo.insert()
+
+ %Activity{}
+ |> Map.merge(%{
+ local: true,
+ data: %{"id" => "local_activity_with_actor", "object" => "non_existing"}
+ })
+ |> Repo.insert()
+
+ assert length(Repo.all(Activity)) == 5
+ Mix.Tasks.Pleroma.Database.run(["prune_objects"])
+ assert length(Repo.all(Activity)) == 5
+ Mix.Tasks.Pleroma.Database.run(["prune_objects", "--prune-orphaned-activities"])
+ activities = Repo.all(Activity)
+
+ assert "remote_activity_without_existing_referenced_object" not in Enum.map(
+ activities,
+ fn a -> a.data["id"] end
+ )
+
+ assert length(activities) == 4
+ end
+
+ test "it prunes orphaned activities with the --prune-orphaned-activities when the objects are referenced from an array" do
+ %Object{} |> Map.merge(%{data: %{"id" => "existing_object"}}) |> Repo.insert()
+ %User{} |> Map.merge(%{ap_id: "existing_actor"}) |> Repo.insert()
+
+ %Activity{}
+ |> Map.merge(%{
+ local: false,
+ data: %{
+ "id" => "remote_activity_existing_object",
+ "object" => ["non_ existing_object", "existing_object"]
+ }
+ })
+ |> Repo.insert()
+
+ %Activity{}
+ |> Map.merge(%{
+ local: false,
+ data: %{
+ "id" => "remote_activity_existing_actor",
+ "object" => ["non_ existing_object", "existing_actor"]
+ }
+ })
+ |> Repo.insert()
+
+ %Activity{}
+ |> Map.merge(%{
+ local: false,
+ data: %{
+ "id" => "remote_activity_existing_activity",
+ "object" => ["non_ existing_object", "remote_activity_existing_actor"]
+ }
+ })
+ |> Repo.insert()
+
+ %Activity{}
+ |> Map.merge(%{
+ local: false,
+ data: %{
+ "id" => "remote_activity_without_existing_referenced_object",
+ "object" => ["owo", "whats_this"]
+ }
+ })
+ |> Repo.insert()
+
+ assert length(Repo.all(Activity)) == 4
+ Mix.Tasks.Pleroma.Database.run(["prune_objects"])
+ assert length(Repo.all(Activity)) == 4
+ Mix.Tasks.Pleroma.Database.run(["prune_objects", "--prune-orphaned-activities"])
+ activities = Repo.all(Activity)
+ assert length(activities) == 3
+
+ assert "remote_activity_without_existing_referenced_object" not in Enum.map(
+ activities,
+ fn a -> a.data["id"] end
+ )
+
+ assert length(activities) == 3
+ end
end
describe "running update_users_following_followers_counts" do