remote_fetcher: 2,
attachments_cleanup: 1,
new_users_digest: 1,
+ hashtags_cleanup: 1,
mute_expire: 5
],
plugins: [Oban.Plugins.Pruner],
crontab: [
+ {"0 1 * * *", Pleroma.Workers.Cron.HashtagsCleanupWorker},
{"0 0 * * 0", Pleroma.Workers.Cron.DigestEmailsWorker},
{"0 0 * * *", Pleroma.Workers.Cron.NewUsersDigestWorker}
]
type: {:list, :tuple},
description: "Settings for cron background jobs",
suggestions: [
+ {"0 1 * * *", Pleroma.Workers.Cron.HashtagsCleanupWorker},
{"0 0 * * 0", Pleroma.Workers.Cron.DigestEmailsWorker},
{"0 0 * * *", Pleroma.Workers.Cron.NewUsersDigestWorker}
]
|> maybe_handle_hashtags_change(struct)
end
+ # Note: not checking activity type; HashtagsCleanupWorker should clean up unused records later
defp maybe_handle_hashtags_change(changeset, struct) do
with data_hashtags_change = get_change(changeset, :data),
true <- hashtags_changed?(struct, data_hashtags_change),
--- /dev/null
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Workers.Cron.HashtagsCleanupWorker do
+  @moduledoc """
+  Cron worker that deletes unused `hashtags_objects` rows and then unused `hashtags` rows.
+
+  Both deletions are issued as raw SQL through `Pleroma.Repo` with no query timeout.
+  """
+
+  use Oban.Worker, queue: "hashtags_cleanup"
+
+  alias Pleroma.Repo
+
+  require Logger
+
+  # Removes `hashtags_objects` rows whose object is not referenced by any `Create` activity.
+  @orphaned_links_sql """
+  DELETE FROM hashtags_objects WHERE object_id IN
+  (SELECT DISTINCT objects.id FROM objects
+  JOIN hashtags_objects ON hashtags_objects.object_id = objects.id LEFT JOIN activities
+  ON COALESCE(activities.data->'object'->>'id', activities.data->>'object') =
+  (objects.data->>'id')
+  AND activities.data->>'type' = 'Create'
+  WHERE activities.id IS NULL);
+  """
+
+  # Removes hashtags that have no `hashtags_objects` rows and were inserted before `$1`.
+  @unused_hashtags_sql """
+  DELETE FROM hashtags WHERE id IN
+  (SELECT hashtags.id FROM hashtags
+  LEFT OUTER JOIN hashtags_objects
+  ON hashtags_objects.hashtag_id = hashtags.id
+  WHERE hashtags_objects.hashtag_id IS NULL AND hashtags.inserted_at < $1);
+  """
+
+  # Hashtags inserted within this many seconds (24 hours) of the run are never deleted.
+  @grace_period_seconds 3600 * 24
+
+  @impl Oban.Worker
+  def perform(_job) do
+    Logger.info("Cleaning up unused `hashtags_objects` records...")
+    links_deleted = delete_rows(@orphaned_links_sql, [])
+    Logger.info("Deleted #{links_deleted} unused `hashtags_objects` records.")
+
+    Logger.info("Cleaning up unused `hashtags` records...")
+
+    # Note: ignoring recently created hashtags since references are added after hashtag is created
+    cutoff = NaiveDateTime.utc_now() |> NaiveDateTime.add(-@grace_period_seconds)
+    hashtags_deleted = delete_rows(@unused_hashtags_sql, [cutoff])
+    Logger.info("Deleted #{hashtags_deleted} unused `hashtags` records.")
+
+    Logger.info("HashtagsCleanupWorker complete.")
+
+    :ok
+  end
+
+  # Runs a DELETE statement without a timeout and returns the number of affected rows.
+  # The assertive match makes the job crash if the query does not succeed.
+  defp delete_rows(sql, params) do
+    {:ok, %{num_rows: deleted}} = Repo.query(sql, params, timeout: :infinity)
+    deleted
+  end
+end