[#3213] Added HashtagsCleanupWorker periodic job.
author Ivan Tashkinov <ivantashkinov@gmail.com>
Sun, 31 Jan 2021 15:24:19 +0000 (18:24 +0300)
committer Ivan Tashkinov <ivantashkinov@gmail.com>
Sun, 31 Jan 2021 15:24:19 +0000 (18:24 +0300)
config/config.exs
config/description.exs
lib/pleroma/migrators/hashtags_table_migrator.ex
lib/pleroma/object.ex
lib/pleroma/workers/cron/hashtags_cleanup_worker.ex [new file with mode: 0644]

index c4a69079925df1a4de625c8e9c43d9926b358e39..dfd2fc434e47a4b7ff668dc6b9c0435de3663dcb 100644 (file)
@@ -553,10 +553,12 @@ config :pleroma, Oban,
     remote_fetcher: 2,
     attachments_cleanup: 1,
     new_users_digest: 1,
+    hashtags_cleanup: 1,
     mute_expire: 5
   ],
   plugins: [Oban.Plugins.Pruner],
   crontab: [
+    {"0 1 * * *", Pleroma.Workers.Cron.HashtagsCleanupWorker},
     {"0 0 * * 0", Pleroma.Workers.Cron.DigestEmailsWorker},
     {"0 0 * * *", Pleroma.Workers.Cron.NewUsersDigestWorker}
   ]
index 46f085c70de263935c40e2a391b2381a73351d1d..147c1930c3692849b0e14582d3a991944fcf5f5d 100644 (file)
@@ -1943,6 +1943,7 @@ config :pleroma, :config_description, [
         type: {:list, :tuple},
         description: "Settings for cron background jobs",
         suggestions: [
+          {"0 1 * * *", Pleroma.Workers.Cron.HashtagsCleanupWorker},
           {"0 0 * * 0", Pleroma.Workers.Cron.DigestEmailsWorker},
           {"0 0 * * *", Pleroma.Workers.Cron.NewUsersDigestWorker}
         ]
index 6a1c9592c46f6676ba24c431dc42343cfe89f8e4..07b42a7f4d946ede09fc0813c884c34846009ece 100644 (file)
@@ -152,6 +152,7 @@ defmodule Pleroma.Migrators.HashtagsTableMigrator do
 
   defp query do
     # Note: most objects have Mention-type AS2 tags and no hashtags (but we can't filter them out)
+    # Note: not checking activity type; HashtagsCleanupWorker should clean up unused records later
     from(
       object in Object,
       where:
index 9edf43e04a9ad294721e38b69ce08d4f7a04dab2..52b77e41ca2b269636e57d09be5788c767d1025d 100644 (file)
@@ -65,6 +65,7 @@ defmodule Pleroma.Object do
     |> maybe_handle_hashtags_change(struct)
   end
 
+  # Note: not checking activity type; HashtagsCleanupWorker should clean up unused records later
   defp maybe_handle_hashtags_change(changeset, struct) do
     with data_hashtags_change = get_change(changeset, :data),
          true <- hashtags_changed?(struct, data_hashtags_change),
diff --git a/lib/pleroma/workers/cron/hashtags_cleanup_worker.ex b/lib/pleroma/workers/cron/hashtags_cleanup_worker.ex
new file mode 100644 (file)
index 0000000..b319067
--- /dev/null
@@ -0,0 +1,57 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Workers.Cron.HashtagsCleanupWorker do
+  @moduledoc """
+  The worker to clean up unused hashtags_objects and hashtags.
+  """
+
+  use Oban.Worker, queue: "hashtags_cleanup"  # jobs run on the hashtags_cleanup queue
+
+  alias Pleroma.Repo
+
+  require Logger
+
+  @hashtags_objects_query """
+  DELETE FROM hashtags_objects WHERE object_id IN
+    (SELECT DISTINCT objects.id FROM objects
+      JOIN hashtags_objects ON hashtags_objects.object_id = objects.id LEFT JOIN activities
+        ON COALESCE(activities.data->'object'->>'id', activities.data->>'object') =
+          (objects.data->>'id')
+        AND activities.data->>'type' = 'Create'
+      WHERE activities.id IS NULL);
+  """  # ^ deletes hashtags_objects rows whose object has no `Create` activity pointing at it
+
+  @hashtags_query """
+  DELETE FROM hashtags WHERE id IN
+    (SELECT hashtags.id FROM hashtags
+      LEFT OUTER JOIN hashtags_objects
+        ON hashtags_objects.hashtag_id = hashtags.id
+      WHERE hashtags_objects.hashtag_id IS NULL AND hashtags.inserted_at < $1);
+  """  # ^ deletes hashtags with no hashtags_objects rows that were inserted before cutoff $1
+
+  @impl Oban.Worker
+  def perform(_job) do  # the job argument is ignored; always runs both cleanup steps in order
+    Logger.info("Cleaning up unused `hashtags_objects` records...")
+
+    {:ok, %{num_rows: hashtags_objects_count}} =  # any non-:ok result raises MatchError
+      Repo.query(@hashtags_objects_query, [], timeout: :infinity)  # no bind params
+
+    Logger.info("Deleted #{hashtags_objects_count} unused `hashtags_objects` records.")
+
+    Logger.info("Cleaning up unused `hashtags` records...")
+
+    # Note: ignoring recently created hashtags since references are added after hashtag is created
+    {:ok, %{num_rows: hashtags_count}} =  # $1 below is the cutoff: 24 hours before now (UTC)
+      Repo.query(@hashtags_query, [NaiveDateTime.add(NaiveDateTime.utc_now(), -3600 * 24)],
+        timeout: :infinity
+      )
+
+    Logger.info("Deleted #{hashtags_count} unused `hashtags` records.")
+
+    Logger.info("HashtagsCleanupWorker complete.")
+
+    :ok  # reached only when both DELETE statements returned {:ok, _}
+  end
+end