Prune Objects --keep-threads option (#350)
[akkoma] / lib / mix / tasks / pleroma / database.ex
index 30c0d2bf17c467d6f561a3bc46873ae039cd08e0..be59e2271e6b748aacb2da44d71138a07ee2eb00 100644 (file)
@@ -18,31 +18,7 @@ defmodule Mix.Tasks.Pleroma.Database do
   use Mix.Task
 
   @shortdoc "A collection of database related tasks"
-  @moduledoc File.read!("docs/administration/CLI_tasks/database.md")
-
-  # Rolls back a specific migration (leaving subsequent migrations applied)
-  # Based on https://stackoverflow.com/a/53825840
-  def run(["rollback", version]) do
-    start_pleroma()
-
-    version = String.to_integer(version)
-    re = ~r/^#{version}_.*\.exs/
-    path = Application.app_dir(:pleroma, Path.join(["priv", "repo", "migrations"]))
-
-    result =
-      with {:find, "" <> file} <- {:find, Enum.find(File.ls!(path), &String.match?(&1, re))},
-           {:compile, [{mod, _} | _]} <- {:compile, Code.compile_file(Path.join(path, file))},
-           {:rollback, :ok} <- {:rollback, Ecto.Migrator.down(Repo, version, mod)} do
-        {:ok, "Reversed migration: #{file}"}
-      else
-        {:find, _} -> {:error, "No migration found with version prefix: #{version}"}
-        {:compile, e} -> {:error, "Problem compiling migration module: #{inspect(e)}"}
-        {:rollback, e} -> {:error, "Problem reversing migration: #{inspect(e)}"}
-        e -> {:error, "Something unexpected happened: #{inspect(e)}"}
-      end
-
-    IO.inspect(result)
-  end
+  @moduledoc File.read!("docs/docs/administration/CLI_tasks/database.md")
 
   def run(["remove_embedded_objects" | args]) do
     {options, [], []} =
@@ -91,40 +67,115 @@ defmodule Mix.Tasks.Pleroma.Database do
       OptionParser.parse(
         args,
         strict: [
-          vacuum: :boolean
+          vacuum: :boolean,
+          keep_threads: :boolean,
+          keep_non_public: :boolean
         ]
       )
 
     start_pleroma()
 
     deadline = Pleroma.Config.get([:instance, :remote_post_retention_days])
+    time_deadline = NaiveDateTime.utc_now() |> NaiveDateTime.add(-(deadline * 86_400))
 
-    Logger.info("Pruning objects older than #{deadline} days")
+    log_message = "Pruning objects older than #{deadline} days"
 
-    time_deadline =
-      NaiveDateTime.utc_now()
-      |> NaiveDateTime.add(-(deadline * 86_400))
+    log_message =
+      if Keyword.get(options, :keep_non_public) do
+        log_message <> ", keeping non public posts"
+      else
+        log_message
+      end
 
-    from(o in Object,
-      where:
-        fragment(
-          "?->'to' \\? ? OR ?->'cc' \\? ?",
-          o.data,
-          ^Pleroma.Constants.as_public(),
-          o.data,
-          ^Pleroma.Constants.as_public()
-        ),
-      where: o.inserted_at < ^time_deadline,
-      where:
+    log_message =
+      if Keyword.get(options, :keep_threads) do
+        log_message <> ", keeping threads intact"
+      else
+        log_message
+      end
+
+    Logger.info(log_message)
+
+    if Keyword.get(options, :keep_threads) do
+      # We want to delete objects from threads where
+      # 1. the newest post is still old
+      # 2. none of the activities is local
+      # 3. none of the activities is bookmarked
+      # 4. optionally none of the posts is non-public
+      deletable_context =
+        if Keyword.get(options, :keep_non_public) do
+          Pleroma.Activity
+          |> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id)
+          |> group_by([a], fragment("? ->> 'context'::text", a.data))
+          |> having(
+            [a],
+            not fragment(
+              # Posts (checked on Create Activity) is non-public
+              "bool_or((not(?->'to' \\? ? OR ?->'cc' \\? ?)) and ? ->> 'type' = 'Create')",
+              a.data,
+              ^Pleroma.Constants.as_public(),
+              a.data,
+              ^Pleroma.Constants.as_public(),
+              a.data
+            )
+          )
+        else
+          Pleroma.Activity
+          |> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id)
+          |> group_by([a], fragment("? ->> 'context'::text", a.data))
+        end
+        |> having([a], max(a.updated_at) < ^time_deadline)
+        |> having([a], not fragment("bool_or(?)", a.local))
+        |> having([_, b], fragment("max(?::text) is null", b.id))
+        |> select([a], fragment("? ->> 'context'::text", a.data))
+
+      Pleroma.Object
+      |> where([o], fragment("? ->> 'context'::text", o.data) in subquery(deletable_context))
+    else
+      if Keyword.get(options, :keep_non_public) do
+        Pleroma.Object
+        |> where(
+          [o],
+          fragment(
+            "?->'to' \\? ? OR ?->'cc' \\? ?",
+            o.data,
+            ^Pleroma.Constants.as_public(),
+            o.data,
+            ^Pleroma.Constants.as_public()
+          )
+        )
+      else
+        Pleroma.Object
+      end
+      |> where([o], o.updated_at < ^time_deadline)
+      |> where(
+        [o],
         fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host())
-    )
+      )
+    end
     |> Repo.delete_all(timeout: :infinity)
 
+    prune_hashtags_query = """
+    DELETE FROM hashtags AS ht
+    WHERE NOT EXISTS (
+      SELECT 1 FROM hashtags_objects hto
+      WHERE ht.id = hto.hashtag_id)
+    """
+
+    Repo.query(prune_hashtags_query)
+
     if Keyword.get(options, :vacuum) do
       Maintenance.vacuum("full")
     end
   end
 
+  # Invokes Pleroma.Workers.Cron.PruneDatabaseWorker.perform/1 directly from the
+  # command line, passing `nil` as its only argument, and returns its result.
+  def run(["prune_task"]) do
+    start_pleroma()
+
+    Pleroma.Workers.Cron.PruneDatabaseWorker.perform(nil)
+  end
+
   def run(["fix_likes_collections"]) do
     start_pleroma()
 
@@ -194,4 +245,83 @@ defmodule Mix.Tasks.Pleroma.Database do
     end)
     |> Stream.run()
   end
+
+  # Sets the database-level `default_text_search_config` to `tsconfig` and then
+  # rebuilds the full-text search support on `objects`.
+  #
+  # Steps:
+  #   1. print the current `default_text_search_config`,
+  #   2. run ALTER DATABASE against the currently connected database,
+  #   3. depending on `[:database, :rum_enabled]`, either redefine the
+  #      `objects_fts_update` trigger function and update every row of `objects`
+  #      (RUM), or drop and recreate the `objects_fts` GIN index.
+  #
+  # Progress is printed with `shell_info/1`. Every statement is run with
+  # `query!`, so a failing statement raises.
+  def run(["set_text_search_config", tsconfig]) do
+    start_pleroma()
+    %{rows: [[tsc]]} = Ecto.Adapters.SQL.query!(Pleroma.Repo, "SHOW default_text_search_config;")
+    shell_info("Current default_text_search_config: #{tsc}")
+
+    %{rows: [[db]]} = Ecto.Adapters.SQL.query!(Pleroma.Repo, "SELECT current_database();")
+    shell_info("Update default_text_search_config: #{tsconfig}")
+
+    # The values below are spliced into DDL text, so they are escaped by hand:
+    # the database name becomes a double-quoted identifier (keeps mixed-case or
+    # hyphenated names working) and single quotes in the config name are doubled
+    # so it cannot terminate the SQL string literal early.
+    quoted_db = "\"" <> String.replace(db, "\"", "\"\"") <> "\""
+    escaped_tsconfig = String.replace(tsconfig, "'", "''")
+
+    %{messages: msg} =
+      Ecto.Adapters.SQL.query!(
+        Pleroma.Repo,
+        "ALTER DATABASE #{quoted_db} SET default_text_search_config = '#{escaped_tsconfig}';"
+      )
+
+    # A non-existent config does not raise an exception; it only yields a
+    # non-empty list of messages.
+    if msg != [] do
+      shell_info("Error: #{inspect(msg, pretty: true)}")
+    else
+      rum_enabled = Pleroma.Config.get([:database, :rum_enabled])
+      shell_info("Recreate index, RUM: #{rum_enabled}")
+
+      # Note SQL below needs to be kept up-to-date with latest GIN or RUM index definition in future
+      if rum_enabled do
+        Ecto.Adapters.SQL.query!(
+          Pleroma.Repo,
+          "CREATE OR REPLACE FUNCTION objects_fts_update() RETURNS trigger AS $$ BEGIN
+          new.fts_content := to_tsvector(new.data->>'content');
+          RETURN new;
+          END
+          $$ LANGUAGE plpgsql",
+          [],
+          timeout: :infinity
+        )
+
+        shell_info("Refresh RUM index")
+
+        # Updates every row of `objects` (presumably so the trigger recomputes
+        # `fts_content` - confirm against the trigger definition). This can run
+        # for a long time, so it gets the same unlimited timeout as the other
+        # long-running statements in this task.
+        Ecto.Adapters.SQL.query!(
+          Pleroma.Repo,
+          "UPDATE objects SET updated_at = NOW();",
+          [],
+          timeout: :infinity
+        )
+      else
+        Ecto.Adapters.SQL.query!(Pleroma.Repo, "DROP INDEX IF EXISTS objects_fts;")
+
+        Ecto.Adapters.SQL.query!(
+          Pleroma.Repo,
+          "CREATE INDEX CONCURRENTLY objects_fts ON objects USING gin(to_tsvector('#{escaped_tsconfig}', data->>'content')); ",
+          [],
+          timeout: :infinity
+        )
+      end
+
+      shell_info("Done.")
+    end
+  end
+
+  # Rolls back a specific migration (leaving subsequent migrations applied).
+  # WARNING: imposes a risk of unrecoverable data loss — proceed at your own responsibility.
+  # Based on https://stackoverflow.com/a/53825840
+  #
+  # `version` is the numeric migration version given as a string; a value that
+  # is not an integer makes `String.to_integer/1` raise. The user is asked to
+  # confirm first (default answer "n"); anything other than an explicit yes
+  # leaves the database untouched and returns `nil`.
+  #
+  # The outcome is printed with `shell_info/1` as an inspected `{:ok, message}`
+  # or `{:error, message}` tuple.
+  def run(["rollback", version]) do
+    prompt = "SEVERE WARNING: this operation may result in unrecoverable data loss. Continue?"
+
+    if shell_prompt(prompt, "n") in ~w(Yn Y y) do
+      outcome =
+        Ecto.Migrator.with_repo(Pleroma.Repo, fn repo ->
+          version = String.to_integer(version)
+          re = ~r/^#{version}_.*\.exs/
+          path = Ecto.Migrator.migrations_path(repo)
+
+          # Each step is tagged so the `else` clauses can tell which one failed.
+          with {_, "" <> file} <- {:find, Enum.find(File.ls!(path), &String.match?(&1, re))},
+               {_, [{mod, _} | _]} <- {:compile, Code.compile_file(Path.join(path, file))},
+               {_, :ok} <- {:rollback, Ecto.Migrator.down(repo, version, mod)} do
+            {:ok, "Reversed migration: #{file}"}
+          else
+            {:find, _} -> {:error, "No migration found with version prefix: #{version}"}
+            {:compile, e} -> {:error, "Problem compiling migration module: #{inspect(e)}"}
+            {:rollback, e} -> {:error, "Problem reversing migration: #{inspect(e)}"}
+          end
+        end)
+
+      # `with_repo/3` returns `{:error, term}` when the repo cannot be started;
+      # report that like the other failures instead of raising a MatchError.
+      result =
+        case outcome do
+          {:ok, fun_result, _started_apps} -> fun_result
+          {:error, e} -> {:error, "Problem starting repo: #{inspect(e)}"}
+        end
+
+      shell_info(inspect(result))
+    end
+  end
 end