prune_objects can prune orphaned activities who reference an array of objects
[akkoma] / lib / mix / tasks / pleroma / database.ex
1 # Pleroma: A lightweight social networking server
2 # Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
3 # SPDX-License-Identifier: AGPL-3.0-only
4
5 defmodule Mix.Tasks.Pleroma.Database do
6 alias Pleroma.Conversation
7 alias Pleroma.Maintenance
8 alias Pleroma.Object
9 alias Pleroma.Repo
10 alias Pleroma.User
11
12 require Logger
13 require Pleroma.Constants
14
15 import Ecto.Query
16 import Mix.Pleroma
17
18 use Mix.Task
19
20 @shortdoc "A collection of database related tasks"
21 @moduledoc File.read!("docs/docs/administration/CLI_tasks/database.md")
22
# Replaces every object embedded in an activity with that object's id.
# Pass `--vacuum` to run a full vacuum once the update has finished.
def run(["remove_embedded_objects" | args]) do
  {options, [], []} = OptionParser.parse(args, strict: [vacuum: :boolean])

  start_pleroma()
  Logger.info("Removing embedded objects")

  # Only touches activities whose `object` is a map carrying an `id`.
  sql =
    "update activities set data = safe_jsonb_set(data, '{object}'::text[], data->'object'->'id') where data->'object'->>'id' is not null;"

  Repo.query!(sql, [], timeout: :infinity)

  if options[:vacuum], do: Maintenance.vacuum("full")
end
45
# Starts the application, then delegates to `Conversation.bump_for_all_activities/0`.
def run(["bump_all_conversations"]) do
  start_pleroma()

  Conversation.bump_for_all_activities()
end
50
# Calls `User.update_follower_count/1` for every user in the database.
# The users are streamed inside one transaction that has no timeout.
def run(["update_users_following_followers_counts"]) do
  start_pleroma()

  refresh_all = fn ->
    User
    |> select([u], u)
    |> Repo.stream()
    |> Stream.each(&User.update_follower_count/1)
    |> Stream.run()
  end

  Repo.transaction(refresh_all, timeout: :infinity)
end
64
# Deletes objects older than `[:instance, :remote_post_retention_days]` days.
#
# Supported switches:
#   --keep-threads               delete only objects whose whole context is old, has no
#                                local activity and no bookmarked activity
#   --keep-non-public            never delete non-public posts
#   --prune-orphaned-activities  also delete activities whose referenced object(s),
#                                activities and users no longer exist
#   --vacuum                     run a full vacuum at the end
#
# Hashtags that are no longer attached to any object are always removed afterwards.
# Unknown switches or positional arguments fail the option match and raise.
def run(["prune_objects" | args]) do
  {options, [], []} =
    OptionParser.parse(
      args,
      strict: [
        vacuum: :boolean,
        keep_threads: :boolean,
        keep_non_public: :boolean,
        prune_orphaned_activities: :boolean
      ]
    )

  start_pleroma()

  deadline = Pleroma.Config.get([:instance, :remote_post_retention_days])
  time_deadline = NaiveDateTime.utc_now() |> NaiveDateTime.add(-(deadline * 86_400))

  # Each enabled switch appends its note to the base message, in this fixed order.
  log_message =
    [
      keep_non_public: ", keeping non public posts",
      keep_threads: ", keeping threads intact",
      prune_orphaned_activities: ", pruning orphaned activities",
      vacuum: ", doing a full vacuum (you shouldn't do this as a recurring maintanance task)"
    ]
    |> Enum.reduce("Pruning objects older than #{deadline} days", fn {switch, note}, message ->
      if Keyword.get(options, switch), do: message <> note, else: message
    end)

  Logger.info(log_message)

  # Runs a raw cleanup statement without a timeout. A failure is logged instead of being
  # silently discarded, and the remaining cleanup steps still run.
  run_cleanup = fn description, sql ->
    case Repo.query(sql, [], timeout: :infinity) do
      {:ok, _result} ->
        :ok

      {:error, reason} ->
        Logger.error("prune_objects: #{description} failed: #{inspect(reason)}")
    end
  end

  prunable_objects =
    if Keyword.get(options, :keep_threads) do
      # We want to delete objects from threads where
      # 1. the newest post is still old
      # 2. none of the activities is local
      # 3. none of the activities is bookmarked
      # 4. optionally none of the posts is non-public
      activities_by_context =
        Pleroma.Activity
        |> join(:left, [a], b in Pleroma.Bookmark, on: a.id == b.activity_id)
        |> group_by([a], fragment("? ->> 'context'::text", a.data))

      visibility_checked =
        if Keyword.get(options, :keep_non_public) do
          having(
            activities_by_context,
            [a],
            not fragment(
              # Posts (checked on Create Activity) is non-public
              "bool_or((not(?->'to' \\? ? OR ?->'cc' \\? ?)) and ? ->> 'type' = 'Create')",
              a.data,
              ^Pleroma.Constants.as_public(),
              a.data,
              ^Pleroma.Constants.as_public(),
              a.data
            )
          )
        else
          activities_by_context
        end

      deletable_context =
        visibility_checked
        |> having([a], max(a.updated_at) < ^time_deadline)
        |> having([a], not fragment("bool_or(?)", a.local))
        |> having([_, b], fragment("max(?::text) is null", b.id))
        |> select([a], fragment("? ->> 'context'::text", a.data))

      where(
        Pleroma.Object,
        [o],
        fragment("? ->> 'context'::text", o.data) in subquery(deletable_context)
      )
    else
      visibility_checked =
        if Keyword.get(options, :keep_non_public) do
          # Restrict the deletion to objects addressed to the public collection.
          where(
            Pleroma.Object,
            [o],
            fragment(
              "?->'to' \\? ? OR ?->'cc' \\? ?",
              o.data,
              ^Pleroma.Constants.as_public(),
              o.data,
              ^Pleroma.Constants.as_public()
            )
          )
        else
          Pleroma.Object
        end

      # Old objects whose actor lives on another host than this instance.
      visibility_checked
      |> where([o], o.updated_at < ^time_deadline)
      |> where(
        [o],
        fragment("split_part(?->>'actor', '/', 3) != ?", o.data, ^Pleroma.Web.Endpoint.host())
      )
    end

  Repo.delete_all(prunable_objects, timeout: :infinity)

  if Keyword.get(options, :prune_orphaned_activities) do
    # Prune activities who link to a single object
    run_cleanup.(
      "pruning orphaned activities referencing a single object",
      """
      delete from public.activities
      where id in (
        select a.id from public.activities a
        left join public.objects o on a.data ->> 'object' = o.data ->> 'id'
        left join public.activities a2 on a.data ->> 'object' = a2.data ->> 'id'
        left join public.users u on a.data ->> 'object' = u.ap_id
        where not a.local
        and jsonb_typeof(a."data" -> 'object') = 'string'
        and o.id is null
        and a2.id is null
        and u.id is null
      )
      """
    )

    # Prune activities who link to an array of objects
    run_cleanup.(
      "pruning orphaned activities referencing an array of objects",
      """
      delete from public.activities
      where id in (
        select a.id from public.activities a
        join json_array_elements_text((a."data" -> 'object')::json) as j on jsonb_typeof(a."data" -> 'object') = 'array'
        left join public.objects o on j.value = o.data ->> 'id'
        left join public.activities a2 on j.value = a2.data ->> 'id'
        left join public.users u on j.value = u.ap_id
        group by a.id
        having max(o.data ->> 'id') is null
        and max(a2.data ->> 'id') is null
        and max(u.ap_id) is null
      )
      """
    )
  end

  # Runs without a timeout like the other statements of this task, so that a slow
  # cleanup on a large database is not cut off by the default query timeout.
  run_cleanup.(
    "deleting unused hashtags",
    """
    DELETE FROM hashtags AS ht
    WHERE NOT EXISTS (
      SELECT 1 FROM hashtags_objects hto
      WHERE ht.id = hto.hashtag_id)
    """
  )

  if Keyword.get(options, :vacuum) do
    Maintenance.vacuum("full")
  end
end
222
# Starts the application and invokes the cron pruning worker once, passing `nil`.
def run(["prune_task"]) do
  start_pleroma()

  Pleroma.Workers.Cron.PruneDatabaseWorker.perform(nil)
end
229
# Resets `likes` to an empty JSON array on every object whose stored `likes`
# value decodes to a map. Objects are processed in batches of 100.
def run(["fix_likes_collections"]) do
  start_pleroma()

  objects_with_likes =
    from(object in Object,
      where: fragment("(?)->>'likes' is not null", object.data),
      select: %{id: object.id, likes: fragment("(?)->>'likes'", object.data)}
    )

  reset_map_likes = fn batch ->
    # Keep only the ids of the rows whose `likes` JSON text is an object.
    ids = for row <- batch, is_map(Jason.decode!(row.likes)), do: row.id

    Object
    |> where([o], o.id in ^ids)
    |> update([o],
      set: [data: fragment("safe_jsonb_set(?, '{likes}', '[]'::jsonb, true)", o.data)]
    )
    |> Repo.update_all([], timeout: :infinity)
  end

  objects_with_likes
  |> Pleroma.Repo.chunk_stream(100, :batches)
  |> Stream.each(reset_map_likes)
  |> Stream.run()
end
259
# Starts the application and hands the single command-line argument to
# `Maintenance.vacuum/1` unchanged (other tasks in this module pass "full").
def run(["vacuum", vacuum_arg]) do
  start_pleroma()

  Maintenance.vacuum(vacuum_arg)
end
265
# Enqueues a `PurgeExpiredActivity` job for every local `Create` activity whose
# object is a `Note`. The expiry is the activity's insertion time shifted by
# `[:mrf_activity_expiration, :days]` days (365 when unset).
def run(["ensure_expiration"]) do
  start_pleroma()
  days = Pleroma.Config.get([:mrf_activity_expiration, :days], 365)

  schedule_purge = fn activity ->
    expires_at =
      activity.inserted_at
      |> DateTime.from_naive!("Etc/UTC")
      |> Timex.shift(days: days)

    Pleroma.Workers.PurgeExpiredActivity.enqueue(%{
      activity_id: activity.id,
      expires_at: expires_at
    })
  end

  # The object is matched through either an embedded object's id or a plain id reference.
  local_note_creations =
    Pleroma.Activity
    |> join(:inner, [a], o in Object,
      on:
        fragment(
          "(?->>'id') = COALESCE((?)->'object'->> 'id', (?)->>'object')",
          o.data,
          a.data,
          a.data
        )
    )
    |> where(local: true)
    |> where([a], fragment("(? ->> 'type'::text) = 'Create'", a.data))
    |> where([_a, o], fragment("?->>'type' = 'Note'", o.data))

  local_note_creations
  |> Pleroma.Repo.chunk_stream(100, :batches)
  |> Stream.each(&Enum.each(&1, schedule_purge))
  |> Stream.run()
end
299
# Sets `default_text_search_config` of the current database to `tsconfig` and
# rebuilds the full-text search index: the RUM trigger function plus a refresh
# when `[:database, :rum_enabled]` is set, the GIN index otherwise.
def run(["set_text_search_config", tsconfig]) do
  start_pleroma()
  %{rows: [[tsc]]} = Ecto.Adapters.SQL.query!(Pleroma.Repo, "SHOW default_text_search_config;")
  shell_info("Current default_text_search_config: #{tsc}")

  %{rows: [[db]]} = Ecto.Adapters.SQL.query!(Pleroma.Repo, "SELECT current_database();")
  shell_info("Update default_text_search_config: #{tsconfig}")

  # These statements are built as SQL text, so both values are quoted here: the database
  # name as an identifier (names with hyphens or upper-case letters are only valid when
  # double-quoted) and the configuration as a string literal with single quotes doubled.
  quoted_db = "\"" <> String.replace(db, "\"", "\"\"") <> "\""
  quoted_tsconfig = "'" <> String.replace(tsconfig, "'", "''") <> "'"

  %{messages: msg} =
    Ecto.Adapters.SQL.query!(
      Pleroma.Repo,
      "ALTER DATABASE #{quoted_db} SET default_text_search_config = #{quoted_tsconfig};"
    )

  # A non-existent config will not raise an exception but only give >0 messages
  if msg != [] do
    shell_info("Error: #{inspect(msg, pretty: true)}")
  else
    rum_enabled = Pleroma.Config.get([:database, :rum_enabled])
    shell_info("Recreate index, RUM: #{rum_enabled}")

    # Note SQL below needs to be kept up-to-date with latest GIN or RUM index definition in future
    if rum_enabled do
      Ecto.Adapters.SQL.query!(
        Pleroma.Repo,
        "CREATE OR REPLACE FUNCTION objects_fts_update() RETURNS trigger AS $$ BEGIN
        new.fts_content := to_tsvector(new.data->>'content');
        RETURN new;
        END
        $$ LANGUAGE plpgsql",
        [],
        timeout: :infinity
      )

      shell_info("Refresh RUM index")
      Ecto.Adapters.SQL.query!(Pleroma.Repo, "UPDATE objects SET updated_at = NOW();")
    else
      Ecto.Adapters.SQL.query!(Pleroma.Repo, "DROP INDEX IF EXISTS objects_fts;")

      Ecto.Adapters.SQL.query!(
        Pleroma.Repo,
        "CREATE INDEX CONCURRENTLY objects_fts ON objects USING gin(to_tsvector(#{quoted_tsconfig}, data->>'content')); ",
        [],
        timeout: :infinity
      )
    end

    shell_info("Done.")
  end
end
350
# Reverts one specific migration while leaving all later migrations applied.
# WARNING: this can cause unrecoverable data loss; the user has to confirm first.
# Based on https://stackoverflow.com/a/53825840
def run(["rollback", version]) do
  prompt = "SEVERE WARNING: this operation may result in unrecoverable data loss. Continue?"
  answer = shell_prompt(prompt, "n")

  if answer in ["Yn", "Y", "y"] do
    revert = fn repo ->
      version = String.to_integer(version)
      file_pattern = ~r/^#{version}_.*\.exs/
      migrations_dir = Ecto.Migrator.migrations_path(repo)

      # Each step is tagged so the `else` branch can tell which one failed.
      with {_, "" <> file} <-
             {:find, Enum.find(File.ls!(migrations_dir), &String.match?(&1, file_pattern))},
           {_, [{mod, _} | _]} <-
             {:compile, Code.compile_file(Path.join(migrations_dir, file))},
           {_, :ok} <- {:rollback, Ecto.Migrator.down(repo, version, mod)} do
        {:ok, "Reversed migration: #{file}"}
      else
        {:find, _} -> {:error, "No migration found with version prefix: #{version}"}
        {:compile, e} -> {:error, "Problem compiling migration module: #{inspect(e)}"}
        {:rollback, e} -> {:error, "Problem reversing migration: #{inspect(e)}"}
      end
    end

    {_, result, _} = Ecto.Migrator.with_repo(Pleroma.Repo, revert)

    shell_info(inspect(result))
  end
end
378 end