prune_objects can prune orphaned activities who reference an array of objects
[akkoma] / test / mix / tasks / pleroma / database_test.exs
1 # Pleroma: A lightweight social networking server
2 # Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
3 # SPDX-License-Identifier: AGPL-3.0-only
4
5 defmodule Mix.Tasks.Pleroma.DatabaseTest do
6 use Pleroma.DataCase, async: true
7 use Oban.Testing, repo: Pleroma.Repo
8
9 alias Pleroma.Activity
10 alias Pleroma.Object
11 alias Pleroma.Repo
12 alias Pleroma.User
13 alias Pleroma.Web.CommonAPI
14
15 import Pleroma.Factory
16
# Installs `Mix.Shell.Process` as the Mix shell for this module's tests and
# reinstalls `Mix.Shell.IO` from the exit callback.
setup_all do
  restore_shell = fn -> Mix.shell(Mix.Shell.IO) end

  Mix.shell(Mix.Shell.Process)
  on_exit(restore_shell)

  :ok
end
26
27 describe "running remove_embedded_objects" do
# An activity whose "object" entry holds a full map must hold a binary
# after the remove_embedded_objects task has run.
test "it replaces objects with references" do
  author = insert(:user)
  {:ok, posted} = CommonAPI.post(author, %{status: "test"})

  # Store the object's data map directly under the activity's "object" key.
  embedded_data = Map.put(posted.data, "object", posted.object.data)
  changeset = Activity.change(posted, %{data: embedded_data})
  {:ok, embedded} = Repo.update(changeset)

  assert is_map(embedded.data["object"])

  Mix.Tasks.Pleroma.Database.run(["remove_embedded_objects"])

  reloaded = Activity.get_by_id_with_object(embedded.id)
  assert is_binary(reloaded.data["object"])
end
45 end
46
47 describe "prune_objects" do
# Without extra options, prune_objects is expected to remove both backdated
# notes and leave only the note that kept its original timestamp.
test "it prunes old objects from the database" do
  retention_days = Pleroma.Config.get([:instance, :remote_post_retention_days])

  # Timestamp one day beyond the configured retention window.
  stale_at =
    Timex.now()
    |> Timex.shift(days: -(retention_days + 1))
    |> Timex.to_naive_datetime()
    |> NaiveDateTime.truncate(:second)

  # This note keeps the timestamp it was inserted with.
  insert(:note)

  # This note only gets its updated_at moved back.
  %{id: note_remote_public_id} =
    Repo.update!(Ecto.Changeset.change(insert(:note), %{updated_at: stale_at}))

  # This note gets its updated_at moved back and its "to" list emptied.
  %{id: note_remote_non_public_id, data: original_data} = non_public_note = insert(:note)

  non_public_note
  |> Ecto.Changeset.change(%{updated_at: stale_at, data: Map.put(original_data, "to", [])})
  |> Repo.update!()

  objects_before = Repo.all(Object)
  assert length(objects_before) == 3

  Mix.Tasks.Pleroma.Database.run(["prune_objects"])

  objects_after = Repo.all(Object)
  assert length(objects_after) == 1
  refute Object.get_by_id(note_remote_public_id)
  refute Object.get_by_id(note_remote_non_public_id)
end
85
# With --keep-non-public, only the backdated note that still has its "to"
# list is expected to be removed; the one with an emptied "to" list stays.
test "with the --keep-non-public option it still keeps non-public posts even if they are not local" do
  retention_days = Pleroma.Config.get([:instance, :remote_post_retention_days])

  # Timestamp one day beyond the configured retention window.
  stale_at =
    Timex.now()
    |> Timex.shift(days: -(retention_days + 1))
    |> Timex.to_naive_datetime()
    |> NaiveDateTime.truncate(:second)

  # This note keeps the timestamp it was inserted with.
  insert(:note)

  # This note only gets its updated_at moved back.
  %{id: note_remote_id} =
    Repo.update!(Ecto.Changeset.change(insert(:note), %{updated_at: stale_at}))

  # This note gets its updated_at moved back and its "to" list emptied.
  %{data: original_data} = non_public_note = insert(:note)

  non_public_note
  |> Ecto.Changeset.change(%{updated_at: stale_at, data: Map.put(original_data, "to", [])})
  |> Repo.update!()

  objects_before = Repo.all(Object)
  assert length(objects_before) == 3

  Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-non-public"])

  objects_after = Repo.all(Object)
  assert length(objects_after) == 2
  refute Object.get_by_id(note_remote_id)
end
122
# Builds two backdated threads between non-local users: one containing a reply
# with an emptied "to" list, and one whose only interaction is a repeat with an
# emptied "to" list. After pruning with both flags, two of the three objects
# must remain and the post of the second thread must be gone.
test "with the --keep-threads and --keep-non-public option it keeps old threads with non-public replies even if the interaction is not local" do
  # For non-public we only check Create Activities because only these are relevant for threads
  # Flags are always non-public, Announces from relays can be non-public...
  deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) + 1

  old_insert_date =
    Timex.now()
    |> Timex.shift(days: -deadline)
    |> Timex.to_naive_datetime()
    |> NaiveDateTime.truncate(:second)

  remote_user1 = insert(:user, local: false)
  remote_user2 = insert(:user, local: false)

  # Old remote non-public reply (should be kept)
  {:ok, old_remote_post1_activity} =
    CommonAPI.post(remote_user1, %{status: "some thing", local: false})

  old_remote_post1_activity
  |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
  |> Repo.update!()

  {:ok, old_remote_non_public_reply_activity} =
    CommonAPI.post(remote_user2, %{
      status: "some reply",
      in_reply_to_status_id: old_remote_post1_activity.id
    })

  old_remote_non_public_reply_activity
  |> Ecto.Changeset.change(%{
    local: false,
    updated_at: old_insert_date,
    data: old_remote_non_public_reply_activity.data |> update_in(["to"], fn _ -> [] end)
  })
  |> Repo.update!()

  # Old remote non-public Announce (should be removed)
  {:ok, old_remote_post2_activity = %{data: %{"object" => old_remote_post2_id}}} =
    CommonAPI.post(remote_user1, %{status: "some thing", local: false})

  old_remote_post2_activity
  |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
  |> Repo.update!()

  {:ok, old_remote_non_public_repeat_activity} =
    CommonAPI.repeat(old_remote_post2_activity.id, remote_user2)

  old_remote_non_public_repeat_activity
  |> Ecto.Changeset.change(%{
    local: false,
    updated_at: old_insert_date,
    data: old_remote_non_public_repeat_activity.data |> update_in(["to"], fn _ -> [] end)
  })
  |> Repo.update!()

  assert length(Repo.all(Object)) == 3

  Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads", "--keep-non-public"])

  assert length(Repo.all(Object)) == 2
  refute Object.get_by_ap_id(old_remote_post2_id)
end
186
# A post and its reply, both flagged non-local but not backdated, must both
# still be present after pruning with --keep-threads.
test "with the --keep-threads option it still keeps non-old threads even with no local interactions" do
  poster = insert(:user, local: false)
  replier = insert(:user, local: false)

  {:ok, post_activity} = CommonAPI.post(poster, %{status: "some thing", local: false})

  {:ok, reply_activity} =
    CommonAPI.post(replier, %{status: "some reply", in_reply_to_status_id: post_activity.id})

  # Flag both activities as non-local; their timestamps are left untouched.
  Enum.each([post_activity, reply_activity], fn activity ->
    activity
    |> Ecto.Changeset.change(%{local: false})
    |> Repo.update!()
  end)

  objects_before = Repo.all(Object)
  assert length(objects_before) == 2

  Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"])

  objects_after = Repo.all(Object)
  assert length(objects_after) == 2
end
214
# Builds a backdated thread (post, reply, favourite, repeat) where every
# activity is flagged non-local, then checks that pruning with --keep-threads
# leaves no objects behind.
test "with the --keep-threads option it deletes old threads with no local interaction" do
  deadline = Pleroma.Config.get([:instance, :remote_post_retention_days]) + 1

  old_insert_date =
    Timex.now()
    |> Timex.shift(days: -deadline)
    |> Timex.to_naive_datetime()
    |> NaiveDateTime.truncate(:second)

  remote_user = insert(:user, local: false)
  remote_user2 = insert(:user, local: false)

  {:ok, old_remote_post_activity} =
    CommonAPI.post(remote_user, %{status: "some thing", local: false})

  old_remote_post_activity
  |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
  |> Repo.update!()

  {:ok, old_remote_post_reply_activity} =
    CommonAPI.post(remote_user2, %{
      status: "some reply",
      in_reply_to_status_id: old_remote_post_activity.id
    })

  old_remote_post_reply_activity
  |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
  |> Repo.update!()

  {:ok, old_favourite_activity} =
    CommonAPI.favorite(remote_user2, old_remote_post_activity.id)

  old_favourite_activity
  |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
  |> Repo.update!()

  {:ok, old_repeat_activity} = CommonAPI.repeat(old_remote_post_activity.id, remote_user2)

  old_repeat_activity
  |> Ecto.Changeset.change(%{local: false, updated_at: old_insert_date})
  |> Repo.update!()

  assert length(Repo.all(Object)) == 2

  Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"])

  # Comparing against [] makes a failure show the objects that were left over.
  assert Repo.all(Object) == []
end
263
# Three backdated posts by a non-local user each receive one interaction from a
# local user (a reply, a favourite, a repeat). All four objects must still be
# present after pruning with --keep-threads.
test "with the --keep-threads option it keeps old threads with local interaction" do
  retention_days = Pleroma.Config.get([:instance, :remote_post_retention_days])

  # Timestamp one day beyond the configured retention window.
  old_insert_date =
    Timex.now()
    |> Timex.shift(days: -(retention_days + 1))
    |> Timex.to_naive_datetime()
    |> NaiveDateTime.truncate(:second)

  # Moves an activity's updated_at back and sets its local flag.
  backdate = fn activity, local? ->
    activity
    |> Ecto.Changeset.change(%{local: local?, updated_at: old_insert_date})
    |> Repo.update!()
  end

  remote_user = insert(:user, local: false)
  local_user = insert(:user, local: true)

  # local reply
  {:ok, replied_to_post} = CommonAPI.post(remote_user, %{status: "some thing", local: false})
  backdate.(replied_to_post, false)

  {:ok, local_reply} =
    CommonAPI.post(local_user, %{status: "some reply", in_reply_to_status_id: replied_to_post.id})

  backdate.(local_reply, true)

  # local Like
  {:ok, liked_post} = CommonAPI.post(remote_user, %{status: "some thing", local: false})
  backdate.(liked_post, false)

  {:ok, local_favourite} = CommonAPI.favorite(local_user, liked_post.id)
  backdate.(local_favourite, true)

  # local Announce
  {:ok, repeated_post} = CommonAPI.post(remote_user, %{status: "some thing", local: false})
  backdate.(repeated_post, false)

  {:ok, local_repeat} = CommonAPI.repeat(repeated_post.id, local_user)
  backdate.(local_repeat, true)

  objects_before = Repo.all(Object)
  assert length(objects_before) == 4

  Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"])

  objects_after = Repo.all(Object)
  assert length(objects_after) == 4
end
328
# A backdated post by a non-local user that a local user has bookmarked must
# still be present after pruning with --keep-threads.
test "with the --keep-threads option it keeps old threads with bookmarked posts" do
  retention_days = Pleroma.Config.get([:instance, :remote_post_retention_days])

  # Timestamp one day beyond the configured retention window.
  old_insert_date =
    Timex.now()
    |> Timex.shift(days: -(retention_days + 1))
    |> Timex.to_naive_datetime()
    |> NaiveDateTime.truncate(:second)

  remote_user = insert(:user, local: false)
  local_user = insert(:user, local: true)

  {:ok, bookmarked_post} = CommonAPI.post(remote_user, %{status: "some thing", local: false})

  backdated_changeset =
    Ecto.Changeset.change(bookmarked_post, %{local: false, updated_at: old_insert_date})

  Repo.update!(backdated_changeset)

  Pleroma.Bookmark.create(local_user.id, bookmarked_post.id)

  objects_before = Repo.all(Object)
  assert length(objects_before) == 1

  Mix.Tasks.Pleroma.Database.run(["prune_objects", "--keep-threads"])

  objects_after = Repo.all(Object)
  assert length(objects_after) == 1
end
356
test "We don't have unexpected tables which may contain objects that are referenced by activities" do
  # Orphaned activities may be deleted. To decide whether an activity is orphaned, the objects it
  # references are looked up in the 'objects', 'activities', and 'users' tables.
  # If another table holding objects is ever added (separate relations, collections, ...), the
  # 'prune_objects' task needs matching logic so that the corresponding activities are not
  # deleted by mistake. This test therefore fails whenever a table is added or removed.
  # Either the table contains objects which can be referenced from the activities table
  #   => adapt the prune_objects job so activities that still have their referenced object are kept.
  # Or it doesn't contain objects which can be referenced from the activities table
  #   => add/remove the table to/from this (sorted) list.
  expected_tables = ~w(
    activities
    announcement_read_relationships
    announcements
    apps
    backups
    bookmarks
    chat_message_references
    chats
    config
    conversation_participation_recipient_ships
    conversation_participations
    conversations
    counter_cache
    data_migration_failed_ids
    data_migrations
    deliveries
    filters
    following_relationships
    hashtags
    hashtags_objects
    instances
    lists
    markers
    mfa_tokens
    moderation_log
    notifications
    oauth_authorizations
    oauth_tokens
    oban_jobs
    oban_peers
    objects
    password_reset_tokens
    push_subscriptions
    registrations
    report_notes
    scheduled_activities
    schema_migrations
    thread_mutes
    user_follows_hashtag
    user_frontend_setting_profiles
    user_invite_tokens
    user_notes
    user_relationships
    users
  )

  %{rows: rows} =
    Repo.query!(
      "SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';"
    )

  # Each result row is a one-element list, so wrap every expected name the same way.
  assert Enum.sort(rows) == Enum.map(expected_tables, &List.wrap/1)
end
417
# Inserts five activities: three non-local ones whose "object" names an existing
# object, activity or user, one non-local one whose "object" names nothing, and
# one local one whose "object" names nothing. Only the non-local activity
# without a referenced record may disappear, and only when
# --prune-orphaned-activities is passed.
test "it prunes orphaned activities with the --prune-orphaned-activities" do
  # Fixtures are inserted with insert!/1 so a failing insert aborts the test
  # right away instead of surfacing later as a wrong activity count.

  # Add a remote activity which references an Object
  Repo.insert!(%Object{data: %{"id" => "object_for_activity"}})

  Repo.insert!(%Activity{
    local: false,
    data: %{"id" => "remote_activity_with_object", "object" => "object_for_activity"}
  })

  # Add a remote activity which references an activity
  Repo.insert!(%Activity{
    local: false,
    data: %{
      "id" => "remote_activity_with_activity",
      "object" => "remote_activity_with_object"
    }
  })

  # Add a remote activity which references an Actor
  Repo.insert!(%User{ap_id: "actor"})

  Repo.insert!(%Activity{
    local: false,
    data: %{"id" => "remote_activity_with_actor", "object" => "actor"}
  })

  # Add a remote activity without existing referenced object, activity or actor
  Repo.insert!(%Activity{
    local: false,
    data: %{
      "id" => "remote_activity_without_existing_referenced_object",
      "object" => "non_existing"
    }
  })

  # Add a local activity without existing referenced object, activity or actor
  Repo.insert!(%Activity{
    local: true,
    data: %{"id" => "local_activity_with_actor", "object" => "non_existing"}
  })

  # The remote activities without existing reference, and only the remote activities without
  # existing reference, are deleted if, and only if, we provide the
  # --prune-orphaned-activities option
  assert length(Repo.all(Activity)) == 5
  Mix.Tasks.Pleroma.Database.run(["prune_objects"])
  assert length(Repo.all(Activity)) == 5
  Mix.Tasks.Pleroma.Database.run(["prune_objects", "--prune-orphaned-activities"])
  activities = Repo.all(Activity)

  remaining_ids = Enum.map(activities, fn a -> a.data["id"] end)
  assert "remote_activity_without_existing_referenced_object" not in remaining_ids

  assert length(activities) == 4
end
484
# Same scenario as the single-reference case, but every activity carries a list
# under "object". An activity must be kept when at least one list entry names
# an existing object, user or activity, and pruned when none does.
test "it prunes orphaned activities with the --prune-orphaned-activities when the objects are referenced from an array" do
  # Fixtures are inserted with insert!/1 so a failing insert aborts the test
  # right away instead of surfacing later as a wrong activity count.
  Repo.insert!(%Object{data: %{"id" => "existing_object"}})
  Repo.insert!(%User{ap_id: "existing_actor"})

  # Multiple objects, one object exists (keep)
  Repo.insert!(%Activity{
    local: false,
    data: %{
      "id" => "remote_activity_existing_object",
      "object" => ["non_ existing_object", "existing_object"]
    }
  })

  # Multiple objects, one actor exists (keep)
  Repo.insert!(%Activity{
    local: false,
    data: %{
      "id" => "remote_activity_existing_actor",
      "object" => ["non_ existing_object", "existing_actor"]
    }
  })

  # Multiple objects, one activity exists (keep)
  Repo.insert!(%Activity{
    local: false,
    data: %{
      "id" => "remote_activity_existing_activity",
      "object" => ["non_ existing_object", "remote_activity_existing_actor"]
    }
  })

  # Multiple objects none exist (prune)
  Repo.insert!(%Activity{
    local: false,
    data: %{
      "id" => "remote_activity_without_existing_referenced_object",
      "object" => ["owo", "whats_this"]
    }
  })

  # Nothing is pruned unless --prune-orphaned-activities is given.
  assert length(Repo.all(Activity)) == 4
  Mix.Tasks.Pleroma.Database.run(["prune_objects"])
  assert length(Repo.all(Activity)) == 4
  Mix.Tasks.Pleroma.Database.run(["prune_objects", "--prune-orphaned-activities"])
  activities = Repo.all(Activity)

  remaining_ids = Enum.map(activities, fn a -> a.data["id"] end)
  assert "remote_activity_without_existing_referenced_object" not in remaining_ids

  assert length(activities) == 3
end
547 end
548
549 describe "running update_users_following_followers_counts" do
# A follower_count that was manually set to a wrong value must be back at its
# previous value once update_users_following_followers_counts has run.
test "following and followers count are updated" do
  [follower, followed] = insert_pair(:user)
  {:ok, %User{} = follower, _followed} = User.follow(follower, followed)

  followed_users = User.following(follower)

  assert length(followed_users) == 2
  assert follower.follower_count == 0

  # Write a follower count that does not match the follow created above.
  skewed_changeset = Ecto.Changeset.change(follower, %{follower_count: 3})
  {:ok, skewed} = Repo.update(skewed_changeset)

  assert skewed.follower_count == 3

  task_result = Mix.Tasks.Pleroma.Database.run(["update_users_following_followers_counts"])
  assert {:ok, :ok} == task_result

  reloaded = User.get_by_id(skewed.id)

  assert length(User.following(reloaded)) == 2
  assert reloaded.follower_count == 0
end
574 end
575
576 describe "running fix_likes_collections" do
# One object keeps its regular "likes" list (one like), the other gets an
# OrderedCollection map written into "likes". After fix_likes_collections the
# first must be unchanged and the second must hold an empty collection.
test "it turns OrderedCollection likes into empty arrays" do
  [user, user2] = insert_pair(:user)

  {:ok, %{id: id, object: object}} = CommonAPI.post(user, %{status: "test"})
  {:ok, %{object: object2}} = CommonAPI.post(user, %{status: "test test"})

  # Match on the result so a rejected like fails here, not at a later assertion.
  {:ok, _like} = CommonAPI.favorite(user2, id)

  likes = %{
    "first" =>
      "http://mastodon.example.org/objects/dbdbc507-52c8-490d-9b7c-1e1d52e5c132/likes?page=1",
    "id" => "http://mastodon.example.org/objects/dbdbc507-52c8-490d-9b7c-1e1d52e5c132/likes",
    "totalItems" => 3,
    "type" => "OrderedCollection"
  }

  new_data = Map.put(object2.data, "likes", likes)

  # update!/1 raises if the fixture cannot be written, like the other tests in this module.
  object2
  |> Ecto.Changeset.change(%{data: new_data})
  |> Repo.update!()

  assert length(Object.get_by_id(object.id).data["likes"]) == 1
  assert is_map(Object.get_by_id(object2.id).data["likes"])

  assert :ok == Mix.Tasks.Pleroma.Database.run(["fix_likes_collections"])

  assert length(Object.get_by_id(object.id).data["likes"]) == 1
  assert Enum.empty?(Object.get_by_id(object2.id).data["likes"])
end
607 end
608
609 describe "ensure_expiration" do
# After ensure_expiration, the two activities without an existing purge job must
# have one scheduled 365 days after their inserted_at, while the activity that
# already had a job enqueued must still be scheduled at its own expires_at.
test "it adds to expiration old statuses" do
  recent_activity = insert(:note_activity)

  {:ok, inserted_at, 0} = DateTime.from_iso8601("2015-01-23T23:50:07Z")
  backdated_activity = insert(:note_activity, %{inserted_at: inserted_at})

  %{id: scheduled_activity_id} = insert(:note_activity)
  expires_at = DateTime.utc_now() |> DateTime.add(60 * 61)

  # This activity gets a purge job before the task runs.
  Pleroma.Workers.PurgeExpiredActivity.enqueue(%{
    activity_id: scheduled_activity_id,
    expires_at: expires_at
  })

  Mix.Tasks.Pleroma.Database.run(["ensure_expiration"])

  for activity <- [recent_activity, backdated_activity] do
    expected_schedule =
      activity.inserted_at
      |> DateTime.from_naive!("Etc/UTC")
      |> Timex.shift(days: 365)

    assert_enqueued(
      worker: Pleroma.Workers.PurgeExpiredActivity,
      args: %{activity_id: activity.id},
      scheduled_at: expected_schedule
    )
  end

  assert_enqueued(
    worker: Pleroma.Workers.PurgeExpiredActivity,
    args: %{activity_id: scheduled_activity_id},
    scheduled_at: expires_at
  )
end
651 end
652 end