[#3213] Hashtag-filtering functions in ActivityPub. Mix task for migrating hashtags...
author Ivan Tashkinov <ivantashkinov@gmail.com>
Sat, 26 Dec 2020 19:20:55 +0000 (22:20 +0300)
committer Ivan Tashkinov <ivantashkinov@gmail.com>
Sat, 26 Dec 2020 19:20:55 +0000 (22:20 +0300)
lib/mix/tasks/pleroma/database.ex
lib/pleroma/web/activity_pub/activity_pub.ex
test/pleroma/web/activity_pub/activity_pub_test.exs

index 22151ce08e4d6fef4c42c23f04490d15837656d3..093c7dd30de565c61fafcdf5839497daafa33820 100644 (file)
@@ -4,14 +4,18 @@
 
 defmodule Mix.Tasks.Pleroma.Database do
   alias Pleroma.Conversation
+  alias Pleroma.Hashtag
   alias Pleroma.Maintenance
   alias Pleroma.Object
   alias Pleroma.Repo
   alias Pleroma.User
+
   require Logger
   require Pleroma.Constants
+
   import Ecto.Query
   import Mix.Pleroma
+
   use Mix.Task
 
   @shortdoc "A collection of database related tasks"
@@ -128,6 +132,66 @@ defmodule Mix.Tasks.Pleroma.Database do
     |> Stream.run()
   end
 
+  # Links objects to hashtag records based on the tag list embedded in `object.data`.
+  #
+  # Selects objects that have no associated hashtag rows and whose embedded `tag`
+  # value is not the empty JSON array, in descending id order. For each object the
+  # string entries of the tag list are resolved via `Hashtag.get_or_create_by_names/1`
+  # and one `hashtags_objects` row is inserted per hashtag. Failures are logged and
+  # do not stop the task.
+  def run(["transfer_hashtags"]) do
+    start_pleroma()
+
+    from(
+      object in Object,
+      left_join: hashtag in assoc(object, :hashtags),
+      where: is_nil(hashtag.id),
+      where: fragment("(?)->>'tag' != '[]'", object.data),
+      select: %{
+        id: object.id,
+        inserted_at: object.inserted_at,
+        tag: fragment("(?)->>'tag'", object.data)
+      },
+      order_by: [desc: object.id]
+    )
+    |> Repo.chunk_stream(100, :batches)
+    |> Stream.each(fn objects ->
+      chunk_start = List.first(objects)
+      chunk_end = List.last(objects)
+
+      Logger.info(
+        "transfer_hashtags: " <>
+          "#{chunk_start.id} (#{chunk_start.inserted_at}) -- " <>
+          "#{chunk_end.id} (#{chunk_end.inserted_at})"
+      )
+
+      # Executed purely for side effects, so no result list is built.
+      Enum.each(objects, fn object ->
+        # `tag` is selected as JSON text; only its string entries are kept
+        # (presumably non-string entries are not hashtag names -- TODO confirm).
+        hashtags =
+          object.tag
+          |> Jason.decode!()
+          |> Enum.filter(&is_binary/1)
+
+        with {:ok, hashtag_records} <- Hashtag.get_or_create_by_names(hashtags) do
+          Repo.transaction(fn ->
+            for hashtag_record <- hashtag_records do
+              # Ids are passed as bind parameters instead of being spliced into the SQL text.
+              with {:error, _} <-
+                     Ecto.Adapters.SQL.query(
+                       Repo,
+                       "insert into hashtags_objects(hashtag_id, object_id) values ($1, $2);",
+                       [hashtag_record.id, object.id]
+                     ) do
+                Logger.warn(
+                  "ERROR: could not link object #{object.id} and hashtag #{hashtag_record.id}"
+                )
+              end
+            end
+          end)
+        else
+          e -> Logger.warn("ERROR: could not process object #{object.id}: #{inspect(e)}")
+        end
+      end)
+    end)
+    |> Stream.run()
+  end
+
   def run(["vacuum", args]) do
     start_pleroma()
 
index 1c91bc07482b22ac3905512860537f5a009486d3..2e25412c6ba7cd4e723da8ca0eeae94bc588b425 100644 (file)
@@ -660,33 +660,41 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do
   defp restrict_since(query, _), do: query
 
   defp restrict_tag_reject(_query, %{tag_reject: _tag_reject, skip_preload: true}) do
-    raise "Can't use the child object without preloading!"
+    raise_on_missing_preload()
   end
 
-  defp restrict_tag_reject(query, %{tag_reject: [_ | _] = tag_reject}) do
+  defp restrict_tag_reject(query, %{tag_reject: tag_reject}) when is_list(tag_reject) do
     from(
       [_activity, object] in query,
       where: fragment("not (?)->'tag' \\?| (?)", object.data, ^tag_reject)
     )
   end
 
+  defp restrict_tag_reject(query, %{tag_reject: tag_reject}) when is_binary(tag_reject) do
+    restrict_tag_reject(query, %{tag_reject: [tag_reject]})
+  end
+
   defp restrict_tag_reject(query, _), do: query
 
   defp restrict_tag_all(_query, %{tag_all: _tag_all, skip_preload: true}) do
-    raise "Can't use the child object without preloading!"
+    raise_on_missing_preload()
   end
 
-  defp restrict_tag_all(query, %{tag_all: [_ | _] = tag_all}) do
+  defp restrict_tag_all(query, %{tag_all: tag_all}) when is_list(tag_all) do
     from(
       [_activity, object] in query,
       where: fragment("(?)->'tag' \\?& (?)", object.data, ^tag_all)
     )
   end
 
+  defp restrict_tag_all(query, %{tag_all: tag}) when is_binary(tag) do
+    restrict_tag(query, %{tag: tag})
+  end
+
   defp restrict_tag_all(query, _), do: query
 
   defp restrict_tag(_query, %{tag: _tag, skip_preload: true}) do
-    raise "Can't use the child object without preloading!"
+    raise_on_missing_preload()
   end
 
   defp restrict_tag(query, %{tag: tag}) when is_list(tag) do
@@ -697,13 +705,79 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do
   end
 
   defp restrict_tag(query, %{tag: tag}) when is_binary(tag) do
+    restrict_tag(query, %{tag: [tag]})
+  end
+
+  defp restrict_tag(query, _), do: query
+
+  # Excludes activities whose object is linked to ANY of the hashtags in `:tag_reject`.
+  #
+  # `:tag_reject` may be a list of names or a single binary; any other options leave
+  # the query untouched. Raises when `skip_preload: true` is combined with `:tag_reject`.
+  defp restrict_hashtag_reject_any(_query, %{tag_reject: _tag_reject, skip_preload: true}) do
+    raise_on_missing_preload()
+  end
+
+  defp restrict_hashtag_reject_any(query, %{tag_reject: tags_reject}) when is_list(tags_reject) do
+    # The HAVING clause below aggregates hashtag names, so rows are grouped per
+    # activity/object (plus the thread mute row when that join is present).
+    if has_named_binding?(query, :thread_mute) do
+      # `thread_mute` is referenced by name, matching the check above, rather than by
+      # position. NOTE(review): other joins may precede it in the query, in which case
+      # a positional third binding would point at a different table.
+      from(
+        [activity, object, thread_mute: thread_mute] in query,
+        group_by: [activity.id, object.id, thread_mute.id]
+      )
+    else
+      from(
+        [activity, object] in query,
+        group_by: [activity.id, object.id]
+      )
+    end
+    |> join(:left, [_activity, object], hashtag in assoc(object, :hashtags), as: :hashtag)
+    |> having(
+      [hashtag: hashtag],
+      # Keep only groups whose hashtag names do not overlap (`&&`) the rejected names.
+      fragment("not(array_agg(?) && (?))", hashtag.name, ^tags_reject)
+    )
+  end
+
+  defp restrict_hashtag_reject_any(query, %{tag_reject: tag_reject}) when is_binary(tag_reject) do
+    restrict_hashtag_reject_any(query, %{tag_reject: [tag_reject]})
+  end
+
+  defp restrict_hashtag_reject_any(query, _), do: query
+
+  # Restricts the query to activities whose object is linked to EVERY hashtag in
+  # `:tag_all`, by applying `restrict_hashtag_any/2` once per tag. A single binary
+  # is treated as one tag; any other options leave the query untouched. Raises
+  # when `skip_preload: true` is combined with `:tag_all`.
+  defp restrict_hashtag_all(_query, %{skip_preload: true, tag_all: _tag_all}) do
+    raise_on_missing_preload()
+  end
+
+  defp restrict_hashtag_all(query, %{tag_all: tag_all}) when is_list(tag_all) do
+    Enum.reduce(tag_all, query, fn hashtag_name, restricted ->
+      restrict_hashtag_any(restricted, %{tag: hashtag_name})
+    end)
+  end
+
+  defp restrict_hashtag_all(query, %{tag_all: single_tag}) when is_binary(single_tag) do
+    restrict_hashtag_any(query, %{tag: single_tag})
+  end
+
+  defp restrict_hashtag_all(query, _opts), do: query
+
+  defp restrict_hashtag_any(_query, %{tag: _tag, skip_preload: true}) do
+    raise_on_missing_preload()
+  end
+
+  defp restrict_hashtag_any(query, %{tag: tags}) when is_list(tags) do
     from(
       [_activity, object] in query,
-      where: fragment("(?)->'tag' \\? (?)", object.data, ^tag)
+      join: hashtag in assoc(object, :hashtags),
+      where: hashtag.name in ^tags
     )
   end
 
-  defp restrict_tag(query, _), do: query
+  defp restrict_hashtag_any(query, %{tag: tag}) when is_binary(tag) do
+    restrict_hashtag_any(query, %{tag: [tag]})
+  end
+
+  defp restrict_hashtag_any(query, _), do: query
+
+  # Shared failure for tag/hashtag restrictions requested together with `skip_preload: true`.
+  defp raise_on_missing_preload,
+    do: raise("Can't use the child object without preloading!")
 
   defp restrict_recipients(query, [], _user), do: query
 
@@ -1088,40 +1162,51 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do
       skip_thread_containment: Config.get([:instance, :skip_thread_containment])
     }
 
-    Activity
-    |> maybe_preload_objects(opts)
-    |> maybe_preload_bookmarks(opts)
-    |> maybe_preload_report_notes(opts)
-    |> maybe_set_thread_muted_field(opts)
-    |> maybe_order(opts)
-    |> restrict_recipients(recipients, opts[:user])
-    |> restrict_replies(opts)
-    |> restrict_tag(opts)
-    |> restrict_tag_reject(opts)
-    |> restrict_tag_all(opts)
-    |> restrict_since(opts)
-    |> restrict_local(opts)
-    |> restrict_actor(opts)
-    |> restrict_type(opts)
-    |> restrict_state(opts)
-    |> restrict_favorited_by(opts)
-    |> restrict_blocked(restrict_blocked_opts)
-    |> restrict_muted(restrict_muted_opts)
-    |> restrict_filtered(opts)
-    |> restrict_media(opts)
-    |> restrict_visibility(opts)
-    |> restrict_thread_visibility(opts, config)
-    |> restrict_reblogs(opts)
-    |> restrict_pinned(opts)
-    |> restrict_muted_reblogs(restrict_muted_reblogs_opts)
-    |> restrict_instance(opts)
-    |> restrict_announce_object_actor(opts)
-    |> restrict_filtered(opts)
-    |> Activity.restrict_deactivated_users()
-    |> exclude_poll_votes(opts)
-    |> exclude_chat_messages(opts)
-    |> exclude_invisible_actors(opts)
-    |> exclude_visibility(opts)
+    query =
+      Activity
+      |> distinct([a], true)
+      |> maybe_preload_objects(opts)
+      |> maybe_preload_bookmarks(opts)
+      |> maybe_preload_report_notes(opts)
+      |> maybe_set_thread_muted_field(opts)
+      |> maybe_order(opts)
+      |> restrict_recipients(recipients, opts[:user])
+      |> restrict_replies(opts)
+      |> restrict_since(opts)
+      |> restrict_local(opts)
+      |> restrict_actor(opts)
+      |> restrict_type(opts)
+      |> restrict_state(opts)
+      |> restrict_favorited_by(opts)
+      |> restrict_blocked(restrict_blocked_opts)
+      |> restrict_muted(restrict_muted_opts)
+      |> restrict_filtered(opts)
+      |> restrict_media(opts)
+      |> restrict_visibility(opts)
+      |> restrict_thread_visibility(opts, config)
+      |> restrict_reblogs(opts)
+      |> restrict_pinned(opts)
+      |> restrict_muted_reblogs(restrict_muted_reblogs_opts)
+      |> restrict_instance(opts)
+      |> restrict_announce_object_actor(opts)
+      |> restrict_filtered(opts)
+      |> Activity.restrict_deactivated_users()
+      |> exclude_poll_votes(opts)
+      |> exclude_chat_messages(opts)
+      |> exclude_invisible_actors(opts)
+      |> exclude_visibility(opts)
+
+    if Config.get([:instance, :improved_hashtag_timeline]) do
+      query
+      |> restrict_hashtag_any(opts)
+      |> restrict_hashtag_all(opts)
+      |> restrict_hashtag_reject_any(opts)
+    else
+      query
+      |> restrict_tag(opts)
+      |> restrict_tag_reject(opts)
+      |> restrict_tag_all(opts)
+    end
   end
 
   def fetch_activities(recipients, opts \\ %{}, pagination \\ :keyset) do
index bfec32042db39c432ad10309a37a8c34e994d1d4..573b26d66d6c671cfbd6744db2beb96170e4dca1 100644 (file)
@@ -199,33 +199,37 @@ defmodule Pleroma.Web.ActivityPub.ActivityPubTest do
     {:ok, status_two} = CommonAPI.post(user, %{status: ". #essais"})
     {:ok, status_three} = CommonAPI.post(user, %{status: ". #test #reject"})
 
-    fetch_one = ActivityPub.fetch_activities([], %{type: "Create", tag: "test"})
+    for new_timeline_enabled <- [true, false] do
+      clear_config([:instance, :improved_hashtag_timeline], new_timeline_enabled)
 
-    fetch_two = ActivityPub.fetch_activities([], %{type: "Create", tag: ["test", "essais"]})
+      fetch_one = ActivityPub.fetch_activities([], %{type: "Create", tag: "test"})
 
-    fetch_three =
-      ActivityPub.fetch_activities([], %{
-        type: "Create",
-        tag: ["test", "essais"],
-        tag_reject: ["reject"]
-      })
+      fetch_two = ActivityPub.fetch_activities([], %{type: "Create", tag: ["test", "essais"]})
 
-    fetch_four =
-      ActivityPub.fetch_activities([], %{
-        type: "Create",
-        tag: ["test"],
-        tag_all: ["test", "reject"]
-      })
+      fetch_three =
+        ActivityPub.fetch_activities([], %{
+          type: "Create",
+          tag: ["test", "essais"],
+          tag_reject: ["reject"]
+        })
 
-    [fetch_one, fetch_two, fetch_three, fetch_four] =
-      Enum.map([fetch_one, fetch_two, fetch_three, fetch_four], fn statuses ->
-        Enum.map(statuses, fn s -> Repo.preload(s, object: :hashtags) end)
-      end)
+      fetch_four =
+        ActivityPub.fetch_activities([], %{
+          type: "Create",
+          tag: ["test"],
+          tag_all: ["test", "reject"]
+        })
 
-    assert fetch_one == [status_one, status_three]
-    assert fetch_two == [status_one, status_two, status_three]
-    assert fetch_three == [status_one, status_two]
-    assert fetch_four == [status_three]
+      [fetch_one, fetch_two, fetch_three, fetch_four] =
+        Enum.map([fetch_one, fetch_two, fetch_three, fetch_four], fn statuses ->
+          Enum.map(statuses, fn s -> Repo.preload(s, object: :hashtags) end)
+        end)
+
+      assert fetch_one == [status_one, status_three]
+      assert fetch_two == [status_one, status_two, status_three]
+      assert fetch_three == [status_one, status_two]
+      assert fetch_four == [status_three]
+    end
   end
 
   describe "insertion" do