From: Ivan Tashkinov Date: Sun, 31 Jan 2021 17:38:58 +0000 (+0300) Subject: Merge remote-tracking branch 'remotes/origin/develop' into feature/object-hashtags... X-Git-Url: http://git.squeep.com/?a=commitdiff_plain;h=1b49b8efe57256b3f64b4b7e8a1de805ab030814;hp=7ac0a819811496fe512544e91b6f0ff3d15b8856;p=akkoma Merge remote-tracking branch 'remotes/origin/develop' into feature/object-hashtags-rework # Conflicts: # CHANGELOG.md --- diff --git a/CHANGELOG.md b/CHANGELOG.md index c4f3867a2..47c5078b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Admin API: Reports now ordered by newest - Deprecated `Pleroma.Uploaders.S3, :public_endpoint`. Now `Pleroma.Upload, :base_url` is the standard configuration key for all uploaders. - Improved Apache webserver support: updated sample configuration, MediaProxy cache invalidation verified with the included sample script +- Extracted object hashtags into separate table in order to improve hashtag timeline performance (via background migration in `Pleroma.Migrators.HashtagsTableMigrator`). ### Added diff --git a/config/config.exs b/config/config.exs index b9af250c5..36c609936 100644 --- a/config/config.exs +++ b/config/config.exs @@ -555,10 +555,12 @@ config :pleroma, Oban, remote_fetcher: 2, attachments_cleanup: 1, new_users_digest: 1, + hashtags_cleanup: 1, mute_expire: 5 ], plugins: [Oban.Plugins.Pruner], crontab: [ + {"0 1 * * *", Pleroma.Workers.Cron.HashtagsCleanupWorker}, {"0 0 * * 0", Pleroma.Workers.Cron.DigestEmailsWorker}, {"0 0 * * *", Pleroma.Workers.Cron.NewUsersDigestWorker} ] diff --git a/config/description.exs b/config/description.exs index f84b52a4f..ed3a534a0 100644 --- a/config/description.exs +++ b/config/description.exs @@ -941,6 +941,12 @@ config :pleroma, :config_description, [ key: :show_reactions, type: :boolean, description: "Let favourites and emoji reactions be viewed through the API." + }, + %{ + key: :improved_hashtag_timeline, + type: :keyword, + description: + "If `true`, hashtags will be fetched from `hashtags` table for hashtags timeline. When `false`, object-embedded hashtags will be used (slower). Is auto-set to `true` (unless overridden) when HashtagsTableMigrator completes." } ] }, @@ -1950,6 +1956,7 @@ config :pleroma, :config_description, [ type: {:list, :tuple}, description: "Settings for cron background jobs", suggestions: [ + {"0 1 * * *", Pleroma.Workers.Cron.HashtagsCleanupWorker}, {"0 0 * * 0", Pleroma.Workers.Cron.DigestEmailsWorker}, {"0 0 * * *", Pleroma.Workers.Cron.NewUsersDigestWorker} ] diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex index 6261910f0..4ddace9c9 100644 --- a/lib/mix/tasks/pleroma/database.ex +++ b/lib/mix/tasks/pleroma/database.ex @@ -8,10 +8,13 @@ defmodule Mix.Tasks.Pleroma.Database do alias Pleroma.Object alias Pleroma.Repo alias Pleroma.User + require Logger require Pleroma.Constants + import Ecto.Query import Mix.Pleroma + use Mix.Task @shortdoc "A collection of database related tasks" diff --git a/lib/pleroma/activity.ex b/lib/pleroma/activity.ex index 6542e684e..d59403884 100644 --- a/lib/pleroma/activity.ex +++ b/lib/pleroma/activity.ex @@ -113,6 +113,7 @@ defmodule Pleroma.Activity do from([a] in query, left_join: b in Bookmark, on: b.user_id == ^user.id and b.activity_id == a.id, + as: :bookmark, preload: [bookmark: b] ) end @@ -123,6 +124,7 @@ defmodule Pleroma.Activity do from([a] in query, left_join: r in ReportNote, on: a.id == r.activity_id, + as: :report_note, preload: [report_notes: r] ) end diff --git a/lib/pleroma/activity/ir/topics.ex b/lib/pleroma/activity/ir/topics.ex index d94395fc1..7a603a615 100644 --- a/lib/pleroma/activity/ir/topics.ex +++ b/lib/pleroma/activity/ir/topics.ex @@ -48,14 +48,12 @@ defmodule Pleroma.Activity.Ir.Topics do tags end - defp hashtags_to_topics(%{data: %{"tag" => tags}}) do - tags - |> Enum.filter(&is_bitstring(&1)) - |> Enum.map(fn tag -> "hashtag:" <> tag end) + defp hashtags_to_topics(object) do + object + |> Object.hashtags() + |> Enum.map(fn hashtag -> "hashtag:" <> hashtag end) end - defp hashtags_to_topics(_), do: [] - defp remote_topics(%{local: true}), do: [] defp remote_topics(%{actor: actor}) when is_binary(actor), diff --git a/lib/pleroma/application.ex b/lib/pleroma/application.ex index 9e262235e..d39451a9d 100644 --- a/lib/pleroma/application.ex +++ b/lib/pleroma/application.ex @@ -104,7 +104,8 @@ defmodule Pleroma.Application do chat_child(chat_enabled?()) ++ [ Pleroma.Web.Endpoint, - Pleroma.Gopher.Server + Pleroma.Gopher.Server, + Pleroma.Migrators.HashtagsTableMigrator ] # See http://elixir-lang.org/docs/stable/elixir/Supervisor.html diff --git a/lib/pleroma/config.ex b/lib/pleroma/config.ex index f17e14128..0a6ac0ad0 100644 --- a/lib/pleroma/config.ex +++ b/lib/pleroma/config.ex @@ -96,6 +96,9 @@ defmodule Pleroma.Config do end end + def improved_hashtag_timeline_path, do: [:instance, :improved_hashtag_timeline] + def improved_hashtag_timeline, do: get(improved_hashtag_timeline_path()) + def oauth_consumer_strategies, do: get([:auth, :oauth_consumer_strategies], []) def oauth_consumer_enabled?, do: oauth_consumer_strategies() != [] diff --git a/lib/pleroma/data_migration.ex b/lib/pleroma/data_migration.ex new file mode 100644 index 000000000..64fa155ff --- /dev/null +++ b/lib/pleroma/data_migration.ex @@ -0,0 +1,46 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2021 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.DataMigration do + use Ecto.Schema + + alias Pleroma.DataMigration + alias Pleroma.DataMigration.State + alias Pleroma.Repo + + import Ecto.Changeset + + schema "data_migrations" do + field(:name, :string) + field(:state, State, default: :pending) + field(:feature_lock, :boolean, default: false) + field(:params, :map, default: %{}) + field(:data, :map, default: %{}) + + timestamps() + end + + def changeset(data_migration, params \\ %{}) do + data_migration + |> cast(params, [:name, :state, :feature_lock, :params, :data]) + |> validate_required([:name]) + |> unique_constraint(:name) + end + + def update(data_migration, params \\ %{}) do + data_migration + |> changeset(params) + |> Repo.update() + end + + def update_state(data_migration, new_state) do + update(data_migration, %{state: new_state}) + end + + def get_by_name(name) do + Repo.get_by(DataMigration, name: name) + end + + def populate_hashtags_table, do: get_by_name("populate_hashtags_table") +end diff --git a/lib/pleroma/delivery.ex b/lib/pleroma/delivery.ex index e8d536767..511d5cf58 100644 --- a/lib/pleroma/delivery.ex +++ b/lib/pleroma/delivery.ex @@ -9,7 +9,6 @@ defmodule Pleroma.Delivery do alias Pleroma.Object alias Pleroma.Repo alias Pleroma.User - alias Pleroma.User import Ecto.Changeset import Ecto.Query diff --git a/lib/pleroma/ecto_enums.ex b/lib/pleroma/ecto_enums.ex index f198cccb7..2a9addabc 100644 --- a/lib/pleroma/ecto_enums.ex +++ b/lib/pleroma/ecto_enums.ex @@ -17,3 +17,11 @@ defenum(Pleroma.FollowingRelationship.State, follow_accept: 2, follow_reject: 3 ) + +defenum(Pleroma.DataMigration.State, + pending: 1, + running: 2, + complete: 3, + failed: 4, + manual: 5 +) diff --git a/lib/pleroma/hashtag.ex b/lib/pleroma/hashtag.ex new file mode 100644 index 000000000..b05927563 --- /dev/null +++ b/lib/pleroma/hashtag.ex @@ -0,0 +1,58 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2020 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Hashtag do + use Ecto.Schema + + import Ecto.Changeset + + alias Pleroma.Hashtag + alias Pleroma.Repo + + @derive {Jason.Encoder, only: [:data]} + + schema "hashtags" do + field(:name, :string) + field(:data, :map, default: %{}) + + many_to_many(:objects, Pleroma.Object, join_through: "hashtags_objects", on_replace: :delete) + + timestamps() + end + + def get_by_name(name) do + Repo.get_by(Hashtag, name: name) + end + + def get_or_create_by_name(name) when is_bitstring(name) do + with %Hashtag{} = hashtag <- get_by_name(name) do + {:ok, hashtag} + else + _ -> + %Hashtag{} + |> changeset(%{name: name}) + |> Repo.insert() + end + end + + def get_or_create_by_names(names) when is_list(names) do + Enum.reduce_while(names, {:ok, []}, fn name, {:ok, list} -> + case get_or_create_by_name(name) do + {:ok, %Hashtag{} = hashtag} -> + {:cont, {:ok, list ++ [hashtag]}} + + error -> + {:halt, error} + end + end) + end + + def changeset(%Hashtag{} = struct, params) do + struct + |> cast(params, [:name, :data]) + |> update_change(:name, &String.downcase/1) + |> validate_required([:name]) + |> unique_constraint(:name) + end +end diff --git a/lib/pleroma/migrators/hashtags_table_migrator.ex b/lib/pleroma/migrators/hashtags_table_migrator.ex new file mode 100644 index 000000000..07b42a7f4 --- /dev/null +++ b/lib/pleroma/migrators/hashtags_table_migrator.ex @@ -0,0 +1,309 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2021 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Migrators.HashtagsTableMigrator do + use GenServer + + require Logger + + import Ecto.Query + + alias __MODULE__.State + alias Pleroma.Config + alias Pleroma.DataMigration + alias Pleroma.Hashtag + alias Pleroma.Object + alias Pleroma.Repo + + defdelegate state(), to: State, as: :get + defdelegate put_stat(key, value), to: State, as: :put + defdelegate increment_stat(key, increment), to: State, as: :increment + + defdelegate data_migration(), to: DataMigration, as: :populate_hashtags_table + + @reg_name {:global, __MODULE__} + + def whereis, do: GenServer.whereis(@reg_name) + + def start_link(_) do + case whereis() do + nil -> + GenServer.start_link(__MODULE__, nil, name: @reg_name) + + pid -> + {:ok, pid} + end + end + + @impl true + def init(_) do + {:ok, nil, {:continue, :init_state}} + end + + @impl true + def handle_continue(:init_state, _state) do + {:ok, _} = State.start_link(nil) + + update_status(:init) + + data_migration = data_migration() + manual_migrations = Config.get([:instance, :manual_data_migrations], []) + + cond do + Config.get(:env) == :test -> + update_status(:noop) + + is_nil(data_migration) -> + update_status(:halt, "Data migration does not exist.") + + data_migration.state == :manual or data_migration.name in manual_migrations -> + update_status(:noop, "Data migration is in manual execution state.") + + data_migration.state == :complete -> + handle_success(data_migration) + + true -> + send(self(), :migrate_hashtags) + end + + {:noreply, nil} + end + + @impl true + def handle_info(:migrate_hashtags, state) do + State.clear() + + data_migration = data_migration() + + persistent_data = Map.take(data_migration.data, ["max_processed_id"]) + + {:ok, data_migration} = + DataMigration.update(data_migration, %{state: :running, data: persistent_data}) + + update_status(:running) + put_stat(:started_at, NaiveDateTime.utc_now()) + + Logger.info("Starting transferring object embedded hashtags to `hashtags` table...") + + max_processed_id = data_migration.data["max_processed_id"] || 0 + + query() + |> where([object], object.id > ^max_processed_id) + |> Repo.chunk_stream(100, :batches, timeout: :infinity) + |> Stream.each(fn objects -> + object_ids = Enum.map(objects, & &1.id) + + failed_ids = + objects + |> Enum.map(&transfer_object_hashtags(&1)) + |> Enum.filter(&(elem(&1, 0) == :error)) + |> Enum.map(&elem(&1, 1)) + + for failed_id <- failed_ids do + _ = + Repo.query( + "INSERT INTO data_migration_failed_ids(data_migration_id, record_id) " <> + "VALUES ($1, $2) ON CONFLICT DO NOTHING;", + [data_migration.id, failed_id] + ) + end + + _ = + Repo.query( + "DELETE FROM data_migration_failed_ids " <> + "WHERE data_migration_id = $1 AND record_id = ANY($2)", + [data_migration.id, object_ids -- failed_ids] + ) + + max_object_id = Enum.at(object_ids, -1) + + put_stat(:max_processed_id, max_object_id) + increment_stat(:processed_count, length(object_ids)) + increment_stat(:failed_count, length(failed_ids)) + + put_stat( + :records_per_second, + state()[:processed_count] / + Enum.max([NaiveDateTime.diff(NaiveDateTime.utc_now(), state()[:started_at]), 1]) + ) + + persist_stats(data_migration) + + # A quick and dirty approach to controlling the load this background migration imposes + sleep_interval = Config.get([:populate_hashtags_table, :sleep_interval_ms], 0) + Process.sleep(sleep_interval) + end) + |> Stream.run() + + with 0 <- failures_count(data_migration.id) do + {:ok, data_migration} = DataMigration.update_state(data_migration, :complete) + + handle_success(data_migration) + else + _ -> + _ = DataMigration.update_state(data_migration, :failed) + + update_status(:failed, "Please check data_migration_failed_ids records.") + end + + {:noreply, state} + end + + defp query do + # Note: most objects have Mention-type AS2 tags and no hashtags (but we can't filter them out) + # Note: not checking activity type; HashtagsCleanupWorker should clean up unused records later + from( + object in Object, + where: + fragment("(?)->'tag' IS NOT NULL AND (?)->'tag' != '[]'::jsonb", object.data, object.data), + select: %{ + id: object.id, + tag: fragment("(?)->'tag'", object.data) + } + ) + |> join(:left, [o], hashtags_objects in fragment("SELECT object_id FROM hashtags_objects"), + on: hashtags_objects.object_id == o.id + ) + |> where([_o, hashtags_objects], is_nil(hashtags_objects.object_id)) + end + + defp transfer_object_hashtags(object) do + embedded_tags = if Map.has_key?(object, :tag), do: object.tag, else: object.data["tag"] + hashtags = Object.object_data_hashtags(%{"tag" => embedded_tags}) + + if Enum.any?(hashtags) do + transfer_object_hashtags(object, hashtags) + else + {:ok, object.id} + end + end + + defp transfer_object_hashtags(object, hashtags) do + Repo.transaction(fn -> + with {:ok, hashtag_records} <- Hashtag.get_or_create_by_names(hashtags) do + for hashtag_record <- hashtag_records do + with {:ok, _} <- + Repo.query( + "insert into hashtags_objects(hashtag_id, object_id) values ($1, $2);", + [hashtag_record.id, object.id] + ) do + nil + else + {:error, e} -> + error = + "ERROR: could not link object #{object.id} and hashtag " <> + "#{hashtag_record.id}: #{inspect(e)}" + + Logger.error(error) + Repo.rollback(object.id) + end + end + + object.id + else + e -> + error = "ERROR: could not create hashtags for object #{object.id}: #{inspect(e)}" + Logger.error(error) + Repo.rollback(object.id) + end + end) + end + + @doc "Approximate count for current iteration (including processed records count)" + def count(force \\ false, timeout \\ :infinity) do + stored_count = state()[:count] + + if stored_count && !force do + stored_count + else + processed_count = state()[:processed_count] || 0 + max_processed_id = data_migration().data["max_processed_id"] || 0 + query = where(query(), [object], object.id > ^max_processed_id) + + count = Repo.aggregate(query, :count, :id, timeout: timeout) + processed_count + put_stat(:count, count) + count + end + end + + defp persist_stats(data_migration) do + runner_state = Map.drop(state(), [:status]) + _ = DataMigration.update(data_migration, %{data: runner_state}) + end + + defp handle_success(data_migration) do + update_status(:complete) + + cond do + data_migration.feature_lock -> + :noop + + not is_nil(Config.improved_hashtag_timeline()) -> + :noop + + true -> + Config.put(Config.improved_hashtag_timeline_path(), true) + :ok + end + end + + def failed_objects_query do + from(o in Object) + |> join(:inner, [o], dmf in fragment("SELECT * FROM data_migration_failed_ids"), + on: dmf.record_id == o.id + ) + |> where([_o, dmf], dmf.data_migration_id == ^data_migration().id) + |> order_by([o], asc: o.id) + end + + def failures_count(data_migration_id \\ nil) do + data_migration_id = data_migration_id || data_migration().id + + with {:ok, %{rows: [[count]]}} <- + Repo.query( + "SELECT COUNT(record_id) FROM data_migration_failed_ids WHERE data_migration_id = $1;", + [data_migration_id] + ) do + count + end + end + + def retry_failed do + data_migration = data_migration() + + failed_objects_query() + |> Repo.chunk_stream(100, :one) + |> Stream.each(fn object -> + with {:ok, _} <- transfer_object_hashtags(object) do + _ = + Repo.query( + "DELETE FROM data_migration_failed_ids " <> + "WHERE data_migration_id = $1 AND record_id = $2", + [data_migration.id, object.id] + ) + end + end) + |> Stream.run() + end + + def force_continue do + send(whereis(), :migrate_hashtags) + end + + def force_restart do + {:ok, _} = DataMigration.update(data_migration(), %{state: :pending, data: %{}}) + force_continue() + end + + def force_complete do + {:ok, data_migration} = DataMigration.update_state(data_migration(), :complete) + + handle_success(data_migration) + end + + defp update_status(status, message \\ nil) do + put_stat(:status, status) + put_stat(:message, message) + end +end diff --git a/lib/pleroma/migrators/hashtags_table_migrator/state.ex b/lib/pleroma/migrators/hashtags_table_migrator/state.ex new file mode 100644 index 000000000..901563426 --- /dev/null +++ b/lib/pleroma/migrators/hashtags_table_migrator/state.ex @@ -0,0 +1,35 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2021 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Migrators.HashtagsTableMigrator.State do + use Agent + + @init_state %{} + @reg_name {:global, __MODULE__} + + def start_link(_) do + Agent.start_link(fn -> @init_state end, name: @reg_name) + end + + def clear do + Agent.update(@reg_name, fn _state -> @init_state end) + end + + def get do + Agent.get(@reg_name, & &1) + end + + def put(key, value) do + Agent.update(@reg_name, fn state -> + Map.put(state, key, value) + end) + end + + def increment(key, increment \\ 1) do + Agent.update(@reg_name, fn state -> + updated_value = (state[key] || 0) + increment + Map.put(state, key, updated_value) + end) + end +end diff --git a/lib/pleroma/object.ex b/lib/pleroma/object.ex index aaf123840..52b77e41c 100644 --- a/lib/pleroma/object.ex +++ b/lib/pleroma/object.ex @@ -10,6 +10,7 @@ defmodule Pleroma.Object do alias Pleroma.Activity alias Pleroma.Config + alias Pleroma.Hashtag alias Pleroma.Object alias Pleroma.Object.Fetcher alias Pleroma.ObjectTombstone @@ -28,6 +29,8 @@ defmodule Pleroma.Object do schema "objects" do field(:data, :map) + many_to_many(:hashtags, Hashtag, join_through: "hashtags_objects", on_replace: :delete) + timestamps() end @@ -49,7 +52,8 @@ defmodule Pleroma.Object do end def create(data) do - Object.change(%Object{}, %{data: data}) + %Object{} + |> Object.change(%{data: data}) |> Repo.insert() end @@ -58,8 +62,38 @@ defmodule Pleroma.Object do |> cast(params, [:data]) |> validate_required([:data]) |> unique_constraint(:ap_id, name: :objects_unique_apid_index) + |> maybe_handle_hashtags_change(struct) + end + + # Note: not checking activity type; HashtagsCleanupWorker should clean up unused records later + defp maybe_handle_hashtags_change(changeset, struct) do + with data_hashtags_change = get_change(changeset, :data), + true <- hashtags_changed?(struct, data_hashtags_change), + {:ok, hashtag_records} <- + data_hashtags_change + |> object_data_hashtags() + |> Hashtag.get_or_create_by_names() do + put_assoc(changeset, :hashtags, hashtag_records) + else + false -> + changeset + + {:error, hashtag_changeset} -> + failed_hashtag = get_field(hashtag_changeset, :name) + + validate_change(changeset, :data, fn _, _ -> + [data: "error referencing hashtag: #{failed_hashtag}"] + end) + end + end + + defp hashtags_changed?(%Object{} = struct, %{"tag" => _} = data) do + Enum.sort(embedded_hashtags(struct)) != + Enum.sort(object_data_hashtags(data)) end + defp hashtags_changed?(_, _), do: false + def get_by_id(nil), do: nil def get_by_id(id), do: Repo.get(Object, id) @@ -349,4 +383,39 @@ defmodule Pleroma.Object do def self_replies(object, opts \\ []), do: replies(object, Keyword.put(opts, :self_only, true)) + + def tags(%Object{data: %{"tag" => tags}}) when is_list(tags), do: tags + + def tags(_), do: [] + + def hashtags(%Object{} = object) do + # Note: always using embedded hashtags regardless whether they are migrated to hashtags table + # (embedded hashtags stay in sync anyways, and we avoid extra joins and preload hassle) + embedded_hashtags(object) + end + + def embedded_hashtags(%Object{data: data}) do + object_data_hashtags(data) + end + + def embedded_hashtags(_), do: [] + + def object_data_hashtags(%{"tag" => tags}) when is_list(tags) do + tags + |> Enum.filter(fn + %{"type" => "Hashtag"} = data -> Map.has_key?(data, "name") + plain_text when is_bitstring(plain_text) -> true + _ -> false + end) + |> Enum.map(fn + %{"name" => "#" <> hashtag} -> String.downcase(hashtag) + %{"name" => hashtag} -> String.downcase(hashtag) + hashtag when is_bitstring(hashtag) -> String.downcase(hashtag) + end) + |> Enum.uniq() + # Note: "" elements (plain text) might occur in `data.tag` for incoming objects + |> Enum.filter(&(&1 not in [nil, ""])) + end + + def object_data_hashtags(_), do: [] end diff --git a/lib/pleroma/repo.ex b/lib/pleroma/repo.ex index 4556352d0..61b64ed3e 100644 --- a/lib/pleroma/repo.ex +++ b/lib/pleroma/repo.ex @@ -63,8 +63,8 @@ defmodule Pleroma.Repo do iex> Pleroma.Repo.chunk_stream(Pleroma.Activity.Queries.by_actor(ap_id), 500, :batches) """ @spec chunk_stream(Ecto.Query.t(), integer(), atom()) :: Enumerable.t() - def chunk_stream(query, chunk_size, returns_as \\ :one) do - # We don't actually need start and end funcitons of resource streaming, + def chunk_stream(query, chunk_size, returns_as \\ :one, query_options \\ []) do + # We don't actually need start and end functions of resource streaming, # but it seems to be the only way to not fetch records one-by-one and # have individual records be the elements of the stream, instead of # lists of records @@ -76,7 +76,7 @@ defmodule Pleroma.Repo do |> order_by(asc: :id) |> where([r], r.id > ^last_id) |> limit(^chunk_size) - |> all() + |> all(query_options) |> case do [] -> {:halt, last_id} diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex index d0bb07aab..cda8d3f54 100644 --- a/lib/pleroma/web/activity_pub/activity_pub.ex +++ b/lib/pleroma/web/activity_pub/activity_pub.ex @@ -669,51 +669,141 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do defp restrict_since(query, _), do: query - defp restrict_tag_reject(_query, %{tag_reject: _tag_reject, skip_preload: true}) do - raise "Can't use the child object without preloading!" + defp restrict_embedded_tag_all(_query, %{tag_all: _tag_all, skip_preload: true}) do + raise_on_missing_preload() + end + + defp restrict_embedded_tag_all(query, %{tag_all: tag_all}) when is_list(tag_all) do + from( + [_activity, object] in query, + where: fragment("(?)->'tag' \\?& (?)", object.data, ^tag_all) + ) + end + + defp restrict_embedded_tag_all(query, %{tag_all: tag}) when is_binary(tag) do + restrict_embedded_tag_any(query, %{tag: tag}) end - defp restrict_tag_reject(query, %{tag_reject: [_ | _] = tag_reject}) do + defp restrict_embedded_tag_all(query, _), do: query + + defp restrict_embedded_tag_any(_query, %{tag: _tag, skip_preload: true}) do + raise_on_missing_preload() + end + + defp restrict_embedded_tag_any(query, %{tag: tag}) when is_list(tag) do + from( + [_activity, object] in query, + where: fragment("(?)->'tag' \\?| (?)", object.data, ^tag) + ) + end + + defp restrict_embedded_tag_any(query, %{tag: tag}) when is_binary(tag) do + restrict_embedded_tag_any(query, %{tag: [tag]}) + end + + defp restrict_embedded_tag_any(query, _), do: query + + defp restrict_embedded_tag_reject_any(_query, %{tag_reject: _tag_reject, skip_preload: true}) do + raise_on_missing_preload() + end + + defp restrict_embedded_tag_reject_any(query, %{tag_reject: tag_reject}) + when is_list(tag_reject) do from( [_activity, object] in query, where: fragment("not (?)->'tag' \\?| (?)", object.data, ^tag_reject) ) end - defp restrict_tag_reject(query, _), do: query + defp restrict_embedded_tag_reject_any(query, %{tag_reject: tag_reject}) + when is_binary(tag_reject) do + restrict_embedded_tag_reject_any(query, %{tag_reject: [tag_reject]}) + end - defp restrict_tag_all(_query, %{tag_all: _tag_all, skip_preload: true}) do - raise "Can't use the child object without preloading!" + defp restrict_embedded_tag_reject_any(query, _), do: query + + defp restrict_hashtag_all(_query, %{tag_all: _tag, skip_preload: true}) do + raise_on_missing_preload() end - defp restrict_tag_all(query, %{tag_all: [_ | _] = tag_all}) do + defp restrict_hashtag_all(query, %{tag_all: tags}) when is_list(tags) do from( [_activity, object] in query, - where: fragment("(?)->'tag' \\?& (?)", object.data, ^tag_all) + where: + fragment( + """ + (SELECT array_agg(hashtags.name) FROM hashtags JOIN hashtags_objects + ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?) + AND hashtags_objects.object_id = ?) @> ? + """, + ^tags, + object.id, + ^tags + ) ) end - defp restrict_tag_all(query, _), do: query + defp restrict_hashtag_all(query, %{tag_all: tag}) when is_binary(tag) do + restrict_hashtag_any(query, %{tag: tag}) + end + + defp restrict_hashtag_all(query, _), do: query - defp restrict_tag(_query, %{tag: _tag, skip_preload: true}) do - raise "Can't use the child object without preloading!" + defp restrict_hashtag_any(_query, %{tag: _tag, skip_preload: true}) do + raise_on_missing_preload() end - defp restrict_tag(query, %{tag: tag}) when is_list(tag) do + defp restrict_hashtag_any(query, %{tag: tags}) when is_list(tags) do from( [_activity, object] in query, - where: fragment("(?)->'tag' \\?| (?)", object.data, ^tag) + where: + fragment( + """ + EXISTS (SELECT 1 FROM hashtags JOIN hashtags_objects + ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?) + AND hashtags_objects.object_id = ? LIMIT 1) + """, + ^tags, + object.id + ) ) end - defp restrict_tag(query, %{tag: tag}) when is_binary(tag) do + defp restrict_hashtag_any(query, %{tag: tag}) when is_binary(tag) do + restrict_hashtag_any(query, %{tag: [tag]}) + end + + defp restrict_hashtag_any(query, _), do: query + + defp restrict_hashtag_reject_any(_query, %{tag_reject: _tag_reject, skip_preload: true}) do + raise_on_missing_preload() + end + + defp restrict_hashtag_reject_any(query, %{tag_reject: tags_reject}) when is_list(tags_reject) do from( [_activity, object] in query, - where: fragment("(?)->'tag' \\? (?)", object.data, ^tag) + where: + fragment( + """ + NOT EXISTS (SELECT 1 FROM hashtags JOIN hashtags_objects + ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?) + AND hashtags_objects.object_id = ? LIMIT 1) + """, + ^tags_reject, + object.id + ) ) end - defp restrict_tag(query, _), do: query + defp restrict_hashtag_reject_any(query, %{tag_reject: tag_reject}) when is_binary(tag_reject) do + restrict_hashtag_reject_any(query, %{tag_reject: [tag_reject]}) + end + + defp restrict_hashtag_reject_any(query, _), do: query + + defp raise_on_missing_preload do + raise "Can't use the child object without preloading!" + end defp restrict_recipients(query, [], _user), do: query @@ -1098,40 +1188,50 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do skip_thread_containment: Config.get([:instance, :skip_thread_containment]) } - Activity - |> maybe_preload_objects(opts) - |> maybe_preload_bookmarks(opts) - |> maybe_preload_report_notes(opts) - |> maybe_set_thread_muted_field(opts) - |> maybe_order(opts) - |> restrict_recipients(recipients, opts[:user]) - |> restrict_replies(opts) - |> restrict_tag(opts) - |> restrict_tag_reject(opts) - |> restrict_tag_all(opts) - |> restrict_since(opts) - |> restrict_local(opts) - |> restrict_actor(opts) - |> restrict_type(opts) - |> restrict_state(opts) - |> restrict_favorited_by(opts) - |> restrict_blocked(restrict_blocked_opts) - |> restrict_muted(restrict_muted_opts) - |> restrict_filtered(opts) - |> restrict_media(opts) - |> restrict_visibility(opts) - |> restrict_thread_visibility(opts, config) - |> restrict_reblogs(opts) - |> restrict_pinned(opts) - |> restrict_muted_reblogs(restrict_muted_reblogs_opts) - |> restrict_instance(opts) - |> restrict_announce_object_actor(opts) - |> restrict_filtered(opts) - |> Activity.restrict_deactivated_users() - |> exclude_poll_votes(opts) - |> exclude_chat_messages(opts) - |> exclude_invisible_actors(opts) - |> exclude_visibility(opts) + query = + Activity + |> maybe_preload_objects(opts) + |> maybe_preload_bookmarks(opts) + |> maybe_preload_report_notes(opts) + |> maybe_set_thread_muted_field(opts) + |> maybe_order(opts) + |> restrict_recipients(recipients, opts[:user]) + |> restrict_replies(opts) + |> restrict_since(opts) + |> restrict_local(opts) + |> restrict_actor(opts) + |> restrict_type(opts) + |> restrict_state(opts) + |> restrict_favorited_by(opts) + |> restrict_blocked(restrict_blocked_opts) + |> restrict_muted(restrict_muted_opts) + |> restrict_filtered(opts) + |> restrict_media(opts) + |> restrict_visibility(opts) + |> restrict_thread_visibility(opts, config) + |> restrict_reblogs(opts) + |> restrict_pinned(opts) + |> restrict_muted_reblogs(restrict_muted_reblogs_opts) + |> restrict_instance(opts) + |> restrict_announce_object_actor(opts) + |> restrict_filtered(opts) + |> Activity.restrict_deactivated_users() + |> exclude_poll_votes(opts) + |> exclude_chat_messages(opts) + |> exclude_invisible_actors(opts) + |> exclude_visibility(opts) + + if Config.improved_hashtag_timeline() do + query + |> restrict_hashtag_any(opts) + |> restrict_hashtag_all(opts) + |> restrict_hashtag_reject_any(opts) + else + query + |> restrict_embedded_tag_any(opts) + |> restrict_embedded_tag_all(opts) + |> restrict_embedded_tag_reject_any(opts) + end end def fetch_activities(recipients, opts \\ %{}, pagination \\ :keyset) do diff --git a/lib/pleroma/web/activity_pub/mrf/simple_policy.ex b/lib/pleroma/web/activity_pub/mrf/simple_policy.ex index bb3838d2c..0b1be8c51 100644 --- a/lib/pleroma/web/activity_pub/mrf/simple_policy.ex +++ b/lib/pleroma/web/activity_pub/mrf/simple_policy.ex @@ -74,9 +74,11 @@ defmodule Pleroma.Web.ActivityPub.MRF.SimplePolicy do object = if MRF.subdomain_match?(media_nsfw, actor_host) do - tags = (child_object["tag"] || []) ++ ["nsfw"] - child_object = Map.put(child_object, "tag", tags) - child_object = Map.put(child_object, "sensitive", true) + child_object = + child_object + |> Map.put("tag", (child_object["tag"] || []) ++ ["nsfw"]) + |> Map.put("sensitive", true) + Map.put(object, "object", child_object) else object diff --git a/lib/pleroma/web/activity_pub/transmogrifier.ex b/lib/pleroma/web/activity_pub/transmogrifier.ex index 4d9a5617e..0a701334f 100644 --- a/lib/pleroma/web/activity_pub/transmogrifier.ex +++ b/lib/pleroma/web/activity_pub/transmogrifier.ex @@ -32,18 +32,18 @@ defmodule Pleroma.Web.ActivityPub.Transmogrifier do """ def fix_object(object, options \\ []) do object - |> strip_internal_fields - |> fix_actor - |> fix_url - |> fix_attachments - |> fix_context + |> strip_internal_fields() + |> fix_actor() + |> fix_url() + |> fix_attachments() + |> fix_context() |> fix_in_reply_to(options) - |> fix_emoji - |> fix_tag - |> set_sensitive - |> fix_content_map - |> fix_addressing - |> fix_summary + |> fix_emoji() + |> fix_tag() + |> set_sensitive() + |> fix_content_map() + |> fix_addressing() + |> fix_summary() |> fix_type(options) end @@ -315,10 +315,9 @@ defmodule Pleroma.Web.ActivityPub.Transmogrifier do tags = tag |> Enum.filter(fn data -> data["type"] == "Hashtag" and data["name"] end) - |> Enum.map(fn %{"name" => name} -> - name - |> String.slice(1..-1) - |> String.downcase() + |> Enum.map(fn + %{"name" => "#" <> hashtag} -> String.downcase(hashtag) + %{"name" => hashtag} -> String.downcase(hashtag) end) Map.put(object, "tag", tag ++ tags) diff --git a/lib/pleroma/web/feed/feed_view.ex b/lib/pleroma/web/feed/feed_view.ex index df97d2f46..66940f311 100644 --- a/lib/pleroma/web/feed/feed_view.ex +++ b/lib/pleroma/web/feed/feed_view.ex @@ -32,6 +32,7 @@ defmodule Pleroma.Web.Feed.FeedView do %{ activity: activity, + object: object, data: Map.get(object, :data), actor: actor } diff --git a/lib/pleroma/web/mastodon_api/controllers/timeline_controller.ex b/lib/pleroma/web/mastodon_api/controllers/timeline_controller.ex index 08e6f23b9..1fb954a9b 100644 --- a/lib/pleroma/web/mastodon_api/controllers/timeline_controller.ex +++ b/lib/pleroma/web/mastodon_api/controllers/timeline_controller.ex @@ -134,9 +134,9 @@ defmodule Pleroma.Web.MastodonAPI.TimelineController do tags = [params[:tag], params[:any]] |> List.flatten() - |> Enum.uniq() |> Enum.reject(&is_nil/1) |> Enum.map(&String.downcase/1) + |> Enum.uniq() tag_all = params diff --git a/lib/pleroma/web/mastodon_api/views/status_view.ex b/lib/pleroma/web/mastodon_api/views/status_view.ex index 2cd6732fe..bbe0b11ec 100644 --- a/lib/pleroma/web/mastodon_api/views/status_view.ex +++ b/lib/pleroma/web/mastodon_api/views/status_view.ex @@ -201,8 +201,10 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do like_count = object.data["like_count"] || 0 announcement_count = object.data["announcement_count"] || 0 - tags = object.data["tag"] || [] - sensitive = object.data["sensitive"] || Enum.member?(tags, "nsfw") + hashtags = Object.hashtags(object) + sensitive = object.data["sensitive"] || Enum.member?(hashtags, "nsfw") + + tags = Object.tags(object) tag_mentions = tags diff --git a/lib/pleroma/web/templates/feed/feed/_activity.atom.eex b/lib/pleroma/web/templates/feed/feed/_activity.atom.eex index 3fd150c4e..6688830ba 100644 --- a/lib/pleroma/web/templates/feed/feed/_activity.atom.eex +++ b/lib/pleroma/web/templates/feed/feed/_activity.atom.eex @@ -22,7 +22,7 @@ <% end %> - <%= for tag <- @data["tag"] || [] do %> + <%= for tag <- Pleroma.Object.hashtags(@object) do %> <% end %> diff --git a/lib/pleroma/web/templates/feed/feed/_activity.rss.eex b/lib/pleroma/web/templates/feed/feed/_activity.rss.eex index 42960de7d..fc6d74b42 100644 --- a/lib/pleroma/web/templates/feed/feed/_activity.rss.eex +++ b/lib/pleroma/web/templates/feed/feed/_activity.rss.eex @@ -21,7 +21,7 @@ <%= @data["external_url"] %> <% end %> - <%= for tag <- @data["tag"] || [] do %> + <%= for tag <- Pleroma.Object.hashtags(@object) do %> <% end %> diff --git a/lib/pleroma/web/templates/feed/feed/_tag_activity.atom.eex b/lib/pleroma/web/templates/feed/feed/_tag_activity.atom.eex index cf5874a91..c2de28fe4 100644 --- a/lib/pleroma/web/templates/feed/feed/_tag_activity.atom.eex +++ b/lib/pleroma/web/templates/feed/feed/_tag_activity.atom.eex @@ -41,7 +41,7 @@ <% end %> <% end %> - <%= for tag <- @data["tag"] || [] do %> + <%= for tag <- Pleroma.Object.hashtags(@object) do %> <% end %> diff --git a/lib/pleroma/workers/cron/hashtags_cleanup_worker.ex b/lib/pleroma/workers/cron/hashtags_cleanup_worker.ex new file mode 100644 index 000000000..b319067ca --- /dev/null +++ b/lib/pleroma/workers/cron/hashtags_cleanup_worker.ex @@ -0,0 +1,57 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2021 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Workers.Cron.HashtagsCleanupWorker do + @moduledoc """ + The worker to clean up unused hashtags_objects and hashtags. + """ + + use Oban.Worker, queue: "hashtags_cleanup" + + alias Pleroma.Repo + + require Logger + + @hashtags_objects_query """ + DELETE FROM hashtags_objects WHERE object_id IN + (SELECT DISTINCT objects.id FROM objects + JOIN hashtags_objects ON hashtags_objects.object_id = objects.id LEFT JOIN activities + ON COALESCE(activities.data->'object'->>'id', activities.data->>'object') = + (objects.data->>'id') + AND activities.data->>'type' = 'Create' + WHERE activities.id IS NULL); + """ + + @hashtags_query """ + DELETE FROM hashtags WHERE id IN + (SELECT hashtags.id FROM hashtags + LEFT OUTER JOIN hashtags_objects + ON hashtags_objects.hashtag_id = hashtags.id + WHERE hashtags_objects.hashtag_id IS NULL AND hashtags.inserted_at < $1); + """ + + @impl Oban.Worker + def perform(_job) do + Logger.info("Cleaning up unused `hashtags_objects` records...") + + {:ok, %{num_rows: hashtags_objects_count}} = + Repo.query(@hashtags_objects_query, [], timeout: :infinity) + + Logger.info("Deleted #{hashtags_objects_count} unused `hashtags_objects` records.") + + Logger.info("Cleaning up unused `hashtags` records...") + + # Note: ignoring recently created hashtags since references are added after hashtag is created + {:ok, %{num_rows: hashtags_count}} = + Repo.query(@hashtags_query, [NaiveDateTime.add(NaiveDateTime.utc_now(), -3600 * 24)], + timeout: :infinity + ) + + Logger.info("Deleted #{hashtags_count} unused `hashtags` records.") + + Logger.info("HashtagsCleanupWorker complete.") + + :ok + end +end diff --git a/priv/repo/migrations/20190711042021_create_safe_jsonb_set.exs b/priv/repo/migrations/20190711042021_create_safe_jsonb_set.exs index 43d616705..bfac09f9e 100644 --- a/priv/repo/migrations/20190711042021_create_safe_jsonb_set.exs +++ b/priv/repo/migrations/20190711042021_create_safe_jsonb_set.exs @@ -9,7 +9,7 @@ defmodule Pleroma.Repo.Migrations.CreateSafeJsonbSet do begin result := jsonb_set(target, path, coalesce(new_value, 'null'::jsonb), create_missing); if result is NULL then - raise 'jsonb_set tried to wipe the object, please report this incindent to Pleroma bug tracker. https://git.pleroma.social/pleroma/pleroma/issues/new'; + raise 'jsonb_set tried to wipe the object, please report this incident to Pleroma bug tracker. https://git.pleroma.social/pleroma/pleroma/issues/new'; return target; else return result; diff --git a/priv/repo/migrations/20201221202251_create_hashtags.exs b/priv/repo/migrations/20201221202251_create_hashtags.exs new file mode 100644 index 000000000..afc522002 --- /dev/null +++ b/priv/repo/migrations/20201221202251_create_hashtags.exs @@ -0,0 +1,14 @@ +defmodule Pleroma.Repo.Migrations.CreateHashtags do + use Ecto.Migration + + def change do + create_if_not_exists table(:hashtags) do + add(:name, :citext, null: false) + add(:data, :map, default: %{}) + + timestamps() + end + + create_if_not_exists(unique_index(:hashtags, [:name])) + end +end diff --git a/priv/repo/migrations/20201221203824_create_hashtags_objects.exs b/priv/repo/migrations/20201221203824_create_hashtags_objects.exs new file mode 100644 index 000000000..214ea81c3 --- /dev/null +++ b/priv/repo/migrations/20201221203824_create_hashtags_objects.exs @@ -0,0 +1,13 @@ +defmodule Pleroma.Repo.Migrations.CreateHashtagsObjects do + use Ecto.Migration + + def change do + create_if_not_exists table(:hashtags_objects, primary_key: false) do + add(:hashtag_id, references(:hashtags), null: false) + add(:object_id, references(:objects), null: false) + end + + create_if_not_exists(unique_index(:hashtags_objects, [:hashtag_id, :object_id])) + create_if_not_exists(index(:hashtags_objects, [:object_id])) + end +end diff --git a/priv/repo/migrations/20210105195018_create_data_migrations.exs b/priv/repo/migrations/20210105195018_create_data_migrations.exs new file mode 100644 index 000000000..5f2e8d96c --- /dev/null +++ b/priv/repo/migrations/20210105195018_create_data_migrations.exs @@ -0,0 +1,17 @@ +defmodule Pleroma.Repo.Migrations.CreateDataMigrations do + use Ecto.Migration + + def change do + create_if_not_exists table(:data_migrations) do + add(:name, :string, null: false) + add(:state, :integer, default: 1) + add(:feature_lock, :boolean, default: false) + add(:params, :map, default: %{}) + add(:data, :map, default: %{}) + + timestamps() + end + + create_if_not_exists(unique_index(:data_migrations, [:name])) + end +end diff --git a/priv/repo/migrations/20210106183301_data_migration_create_populate_hashtags_table.exs b/priv/repo/migrations/20210106183301_data_migration_create_populate_hashtags_table.exs new file mode 100644 index 000000000..2a965f075 --- /dev/null +++ b/priv/repo/migrations/20210106183301_data_migration_create_populate_hashtags_table.exs @@ -0,0 +1,14 @@ +defmodule Pleroma.Repo.Migrations.DataMigrationCreatePopulateHashtagsTable do + use Ecto.Migration + + def up do + dt = NaiveDateTime.utc_now() + + execute( + "INSERT INTO data_migrations(name, inserted_at, updated_at) " <> + "VALUES ('populate_hashtags_table', '#{dt}', '#{dt}') ON CONFLICT DO NOTHING;" + ) + end + + def down, do: :ok +end diff --git a/priv/repo/migrations/20210111172254_create_data_migration_failed_ids.exs b/priv/repo/migrations/20210111172254_create_data_migration_failed_ids.exs new file mode 100644 index 000000000..ba0be98af --- /dev/null +++ b/priv/repo/migrations/20210111172254_create_data_migration_failed_ids.exs @@ -0,0 +1,14 @@ +defmodule Pleroma.Repo.Migrations.CreateDataMigrationFailedIds do + use Ecto.Migration + + def change do + create_if_not_exists table(:data_migration_failed_ids, primary_key: false) do + add(:data_migration_id, references(:data_migrations), null: false) + add(:record_id, :bigint, null: false) + end + + create_if_not_exists( + unique_index(:data_migration_failed_ids, [:data_migration_id, :record_id]) + ) + end +end diff --git a/test/pleroma/activity/ir/topics_test.exs b/test/pleroma/activity/ir/topics_test.exs index 6b848e04d..9c8e5d932 100644 --- a/test/pleroma/activity/ir/topics_test.exs +++ b/test/pleroma/activity/ir/topics_test.exs @@ -11,6 +11,8 @@ defmodule Pleroma.Activity.Ir.TopicsTest do require Pleroma.Constants + import Mock + describe "poll answer" do test "produce no topics" do activity = %Activity{object: %Object{data: %{"type" => "Answer"}}} @@ -77,14 +79,13 @@ defmodule Pleroma.Activity.Ir.TopicsTest do refute Enum.member?(topics, "public:local:media") end - test "converts tags to hash tags", %{activity: %{object: %{data: data} = object} = activity} do - tagged_data = Map.put(data, "tag", ["foo", "bar"]) - activity = %{activity | object: %{object | data: tagged_data}} - - topics = Topics.get_activity_topics(activity) + test "converts tags to hash tags", %{activity: activity} do + with_mock(Object, [:passthrough], hashtags: fn _ -> ["foo", "bar"] end) do + topics = Topics.get_activity_topics(activity) - assert Enum.member?(topics, "hashtag:foo") - assert Enum.member?(topics, "hashtag:bar") + assert Enum.member?(topics, "hashtag:foo") + assert Enum.member?(topics, "hashtag:bar") + end end test "only converts strings to hash tags", %{ diff --git a/test/pleroma/hashtag_test.exs b/test/pleroma/hashtag_test.exs new file mode 100644 index 000000000..0264dea0b --- /dev/null +++ b/test/pleroma/hashtag_test.exs @@ -0,0 +1,17 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2021 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.HashtagTest do + use Pleroma.DataCase + + alias Pleroma.Hashtag + + describe "changeset validations" do + test "ensure non-blank :name" do + changeset = Hashtag.changeset(%Hashtag{}, %{name: ""}) + + assert {:name, {"can't be blank", [validation: :required]}} in changeset.errors + end + end +end diff --git a/test/pleroma/object_test.exs b/test/pleroma/object_test.exs index db7678d5d..8320660a5 100644 --- a/test/pleroma/object_test.exs +++ b/test/pleroma/object_test.exs @@ -5,10 +5,13 @@ defmodule Pleroma.ObjectTest do use Pleroma.DataCase use Oban.Testing, repo: Pleroma.Repo + import ExUnit.CaptureLog import Pleroma.Factory import Tesla.Mock + alias Pleroma.Activity + alias Pleroma.Hashtag alias Pleroma.Object alias Pleroma.Repo alias Pleroma.Tests.ObanHelpers @@ -417,4 +420,28 @@ defmodule Pleroma.ObjectTest do assert updated_object.data["like_count"] == 1 end end + + describe ":hashtags association" do + test "Hashtag records are created with Object record and updated on its change" do + user = insert(:user) + + {:ok, %{object: object}} = + CommonAPI.post(user, %{status: "some text #hashtag1 #hashtag2 ..."}) + + assert [%Hashtag{name: "hashtag1"}, %Hashtag{name: "hashtag2"}] = + Enum.sort_by(object.hashtags, & &1.name) + + {:ok, object} = Object.update_data(object, %{"tag" => []}) + + assert [] = object.hashtags + + object = Object.get_by_id(object.id) |> Repo.preload(:hashtags) + assert [] = object.hashtags + + {:ok, object} = Object.update_data(object, %{"tag" => ["abc", "def"]}) + + assert [%Hashtag{name: "abc"}, %Hashtag{name: "def"}] = + Enum.sort_by(object.hashtags, & &1.name) + end + end end diff --git a/test/pleroma/web/activity_pub/activity_pub_test.exs b/test/pleroma/web/activity_pub/activity_pub_test.exs index f4023856c..5b9fc061e 100644 --- a/test/pleroma/web/activity_pub/activity_pub_test.exs +++ b/test/pleroma/web/activity_pub/activity_pub_test.exs @@ -217,28 +217,49 @@ defmodule Pleroma.Web.ActivityPub.ActivityPubTest do {:ok, status_two} = CommonAPI.post(user, %{status: ". #essais"}) {:ok, status_three} = CommonAPI.post(user, %{status: ". #test #reject"}) - fetch_one = ActivityPub.fetch_activities([], %{type: "Create", tag: "test"}) + {:ok, status_four} = CommonAPI.post(user, %{status: ". #any1 #any2"}) + {:ok, status_five} = CommonAPI.post(user, %{status: ". #any2 #any1"}) - fetch_two = ActivityPub.fetch_activities([], %{type: "Create", tag: ["test", "essais"]}) + for hashtag_timeline_strategy <- [true, false] do + clear_config([:instance, :improved_hashtag_timeline], hashtag_timeline_strategy) - fetch_three = - ActivityPub.fetch_activities([], %{ - type: "Create", - tag: ["test", "essais"], - tag_reject: ["reject"] - }) + fetch_one = ActivityPub.fetch_activities([], %{type: "Create", tag: "test"}) - fetch_four = - ActivityPub.fetch_activities([], %{ - type: "Create", - tag: ["test"], - tag_all: ["test", "reject"] - }) + fetch_two = ActivityPub.fetch_activities([], %{type: "Create", tag: ["test", "essais"]}) - assert fetch_one == [status_one, status_three] - assert fetch_two == [status_one, status_two, status_three] - assert fetch_three == [status_one, status_two] - assert fetch_four == [status_three] + fetch_three = + ActivityPub.fetch_activities([], %{ + type: "Create", + tag: ["test", "essais"], + tag_reject: ["reject"] + }) + + fetch_four = + ActivityPub.fetch_activities([], %{ + type: "Create", + tag: ["test"], + tag_all: ["test", "reject"] + }) + + # Testing that deduplication (if needed) is done on DB (not Ecto) level; :limit is important + fetch_five = + ActivityPub.fetch_activities([], %{ + type: "Create", + tag: ["any1", "any2"], + limit: 2 + }) + + [fetch_one, fetch_two, fetch_three, fetch_four, fetch_five] = + Enum.map([fetch_one, fetch_two, fetch_three, fetch_four, fetch_five], fn statuses -> + Enum.map(statuses, fn s -> Repo.preload(s, object: :hashtags) end) + end) + + assert fetch_one == [status_one, status_three] + assert fetch_two == [status_one, status_two, status_three] + assert fetch_three == [status_one, status_two] + assert fetch_four == [status_three] + assert fetch_five == [status_four, status_five] + end end describe "insertion" do diff --git a/test/pleroma/web/activity_pub/transmogrifier/note_handling_test.exs b/test/pleroma/web/activity_pub/transmogrifier/note_handling_test.exs index 31586abc9..deb956410 100644 --- a/test/pleroma/web/activity_pub/transmogrifier/note_handling_test.exs +++ b/test/pleroma/web/activity_pub/transmogrifier/note_handling_test.exs @@ -39,7 +39,8 @@ defmodule Pleroma.Web.ActivityPub.Transmogrifier.NoteHandlingTest do {:ok, %Activity{data: data, local: false}} = Transmogrifier.handle_incoming(data) object = Object.normalize(data["object"], fetch: false) - assert "test" in object.data["tag"] + assert "test" in Object.tags(object) + assert Object.hashtags(object) == ["test"] end test "it cleans up incoming notices which are not really DMs" do @@ -220,7 +221,8 @@ defmodule Pleroma.Web.ActivityPub.Transmogrifier.NoteHandlingTest do {:ok, %Activity{data: data, local: false}} = Transmogrifier.handle_incoming(data) object = Object.normalize(data["object"], fetch: false) - assert Enum.at(object.data["tag"], 2) == "moo" + assert Enum.at(Object.tags(object), 2) == "moo" + assert Object.hashtags(object) == ["moo"] end test "it works for incoming notices with contentMap" do diff --git a/test/pleroma/web/common_api_test.exs b/test/pleroma/web/common_api_test.exs index adfe58def..9d005697c 100644 --- a/test/pleroma/web/common_api_test.exs +++ b/test/pleroma/web/common_api_test.exs @@ -493,7 +493,7 @@ defmodule Pleroma.Web.CommonAPITest do object = Object.normalize(activity, fetch: false) - assert object.data["tag"] == ["2hu"] + assert Object.tags(object) == ["2hu"] end test "it adds emoji in the object" do diff --git a/test/pleroma/web/mastodon_api/views/status_view_test.exs b/test/pleroma/web/mastodon_api/views/status_view_test.exs index ed59cf285..48f5f0dd0 100644 --- a/test/pleroma/web/mastodon_api/views/status_view_test.exs +++ b/test/pleroma/web/mastodon_api/views/status_view_test.exs @@ -262,8 +262,8 @@ defmodule Pleroma.Web.MastodonAPI.StatusViewTest do mentions: [], tags: [ %{ - name: "#{object_data["tag"]}", - url: "http://localhost:4001/tag/#{object_data["tag"]}" + name: "#{hd(object_data["tag"])}", + url: "http://localhost:4001/tag/#{hd(object_data["tag"])}" } ], application: %{