# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.User.Search do
+ alias Pleroma.Pagination
alias Pleroma.Repo
alias Pleroma.User
import Ecto.Query
- def search(query, opts \\ []) do
+ @similarity_threshold 0.25
+ @limit 20
+
+ # Searches users matching `query_string`.
+ #
+ # Options read from `opts`:
+ #   * `:resolve`   - passed to `maybe_resolve/3` (default `false`)
+ #   * `:following` - passed to `search_query/3` (default `false`)
+ #   * `:limit`     - page size handed to the paginator (default `@limit`)
+ #   * `:offset`    - page offset handed to the paginator (default `0`)
+ #   * `:for_user`  - the user performing the search, if any
+ #
+ # Returns whatever `Pagination.fetch_paginated/3` yields for the built query.
+ def search(query_string, opts \\ []) do
resolve = Keyword.get(opts, :resolve, false)
+ following = Keyword.get(opts, :following, false)
+ result_limit = Keyword.get(opts, :limit, @limit)
+ offset = Keyword.get(opts, :offset, 0)
+
for_user = Keyword.get(opts, :for_user)
- # Strip the beginning @ off if there is a query
- query = String.trim_leading(query, "@")
+ query_string = format_query(query_string)
- maybe_resolve(resolve, for_user, query)
+ maybe_resolve(resolve, for_user, query_string)
+ # `set_limit` and the search itself run inside one transaction.
+ # NOTE(review): presumably so the threshold applies to the same database
+ # connection as the search query — confirm.
{:ok, results} =
Repo.transaction(fn ->
- Ecto.Adapters.SQL.query(Repo, "select set_limit(0.25)", [])
+ # The interpolated value is the module attribute, not caller input.
+ Ecto.Adapters.SQL.query(
+ Repo,
+ "select set_limit(#{@similarity_threshold})",
+ []
+ )
- query
- |> search_query(for_user)
- |> Repo.all()
+ query_string
+ |> search_query(for_user, following)
+ |> Pagination.fetch_paginated(%{"offset" => offset, "limit" => result_limit}, :offset)
end)
results
end
- defp search_query(query, for_user) do
- query
- |> union_query()
+ # Normalises a raw search string.
+ #
+ # Leading "@" characters are dropped. If exactly one "@" remains, the part
+ # after it is treated as a domain: characters matched by the regex below are
+ # removed and the remainder is passed through `:idna.encode/1`. Any other
+ # input is returned with only the leading "@" characters removed.
+ defp format_query(query_string) do
+   stripped = String.trim_leading(query_string, "@")
+
+   case String.split(stripped, "@") do
+     [name, domain] ->
+       cleaned_domain = String.replace(domain, ~r/[!-\-|@|[-`|{-~|\/|:|\s]+/, "")
+       name <> "@" <> to_string(:idna.encode(cleaned_domain))
+
+     _ ->
+       stripped
+   end
+ end
+
+ # Assembles the complete user search query.
+ #
+ # Starting from the base queryable chosen by `base_query/2`, it filters out
+ # blocked users and blocked domains, builds the two search subqueries for
+ # `query_string`, unions and de-duplicates them, applies rank boosting for
+ # `for_user`, orders by `search_rank` (highest first) and finally applies
+ # `maybe_restrict_local/2`.
+ defp search_query(query_string, for_user, following) do
+ for_user
+ |> base_query(following)
+ |> filter_blocked_user(for_user)
+ |> filter_blocked_domains(for_user)
+ |> search_subqueries(query_string)
+ |> union_subqueries()
|> distinct_query()
|> boost_search_rank_query(for_user)
|> subquery()
|> order_by(desc: :search_rank)
- |> limit(20)
|> maybe_restrict_local(for_user)
end
- defp union_query(query) do
- fts_subquery = fts_search_subquery(query)
- trigram_subquery = trigram_search_subquery(query)
+ # Picks the queryable the search starts from.
+ #
+ # With `following` set to `true` and a searching user present, the search is
+ # limited to the result of `User.get_followers_query/1`; in every other case
+ # all users are searched.
+ # NOTE(review): the option is called `:following` but the query used is the
+ # followers query — confirm which relationship is intended.
+ defp base_query(%User{} = user, true), do: User.get_followers_query(user)
+ # Catch-all: also covers `following: true` without a searching user, so a
+ # `nil` user is never handed to `User.get_followers_query/1`.
+ defp base_query(_user, _following), do: User
+
+ # Excludes accounts whose `ap_id` is in the searching user's block list.
+ #
+ # `query` is the queryable built so far. When the second argument is not a
+ # `%User{}` with a non-empty `info.blocks` list (e.g. `nil`), the query is
+ # returned untouched.
+ #
+ # Matching on `[_ | _]` checks for a non-empty list in constant time, unlike a
+ # `length/1` guard, which walks the whole list.
+ defp filter_blocked_user(query, %User{info: %{blocks: [_ | _] = blocks}}) do
+   from(q in query, where: not (q.ap_id in ^blocks))
+ end
+
+ defp filter_blocked_user(query, _), do: query
+ # Excludes accounts hosted on a domain the searching user has blocked.
+ #
+ # The host part is extracted from each row's `ap_id` with a regex `substring`
+ # and must differ from every entry of `info.domain_blocks`. When the second
+ # argument is not a `%User{}` with a non-empty `domain_blocks` list, the query
+ # is returned untouched.
+ defp filter_blocked_domains(query, %User{info: %{domain_blocks: [_ | _] = domain_blocks}}) do
+   from(
+     q in query,
+     # The block list is bound as a single array parameter. Joining it into a
+     # comma-separated string and using `NOT IN (?)` would compare the host
+     # against that one string, so nothing would be filtered as soon as more
+     # than one domain is blocked.
+     where:
+       fragment(
+         "substring(? from '.*://([^/]*)') <> ALL(?)",
+         q.ap_id,
+         ^domain_blocks
+       )
+   )
+ end
+
+ defp filter_blocked_domains(query, _), do: query
+
+ # Combines the two search subqueries into one query: the trigram subquery is
+ # the source and the full-text subquery is attached with `union_all`.
+ # Takes the `{fts_subquery, trigram_subquery}` tuple as its only argument.
+ defp union_subqueries({fts_subquery, trigram_subquery}) do
from(s in trigram_subquery, union_all: ^fts_subquery)
end
+ # Builds both search subqueries on top of `base_query` for `query_string`
+ # and returns them as `{fts_subquery, trigram_subquery}`.
+ defp search_subqueries(base_query, query_string) do
+   fts = fts_search_subquery(base_query, query_string)
+   trigram = trigram_search_subquery(base_query, query_string)
+
+   {fts, trigram}
+ end
+
+ # Wraps `q` in a subquery and keeps one row per `id`, ordering by
+ # `search_type`.
+ # NOTE(review): presumably this keeps the row with the lowest `search_type`
+ # when a user is matched by both subqueries — confirm.
defp distinct_query(q) do
from(s in subquery(q), order_by: s.search_type, distinct: s.id)
end
)
end
- defp fts_search_subquery(term, query \\ User) do
+ @spec fts_search_subquery(User.t() | Ecto.Query.t(), String.t()) :: Ecto.Query.t()
+ defp fts_search_subquery(query, term) do
processed_query =
- term
- |> String.replace(~r/\W+/, " ")
+ String.trim_trailing(term, "@" <> local_domain())
+ |> String.replace(~r/[!-\/|@|[-`|{-~|:-?]+/, " ")
|> String.trim()
|> String.split()
|> Enum.map(&(&1 <> ":*"))
|> User.restrict_deactivated()
end
- defp trigram_search_subquery(term) do
+ @spec trigram_search_subquery(User.t() | Ecto.Query.t(), String.t()) :: Ecto.Query.t()
+ defp trigram_search_subquery(query, term) do
+ term = String.trim_trailing(term, "@" <> local_domain())
+
from(
- u in User,
+ u in query,
select_merge: %{
# ^1 gives 'Postgrex expected a binary, got 1' for some weird reason
search_type: fragment("?", 1),
)
|> User.restrict_deactivated()
end
+
+ # Reads the host configured under `Pleroma.Web.Endpoint`'s `:url` settings.
+ defp local_domain do
+   Pleroma.Config.get([Pleroma.Web.Endpoint, :url, :host])
+ end
end