From ed8f55ab8eb292903cec8f7699aa6775cc304458 Mon Sep 17 00:00:00 2001 From: Ivan Tashkinov Date: Fri, 18 Jan 2019 10:35:45 +0300 Subject: [PATCH] [#477] User: FTS and trigram search results mixing (to handle misspelled requests). --- lib/pleroma/user.ex | 136 ++++++++++-------- test/user_test.exs | 6 + .../twitter_api_controller_test.exs | 2 +- 3 files changed, 86 insertions(+), 58 deletions(-) diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex index 8ae36416a..1d0bf1edf 100644 --- a/lib/pleroma/user.ex +++ b/lib/pleroma/user.ex @@ -679,48 +679,24 @@ defmodule Pleroma.User do end def search(query, resolve \\ false, for_user \\ nil) do - # strip the beginning @ off if there is a query + # Strip the beginning @ off if there is a query query = String.trim_leading(query, "@") - if resolve do - User.get_or_fetch_by_nickname(query) - end + if resolve, do: User.get_or_fetch_by_nickname(query) - processed_query = - query - |> String.replace(~r/\W+/, " ") - |> String.trim() - |> String.split() - |> Enum.map(&(&1 <> ":*")) - |> Enum.join(" | ") + fts_results = do_search(fts_search_subquery(query), for_user) - inner = - from( - u in User, - select_merge: %{ - search_rank: - fragment( - """ - ts_rank_cd( - setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') || - setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B'), - to_tsquery('simple', ?), - 32 - ) - """, - u.nickname, - u.name, - ^processed_query - ) - }, - where: not is_nil(u.nickname) - ) + trigram_results = do_search(trigram_search_subquery(query), for_user) + + Enum.uniq_by(fts_results ++ trigram_results, & &1.id) + end + defp do_search(subquery, for_user, options \\ []) do q = from( - s in subquery(inner), + s in subquery(subquery), order_by: [desc: s.search_rank], - limit: 20 + limit: ^(options[:limit] || 20) ) results = @@ -728,35 +704,81 @@ defmodule Pleroma.User do |> Repo.all() |> Enum.filter(&(&1.search_rank > 0)) - weighted_results = - if for_user do - friends_ids = get_friends_ids(for_user) - followers_ids = get_followers_ids(for_user) + boost_search_results(results, for_user) + end - Enum.map( - results, - fn u -> - search_rank_coef = - cond do - u.id in friends_ids -> - 1.2 + defp fts_search_subquery(query) do + processed_query = + query + |> String.replace(~r/\W+/, " ") + |> String.trim() + |> String.split() + |> Enum.map(&(&1 <> ":*")) + |> Enum.join(" | ") + + from( + u in User, + select_merge: %{ + search_rank: + fragment( + """ + ts_rank_cd( + setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') || + setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B'), + to_tsquery('simple', ?), + 32 + ) + """, + u.nickname, + u.name, + ^processed_query + ) + }, + where: not is_nil(u.nickname) + ) + end + + defp trigram_search_subquery(query) do + from( + u in User, + select_merge: %{ + search_rank: + fragment( + "similarity(?, ? || ' ' || coalesce(?, ''))", + ^query, + u.nickname, + u.name + ) + }, + where: not is_nil(u.nickname) + ) + end - u.id in followers_ids -> - 1.1 + defp boost_search_results(results, nil), do: results - true -> - 1 - end + defp boost_search_results(results, for_user) do + friends_ids = get_friends_ids(for_user) + followers_ids = get_followers_ids(for_user) - Map.put(u, :search_rank, u.search_rank * search_rank_coef) + Enum.map( + results, + fn u -> + search_rank_coef = + cond do + u.id in friends_ids -> + 1.2 + + u.id in followers_ids -> + 1.1 + + true -> + 1 end - ) - |> Enum.sort_by(&(-&1.search_rank)) - else - results - end - weighted_results + Map.put(u, :search_rank, u.search_rank * search_rank_coef) + end + ) + |> Enum.sort_by(&(-&1.search_rank)) end def blocks_import(%User{} = blocker, blocked_identifiers) when is_list(blocked_identifiers) do diff --git a/test/user_test.exs b/test/user_test.exs index 48b7b72ec..339def217 100644 --- a/test/user_test.exs +++ b/test/user_test.exs @@ -814,6 +814,12 @@ defmodule Pleroma.UserTest do assert [u4.id, u3.id, u1.id] == Enum.map(User.search("lain@ple"), & &1.id) end + test "finds users, handling misspelled requests" do + u1 = insert(:user, %{name: "lain"}) + + assert [u1.id] == Enum.map(User.search("laiin"), & &1.id) + end + test "finds users, boosting ranks of friends and followers" do u1 = insert(:user) u2 = insert(:user, %{name: "Doe"}) diff --git a/test/web/twitter_api/twitter_api_controller_test.exs b/test/web/twitter_api/twitter_api_controller_test.exs index a4baf2b5f..e013d1aca 100644 --- a/test/web/twitter_api/twitter_api_controller_test.exs +++ b/test/web/twitter_api/twitter_api_controller_test.exs @@ -1656,7 +1656,7 @@ defmodule Pleroma.Web.TwitterAPI.ControllerTest do test "it returns users, ordered by similarity", %{conn: conn} do user = insert(:user, %{name: "eal"}) user_two = insert(:user, %{name: "eal me"}) - _user_three = insert(:user, %{name: "ebn"}) + _user_three = insert(:user, %{name: "zzz"}) resp = conn -- 2.45.2