[#477] User: FTS and trigram search results mixing (to handle misspelled requests).
author Ivan Tashkinov <ivantashkinov@gmail.com>
Fri, 18 Jan 2019 07:35:45 +0000 (10:35 +0300)
committer Ivan Tashkinov <ivantashkinov@gmail.com>
Fri, 18 Jan 2019 07:35:45 +0000 (10:35 +0300)
lib/pleroma/user.ex
test/user_test.exs
test/web/twitter_api/twitter_api_controller_test.exs

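diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex
index 8ae36416a761fb388e0beebef76cbaf2dcdc359f..1d0bf1edfd0f7ebaeb3ce787c1c47d0f6f620e91 100644
--- a/lib/pleroma/user.ex
+++ b/lib/pleroma/user.ex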
@@ -679,48 +679,24 @@ defmodule Pleroma.User do
   end
 
   def search(query, resolve \\ false, for_user \\ nil) do
-    # strip the beginning @ off if there is a query
+    # Strip the beginning @ off if there is a query
     query = String.trim_leading(query, "@")
 
-    if resolve do
-      User.get_or_fetch_by_nickname(query)
-    end
+    if resolve, do: User.get_or_fetch_by_nickname(query)
 
-    processed_query =
-      query
-      |> String.replace(~r/\W+/, " ")
-      |> String.trim()
-      |> String.split()
-      |> Enum.map(&(&1 <> ":*"))
-      |> Enum.join(" | ")
+    fts_results = do_search(fts_search_subquery(query), for_user)
 
-    inner =
-      from(
-        u in User,
-        select_merge: %{
-          search_rank:
-            fragment(
-              """
-              ts_rank_cd(
-                setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') ||
-                setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B'),
-                to_tsquery('simple', ?),
-                32
-              )
-              """,
-              u.nickname,
-              u.name,
-              ^processed_query
-            )
-        },
-        where: not is_nil(u.nickname)
-      )
+    trigram_results = do_search(trigram_search_subquery(query), for_user)
+
+    Enum.uniq_by(fts_results ++ trigram_results, & &1.id)
+  end
 
+  defp do_search(subquery, for_user, options \\ []) do
     q =
       from(
-        s in subquery(inner),
+        s in subquery(subquery),
         order_by: [desc: s.search_rank],
-        limit: 20
+        limit: ^(options[:limit] || 20)
       )
 
     results =
@@ -728,35 +704,81 @@ defmodule Pleroma.User do
       |> Repo.all()
       |> Enum.filter(&(&1.search_rank > 0))
 
-    weighted_results =
-      if for_user do
-        friends_ids = get_friends_ids(for_user)
-        followers_ids = get_followers_ids(for_user)
+    boost_search_results(results, for_user)
+  end
 
-        Enum.map(
-          results,
-          fn u ->
-            search_rank_coef =
-              cond do
-                u.id in friends_ids ->
-                  1.2
+  defp fts_search_subquery(query) do
+    processed_query =
+      query
+      |> String.replace(~r/\W+/, " ")
+      |> String.trim()
+      |> String.split()
+      |> Enum.map(&(&1 <> ":*"))
+      |> Enum.join(" | ")
+
+    from(
+      u in User,
+      select_merge: %{
+        search_rank:
+          fragment(
+            """
+            ts_rank_cd(
+              setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') ||
+              setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B'),
+              to_tsquery('simple', ?),
+              32
+            )
+            """,
+            u.nickname,
+            u.name,
+            ^processed_query
+          )
+      },
+      where: not is_nil(u.nickname)
+    )
+  end
+
+  defp trigram_search_subquery(query) do
+    from(
+      u in User,
+      select_merge: %{
+        search_rank:
+          fragment(
+            "similarity(?, ? || ' ' || coalesce(?, ''))",
+            ^query,
+            u.nickname,
+            u.name
+          )
+      },
+      where: not is_nil(u.nickname)
+    )
+  end
 
-                u.id in followers_ids ->
-                  1.1
+  defp boost_search_results(results, nil), do: results
 
-                true ->
-                  1
-              end
+  defp boost_search_results(results, for_user) do
+    friends_ids = get_friends_ids(for_user)
+    followers_ids = get_followers_ids(for_user)
 
-            Map.put(u, :search_rank, u.search_rank * search_rank_coef)
+    Enum.map(
+      results,
+      fn u ->
+        search_rank_coef =
+          cond do
+            u.id in friends_ids ->
+              1.2
+
+            u.id in followers_ids ->
+              1.1
+
+            true ->
+              1
           end
-        )
-        |> Enum.sort_by(&(-&1.search_rank))
-      else
-        results
-      end
 
-    weighted_results
+        Map.put(u, :search_rank, u.search_rank * search_rank_coef)
+      end
+    )
+    |> Enum.sort_by(&(-&1.search_rank))
   end
 
   def blocks_import(%User{} = blocker, blocked_identifiers) when is_list(blocked_identifiers) do
diff --git a/test/user_test.exs b/test/user_test.exs
index 48b7b72ec3aaf0e6c077a9bd990a364b01c5cd7f..339def21757b3bd36985a33ee72c7a2b03228e8b 100644
--- a/test/user_test.exs
+++ b/test/user_test.exs
@@ -814,6 +814,12 @@ defmodule Pleroma.UserTest do
       assert [u4.id, u3.id, u1.id] == Enum.map(User.search("lain@ple"), & &1.id)
     end
 
+    test "finds users, handling misspelled requests" do
+      u1 = insert(:user, %{name: "lain"})
+
+      assert [u1.id] == Enum.map(User.search("laiin"), & &1.id)
+    end
+
     test "finds users, boosting ranks of friends and followers" do
       u1 = insert(:user)
       u2 = insert(:user, %{name: "Doe"})
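diff --git a/test/web/twitter_api/twitter_api_controller_test.exs b/test/web/twitter_api/twitter_api_controller_test.exs
index a4baf2b5ffd5122c692de17fe5d1921f87e2749c..e013d1aca0add4ce95650d34f821765cded5b025 100644
--- a/test/web/twitter_api/twitter_api_controller_test.exs
+++ b/test/web/twitter_api/twitter_api_controller_test.exs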
@@ -1656,7 +1656,7 @@ defmodule Pleroma.Web.TwitterAPI.ControllerTest do
     test "it returns users, ordered by similarity", %{conn: conn} do
       user = insert(:user, %{name: "eal"})
       user_two = insert(:user, %{name: "eal me"})
-      _user_three = insert(:user, %{name: "ebn"})
+      _user_three = insert(:user, %{name: "zzz"})
 
       resp =
         conn