Merge branch 'i1t/pleroma-477_user_search_improvements' into 'develop'
authorlambda <pleromagit@rogerbraun.net>
Sun, 20 Jan 2019 10:24:05 +0000 (10:24 +0000)
committerlambda <pleromagit@rogerbraun.net>
Sun, 20 Jan 2019 10:24:05 +0000 (10:24 +0000)
I1t/pleroma 477 user search improvements

See merge request pleroma/pleroma!685

1  2 
lib/pleroma/user.ex
lib/pleroma/web/mastodon_api/mastodon_api_controller.ex
test/user_test.exs
test/web/twitter_api/twitter_api_controller_test.exs

diff --combined lib/pleroma/user.ex
index 1db1c53cb5604d8b3422894c0dddef1cb2999bc9,955808e285ebee00bd81d7e3afb2334a353587b3..06084b117404003b7bccf75006f3584c38be1e93
@@@ -35,7 -35,7 +35,7 @@@ defmodule Pleroma.User d
      field(:avatar, :map)
      field(:local, :boolean, default: true)
      field(:follower_address, :string)
-     field(:search_distance, :float, virtual: true)
+     field(:search_rank, :float, virtual: true)
      field(:tags, {:array, :string}, default: [])
      field(:last_refreshed_at, :naive_datetime)
      has_many(:notifications, Notification)
    def get_by_nickname(nickname) do
      Repo.get_by(User, nickname: nickname) ||
        if Regex.match?(~r(@#{Pleroma.Web.Endpoint.host()})i, nickname) do
 -        [local_nickname, _] = String.split(nickname, "@")
 -        Repo.get_by(User, nickname: local_nickname)
 +        Repo.get_by(User, nickname: local_nickname(nickname))
        end
    end
  
      {:ok, Repo.all(q)}
    end
  
+   def get_followers_ids(user, page \\ nil) do
+     q = get_followers_query(user, page)
+     Repo.all(from(u in q, select: u.id))
+   end
    def get_friends_query(%User{id: id, following: following}, nil) do
      from(
        u in User,
      {:ok, Repo.all(q)}
    end
  
+   def get_friends_ids(user, page \\ nil) do
+     q = get_friends_query(user, page)
+     Repo.all(from(u in q, select: u.id))
+   end
    def get_follow_requests_query(%User{} = user) do
      from(
        a in Activity,
      Repo.all(query)
    end
  
-   def search(query, resolve \\ false) do
-     # strip the beginning @ off if there is a query
+   def search(query, resolve \\ false, for_user \\ nil) do
+     # Strip the beginning @ off if there is a query
      query = String.trim_leading(query, "@")
  
-     if resolve do
-       User.get_or_fetch_by_nickname(query)
-     end
+     if resolve, do: User.get_or_fetch_by_nickname(query)
  
-     inner =
-       from(
-         u in User,
-         select_merge: %{
-           search_distance:
-             fragment(
-               "? <-> (? || coalesce(?, ''))",
-               ^query,
-               u.nickname,
-               u.name
-             )
-         },
-         where: not is_nil(u.nickname)
-       )
+     fts_results = do_search(fts_search_subquery(query), for_user)
+     {:ok, trigram_results} =
+       Repo.transaction(fn ->
+         Ecto.Adapters.SQL.query(Repo, "select set_limit(0.25)", [])
+         do_search(trigram_search_subquery(query), for_user)
+       end)
  
+     Enum.uniq_by(fts_results ++ trigram_results, & &1.id)
+   end
+   defp do_search(subquery, for_user, options \\ []) do
      q =
        from(
-         s in subquery(inner),
-         order_by: s.search_distance,
-         limit: 20
+         s in subquery(subquery),
+         order_by: [desc: s.search_rank],
+         limit: ^(options[:limit] || 20)
        )
  
-     Repo.all(q)
+     results =
+       q
+       |> Repo.all()
+       |> Enum.filter(&(&1.search_rank > 0))
+     boost_search_results(results, for_user)
+   end
+   defp fts_search_subquery(query) do
+     processed_query =
+       query
+       |> String.replace(~r/\W+/, " ")
+       |> String.trim()
+       |> String.split()
+       |> Enum.map(&(&1 <> ":*"))
+       |> Enum.join(" | ")
+     from(
+       u in User,
+       select_merge: %{
+         search_rank:
+           fragment(
+             """
+             ts_rank_cd(
+               setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') ||
+               setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B'),
+               to_tsquery('simple', ?),
+               32
+             )
+             """,
+             u.nickname,
+             u.name,
+             ^processed_query
+           )
+       },
+       where:
+         fragment(
+           """
+             (setweight(to_tsvector('simple', regexp_replace(?, '\\W', ' ', 'g')), 'A') ||
+             setweight(to_tsvector('simple', regexp_replace(coalesce(?, ''), '\\W', ' ', 'g')), 'B')) @@ to_tsquery('simple', ?)
+           """,
+           u.nickname,
+           u.name,
+           ^processed_query
+         )
+     )
+   end
+   defp trigram_search_subquery(query) do
+     from(
+       u in User,
+       select_merge: %{
+         search_rank:
+           fragment(
+             "similarity(?, trim(? || ' ' || coalesce(?, '')))",
+             ^query,
+             u.nickname,
+             u.name
+           )
+       },
+       where: fragment("trim(? || ' ' || coalesce(?, '')) % ?", u.nickname, u.name, ^query)
+     )
+   end
+   defp boost_search_results(results, nil), do: results
+   defp boost_search_results(results, for_user) do
+     friends_ids = get_friends_ids(for_user)
+     followers_ids = get_followers_ids(for_user)
+     Enum.map(
+       results,
+       fn u ->
+         search_rank_coef =
+           cond do
+             u.id in friends_ids ->
+               1.2
+             u.id in followers_ids ->
+               1.1
+             true ->
+               1
+           end
+         Map.put(u, :search_rank, u.search_rank * search_rank_coef)
+       end
+     )
+     |> Enum.sort_by(&(-&1.search_rank))
    end
  
    def blocks_import(%User{} = blocker, blocked_identifiers) when is_list(blocked_identifiers) do
      update_and_set_cache(cng)
    end
  
 -  def local_user_query() do
 +  def local_user_query do
      from(
        u in User,
        where: u.local == true,
      )
    end
  
 -  def moderator_user_query() do
 +  def active_local_user_query do
 +    from(
 +      u in local_user_query(),
 +      where: fragment("?->'deactivated' @> 'false'", u.info)
 +    )
 +  end
 +
 +  def moderator_user_query do
      from(
        u in User,
        where: u.local == true,
        end)
  
      bio
 -    |> CommonUtils.format_input(mentions, tags, "text/plain")
 +    |> CommonUtils.format_input(mentions, tags, "text/plain", user_links: [format: :full])
      |> Formatter.emojify(emoji)
    end
  
        @strict_local_nickname_regex
      end
    end
 +
 +  def local_nickname(nickname_or_mention) do
 +    nickname_or_mention
 +    |> full_nickname()
 +    |> String.split("@")
 +    |> hd()
 +  end
 +
 +  def full_nickname(nickname_or_mention),
 +    do: String.trim_leading(nickname_or_mention, "@")
 +
 +  def error_user(ap_id) do
 +    %User{
 +      name: ap_id,
 +      ap_id: ap_id,
 +      info: %User.Info{},
 +      nickname: "erroruser@example.com",
 +      inserted_at: NaiveDateTime.utc_now()
 +    }
 +  end
  end
index daad891856b168bac828a2db38bd18ddbc46cf06,54367f5862d1676c8f597c9511e085ea6d2491af..882d336be9622754a41b6cf93aa7a7c567fe4659
@@@ -341,6 -341,7 +341,6 @@@ defmodule Pleroma.Web.MastodonAPI.Masto
      params =
        params
        |> Map.put("in_reply_to_status_id", params["in_reply_to_id"])
 -      |> Map.put("no_attachment_links", true)
  
      idempotency_key =
        case get_req_header(conn, "idempotency-key") do
    end
  
    def search2(%{assigns: %{user: user}} = conn, %{"q" => query} = params) do
-     accounts = User.search(query, params["resolve"] == "true")
+     accounts = User.search(query, params["resolve"] == "true", user)
  
      statuses = status_search(user, query)
  
    end
  
    def search(%{assigns: %{user: user}} = conn, %{"q" => query} = params) do
-     accounts = User.search(query, params["resolve"] == "true")
+     accounts = User.search(query, params["resolve"] == "true", user)
  
      statuses = status_search(user, query)
  
    end
  
    def account_search(%{assigns: %{user: user}} = conn, %{"q" => query} = params) do
-     accounts = User.search(query, params["resolve"] == "true")
+     accounts = User.search(query, params["resolve"] == "true", user)
  
      res = AccountView.render("accounts.json", users: accounts, for: user, as: :user)
  
diff --combined test/user_test.exs
index 21a62483f7dbcf97017f3f9b9a7abae63f641e7c,339def21757b3bd36985a33ee72c7a2b03228e8b..092cfc5dc74080ff908e87d42a29c67ce5b6308e
@@@ -775,14 -775,61 +775,61 @@@ defmodule Pleroma.UserTest d
    end
  
    describe "User.search" do
-     test "finds a user, ranking by similarity" do
-       _user = insert(:user, %{name: "lain"})
-       _user_two = insert(:user, %{name: "ean"})
-       _user_three = insert(:user, %{name: "ebn", nickname: "lain@mastodon.social"})
-       user_four = insert(:user, %{nickname: "lain@pleroma.soykaf.com"})
+     test "finds a user by full or partial nickname" do
+       user = insert(:user, %{nickname: "john"})
  
-       assert user_four ==
-                User.search("lain@ple") |> List.first() |> Map.put(:search_distance, nil)
+       Enum.each(["john", "jo", "j"], fn query ->
+         assert user == User.search(query) |> List.first() |> Map.put(:search_rank, nil)
+       end)
+     end
+     test "finds a user by full or partial name" do
+       user = insert(:user, %{name: "John Doe"})
+       Enum.each(["John Doe", "JOHN", "doe", "j d", "j", "d"], fn query ->
+         assert user == User.search(query) |> List.first() |> Map.put(:search_rank, nil)
+       end)
+     end
+     test "finds users, preferring nickname matches over name matches" do
+       u1 = insert(:user, %{name: "lain", nickname: "nick1"})
+       u2 = insert(:user, %{nickname: "lain", name: "nick1"})
+       assert [u2.id, u1.id] == Enum.map(User.search("lain"), & &1.id)
+     end
+     test "finds users, considering density of matched tokens" do
+       u1 = insert(:user, %{name: "Bar Bar plus Word Word"})
+       u2 = insert(:user, %{name: "Word Word Bar Bar Bar"})
+       assert [u2.id, u1.id] == Enum.map(User.search("bar word"), & &1.id)
+     end
+     test "finds users, ranking by similarity" do
+       u1 = insert(:user, %{name: "lain"})
+       _u2 = insert(:user, %{name: "ean"})
+       u3 = insert(:user, %{name: "ebn", nickname: "lain@mastodon.social"})
+       u4 = insert(:user, %{nickname: "lain@pleroma.soykaf.com"})
+       assert [u4.id, u3.id, u1.id] == Enum.map(User.search("lain@ple"), & &1.id)
+     end
+     test "finds users, handling misspelled requests" do
+       u1 = insert(:user, %{name: "lain"})
+       assert [u1.id] == Enum.map(User.search("laiin"), & &1.id)
+     end
+     test "finds users, boosting ranks of friends and followers" do
+       u1 = insert(:user)
+       u2 = insert(:user, %{name: "Doe"})
+       follower = insert(:user, %{name: "Doe"})
+       friend = insert(:user, %{name: "Doe"})
+       {:ok, follower} = User.follow(follower, u1)
+       {:ok, u1} = User.follow(u1, friend)
+       assert [friend.id, follower.id, u2.id] == Enum.map(User.search("doe", false, u1), & &1.id)
      end
  
      test "finds a user whose name is nil" do
        assert user_two ==
                 User.search("lain@pleroma.soykaf.com")
                 |> List.first()
-                |> Map.put(:search_distance, nil)
+                |> Map.put(:search_rank, nil)
+     end
+     test "does not yield false-positive matches" do
+       insert(:user, %{name: "John Doe"})
+       Enum.each(["mary", "a", ""], fn query ->
+         assert [] == User.search(query)
+       end)
      end
    end
  
        Pleroma.Config.put([:instance, :account_activation_required], false)
      end
    end
 +
 +  describe "parse_bio/2" do
 +    test "preserves hosts in user links text" do
 +      remote_user = insert(:user, local: false, nickname: "nick@domain.com")
 +      user = insert(:user)
 +      bio = "A.k.a. @nick@domain.com"
 +
 +      expected_text =
 +        "A.k.a. <span class='h-card'><a data-user='#{remote_user.id}' class='u-url mention' href='#{
 +          remote_user.ap_id
 +        }'>" <> "@<span>nick@domain.com</span></a></span>"
 +
 +      assert expected_text == User.parse_bio(bio, user)
 +    end
 +  end
  end
index e08edc5254d61fbb1dc3e359c1e98505b2380e69,e013d1aca0add4ce95650d34f821765cded5b025..f22cdd870761c1c8228561562a2d77ac8f6009df
@@@ -1357,9 -1357,9 +1357,9 @@@ defmodule Pleroma.Web.TwitterAPI.Contro
        assert user.name == "new name"
  
        assert user.bio ==
 -               "hi <span><a data-user='#{user2.id}' class='mention' href='#{user2.ap_id}'>@<span>#{
 -                 user2.nickname
 -               }</span></a></span>"
 +               "hi <span class='h-card'><a data-user='#{user2.id}' class='u-url mention' href='#{
 +                 user2.ap_id
 +               }'>@<span>#{user2.nickname}</span></a></span>"
  
        assert json_response(conn, 200) == UserView.render("user.json", %{user: user, for: user})
      end
    describe "GET /api/pleroma/search_user" do
      test "it returns users, ordered by similarity", %{conn: conn} do
        user = insert(:user, %{name: "eal"})
-       user_two = insert(:user, %{name: "ean"})
-       user_three = insert(:user, %{name: "ebn"})
+       user_two = insert(:user, %{name: "eal me"})
+       _user_three = insert(:user, %{name: "zzz"})
  
        resp =
          conn
-         |> get(twitter_api_search__path(conn, :search_user), query: "eal")
+         |> get(twitter_api_search__path(conn, :search_user), query: "eal me")
          |> json_response(200)
  
-       assert length(resp) == 3
-       assert [user.id, user_two.id, user_three.id] == Enum.map(resp, fn %{"id" => id} -> id end)
+       assert length(resp) == 2
+       assert [user_two.id, user.id] == Enum.map(resp, fn %{"id" => id} -> id end)
      end
    end