Mark only content as searchable for meilisearch
[akkoma] / lib / mix / tasks / pleroma / search / meilisearch.ex
1 # Pleroma: A lightweight social networking server
2 # Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
3 # SPDX-License-Identifier: AGPL-3.0-only
4
defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
  @moduledoc """
  Mix tasks for maintaining the Meilisearch `objects` index.

  Supported argument lists for `run/1`:

    * `["index"]` - pushes the index settings, then uploads public notes in batches
    * `["clear"]` - deletes the `objects` index

  The Meilisearch base URL is read from the `[Pleroma.Search.Meilisearch, :url]`
  configuration key.
  """

  require Logger
  require Pleroma.Constants

  import Mix.Pleroma
  import Ecto.Query

  # Number of objects sent to Meilisearch in a single documents request.
  @chunk_size 10_000

  @doc """
  Runs the task selected by the argument list.

  With `["index"]`, starts Pleroma, posts the ranking rules and searchable
  attributes for the `objects` index, then streams matching objects from the
  database inside a transaction and posts them in chunks. Returns the result
  of `Pleroma.Repo.transaction/2`.

  With `["clear"]`, starts Pleroma and issues a `DELETE` request for the
  `objects` index.

  Any request that does not return `{:ok, _}` raises a `MatchError`.
  """
  def run(["index"]) do
    start_pleroma()

    endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])

    configure_index(endpoint)

    # The stream is consumed inside a transaction with no timeout, since a
    # full reindex can run far longer than the default limits.
    Pleroma.Repo.transaction(
      fn ->
        public_notes_query()
        |> Pleroma.Repo.stream(timeout: :infinity)
        |> Stream.chunk_every(@chunk_size)
        |> Stream.transform(0, fn objects, total ->
          # Running count of objects read so far; it is printed before the
          # chunk is actually uploaded further down the pipeline.
          total = total + length(objects)

          IO.puts("Indexed #{total} entries")

          {[objects], total}
        end)
        |> Stream.map(fn objects -> Enum.map(objects, &to_document/1) end)
        |> Stream.each(&upload_documents(endpoint, &1))
        |> Stream.run()
      end,
      timeout: :infinity
    )
  end

  def run(["clear"]) do
    start_pleroma()

    endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])

    {:ok, _} = Pleroma.HTTP.request(:delete, "#{endpoint}/indexes/objects", "", [], [])
  end

  # Posts the ranking rules and the searchable attributes for the `objects` index.
  defp configure_index(endpoint) do
    {:ok, _} =
      Pleroma.HTTP.post(
        "#{endpoint}/indexes/objects/settings/ranking-rules",
        Jason.encode!([
          "desc(published)",
          "typo",
          "words",
          "proximity",
          "attribute",
          "wordsPosition",
          "exactness"
        ])
      )

    # Only the post text is searchable; the other document fields are not.
    {:ok, _} =
      Pleroma.HTTP.post(
        "#{endpoint}/indexes/objects/settings/searchable-attributes",
        Jason.encode!([
          "content"
        ])
      )

    :ok
  end

  # Builds the query selecting the objects to index, newest `published` first.
  defp public_notes_query do
    from(Pleroma.Object,
      # Only index public posts which are notes and have some text
      where:
        fragment("data->>'type' = 'Note'") and
          fragment("LENGTH(data->>'content') > 0") and
          fragment("data->'to' \\? ?", ^Pleroma.Constants.as_public()),
      order_by: [desc: fragment("data->'published'")]
    )
  end

  # Converts a database object into the map uploaded as a Meilisearch document.
  # Raises a `MatchError` when `published` is not valid ISO 8601 or when tag
  # stripping does not return `{:ok, _}`.
  defp to_document(object) do
    data = object.data

    content_str =
      case data["content"] do
        # NOTE(review): content is apparently sometimes a list with a nil
        # head; the remainder is flattened into a string. Confirm which
        # producer creates this shape.
        [nil | rest] -> to_string(rest)
        str -> str
      end

    {:ok, published, _offset} = DateTime.from_iso8601(data["published"])
    {:ok, content} = FastSanitize.strip_tags(content_str)

    %{
      id: object.id,
      content: content,
      ap: data["id"],
      published: DateTime.to_unix(published)
    }
  end

  # Posts one batch of documents and reports a response that lacks `updateId`.
  defp upload_documents(endpoint, documents) do
    {:ok, result} =
      Pleroma.HTTP.post(
        "#{endpoint}/indexes/objects/documents",
        Jason.encode!(documents)
      )

    if not Map.has_key?(Jason.decode!(result.body), "updateId") do
      # The response is not a string, so interpolating it directly would raise
      # Protocol.UndefinedError (no String.Chars implementation); inspect/1
      # renders it safely.
      IO.puts("Failed to index: #{inspect(result)}")
    end
  end
end