3704e0bdcad19f6bc6bcd084eaf13c8517397245
[akkoma] / lib / mix / tasks / pleroma / search / meilisearch.ex
1 # Pleroma: A lightweight social networking server
2 # Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
3 # SPDX-License-Identifier: AGPL-3.0-only
4
defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
  @moduledoc """
  Mix task that maintains the `objects` index of the configured Meilisearch instance.

  The Meilisearch base URL is read at runtime from
  `Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])`.

  Supported invocations:

    * `run(["index"])` - pushes the index settings and uploads every public `Note`
      object that has textual content.
    * `run(["clear"])` - sends a `DELETE` request for all documents of the index.
  """

  require Logger
  require Pleroma.Constants

  import Mix.Pleroma
  import Ecto.Query

  # Number of database rows gathered into one chunk (and so into one upload request).
  @chunk_size 10_000

  @doc """
  Entry point of the task.

  With `["index"]` the ranking rules and searchable attributes are posted first,
  then the matching objects are streamed from the database inside a single
  transaction (with an infinite timeout), converted to search documents and
  uploaded in chunks. A running total is printed for every chunk, and any upload
  whose response does not contain an `"updateId"` is reported on stdout.
  Returns the result of `Pleroma.Repo.transaction/2`.

  With `["clear"]` a `DELETE` request is issued against the documents endpoint of
  the `objects` index. Returns the `{:ok, response}` tuple of that request and
  raises `MatchError` if the request itself fails.
  """
  def run(["index"]) do
    start_pleroma()

    endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])

    configure_index(endpoint)

    Pleroma.Repo.transaction(
      fn ->
        indexable_objects_query()
        |> Pleroma.Repo.stream(timeout: :infinity)
        |> Stream.chunk_every(@chunk_size)
        |> Stream.map(fn objects ->
          objects
          |> Enum.map(&object_to_document/1)
          |> Enum.reject(&is_nil/1)
        end)
        |> Stream.transform(0, fn documents, total ->
          new_total = total + length(documents)

          IO.puts("Indexed #{new_total} entries")

          {[documents], new_total}
        end)
        |> Stream.each(&upload_documents(endpoint, &1))
        |> Stream.run()
      end,
      timeout: :infinity
    )
  end

  def run(["clear"]) do
    start_pleroma()

    endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])

    {:ok, _} =
      Pleroma.HTTP.request(:delete, "#{endpoint}/indexes/objects/documents", "", [],
        timeout: :infinity
      )
  end

  # Posts the ranking rules and the searchable attributes of the `objects` index.
  # Only the transport result is asserted; the response status is not inspected.
  defp configure_index(endpoint) do
    {:ok, _} =
      Pleroma.HTTP.post(
        "#{endpoint}/indexes/objects/settings/ranking-rules",
        Jason.encode!([
          "desc(published)",
          "typo",
          "words",
          "proximity",
          "attribute",
          "wordsPosition",
          "exactness"
        ])
      )

    {:ok, _} =
      Pleroma.HTTP.post(
        "#{endpoint}/indexes/objects/settings/searchable-attributes",
        Jason.encode!([
          "content"
        ])
      )
  end

  # Builds the query selecting the objects to index, newest `published` first.
  defp indexable_objects_query do
    from(Pleroma.Object,
      # Only index public posts which are notes and have some text
      where:
        fragment("data->>'type' = 'Note'") and
          fragment("LENGTH(data->>'content') > 0") and
          fragment("data->'to' \\? ?", ^Pleroma.Constants.as_public()),
      order_by: [desc: fragment("data->'published'")]
    )
  end

  # Converts one object into the map uploaded to Meilisearch, or returns `nil`
  # when the object has no meaningful text left after stripping markup.
  # Raises `MatchError` if `data["published"]` is not a valid ISO 8601 timestamp.
  defp object_to_document(object) do
    data = object.data

    content_str =
      case data["content"] do
        # Content stored as a list with a leading `nil`: join the remainder.
        [nil | rest] -> to_string(rest)
        str -> str
      end

    {:ok, published, _} = DateTime.from_iso8601(data["published"])

    content =
      with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str) do
        String.trim(scrubbed)
      end

    # Only index if there is anything in the string. If there is a single symbol,
    # it's probably a dot from mastodon posts with just the picture
    if String.length(content) > 1 do
      %{
        id: object.id,
        content: content,
        ap: data["id"],
        published: DateTime.to_unix(published)
      }
    else
      nil
    end
  end

  # Uploads one chunk of documents and reports a failed upload on stdout.
  defp upload_documents(endpoint, documents) do
    {:ok, result} =
      Pleroma.HTTP.post(
        "#{endpoint}/indexes/objects/documents",
        Jason.encode!(documents)
      )

    # A non-JSON or non-object body is treated as a failure as well, so that an
    # error page from the server is reported instead of crashing the whole run.
    case Jason.decode(result.body) do
      {:ok, %{"updateId" => _}} ->
        :ok

      _ ->
        # `result` is a map/struct and has no `String.Chars` implementation, so it
        # must go through `inspect/1`; interpolating it directly would raise.
        IO.puts("Failed to index: #{inspect(result)}")
    end
  end
end