Add a reindex option
[akkoma] / lib / mix / tasks / pleroma / search / meilisearch.ex
index 2485a441d8e1e152051bd12c8aaf70bc20f3d7d9..2a3c3a8b925383d1bf2ad2a3de9853c7407ec1a7 100644 (file)
@@ -9,38 +9,38 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
   import Mix.Pleroma
   import Ecto.Query
 
-  def run(["index"]) do
+  import Pleroma.Search.Meilisearch, only: [meili_post!: 2, meili_delete!: 1, meili_get!: 1]
+
+  def run(["index" | args]) do
     start_pleroma()
 
-    endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])
+    is_reindex = "--reindex" in args
 
-    {:ok, _} =
-      Pleroma.HTTP.post(
-        "#{endpoint}/indexes/objects/settings/ranking-rules",
-        Jason.encode!([
-          "desc(published)",
-          "typo",
-          "words",
-          "proximity",
-          "attribute",
-          "wordsPosition",
-          "exactness"
-        ])
-      )
+    meili_post!(
+      "/indexes/objects/settings/ranking-rules",
+      [
+        "desc(published)",
+        "words",
+        "exactness",
+        "proximity",
+        "wordsPosition",
+        "typo",
+        "attribute"
+      ]
+    )
 
-    {:ok, _} =
-      Pleroma.HTTP.post(
-        "#{endpoint}/indexes/objects/settings/searchable-attributes",
-        Jason.encode!([
-          "content"
-        ])
-      )
+    meili_post!(
+      "/indexes/objects/settings/searchable-attributes",
+      [
+        "content"
+      ]
+    )
 
     chunk_size = 10_000
 
     Pleroma.Repo.transaction(
       fn ->
-        Pleroma.Repo.stream(
+        query =
           from(Pleroma.Object,
             # Only index public posts which are notes and have some text
             where:
@@ -48,7 +48,13 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
                 fragment("LENGTH(data->>'content') > 0") and
                 fragment("data->'to' \\? ?", ^Pleroma.Constants.as_public()),
             order_by: [desc: fragment("data->'published'")]
-          ),
+          )
+
+        count = query |> Pleroma.Repo.aggregate(:count, :data)
+        IO.puts("Entries to index: #{count}")
+
+        Pleroma.Repo.stream(
+          query,
           timeout: :infinity
         )
         |> Stream.map(&Pleroma.Search.Meilisearch.object_to_search_data/1)
@@ -64,14 +70,27 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
           {[objects], new_acc}
         end)
         |> Stream.each(fn objects ->
-          {:ok, result} =
-            Pleroma.HTTP.post(
-              "#{endpoint}/indexes/objects/documents",
-              Jason.encode!(objects)
+          objects =
+            objects
+            |> Enum.filter(fn o ->
+              if is_reindex do
+                result = meili_get!("/indexes/objects/documents/#{o.id}")
+
+                # Keep only the documents that are not indexed yet, i.e. those whose lookup returns the "document_not_found" error code
+                result["errorCode"] == "document_not_found"
+              else
+                true
+              end
+            end)
+
+          result =
+            meili_post!(
+              "/indexes/objects/documents",
+              objects
             )
 
-          if not Map.has_key?(Jason.decode!(result.body), "updateId") do
-            IO.puts("Failed to index: #{result}")
+          if not Map.has_key?(result, "updateId") do
+            IO.puts("Failed to index: #{inspect(result)}")
           end
         end)
         |> Stream.run()
@@ -85,11 +104,34 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
   def run(["clear"]) do
     start_pleroma()
 
+    meili_delete!("/indexes/objects/documents")
+  end
+
+  def run(["show-private-key", master_key]) do
+    start_pleroma()
+
     endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])
 
-    {:ok, _} =
-      Pleroma.HTTP.request(:delete, "#{endpoint}/indexes/objects/documents", "", [],
-        timeout: :infinity
+    {:ok, result} =
+      Pleroma.HTTP.get(
+        Path.join(endpoint, "/keys"),
+        [{"X-Meili-API-Key", master_key}]
       )
+
+    decoded = Jason.decode!(result.body)
+
+    if decoded["private"] do
+      IO.puts(decoded["private"])
+    else
+      IO.puts("Error fetching the key, check the master key is correct: #{inspect(decoded)}")
+    end
+  end
+
+  def run(["stats"]) do
+    start_pleroma()
+
+    result = meili_get!("/indexes/objects/stats")
+    IO.puts("Number of entries: #{result["numberOfDocuments"]}")
+    IO.puts("Indexing? #{result["isIndexing"]}")
   end
 end