Rework task indexing to share code with the main module
author Ekaterina Vaartis <vaartis@kotobank.ch>
Sun, 22 Aug 2021 19:53:18 +0000 (22:53 +0300)
committer FloatingGhost <hannah@coffee-and-dreams.uk>
Wed, 29 Jun 2022 19:48:29 +0000 (20:48 +0100)
The code in the main module now scrubs new posts too

lib/mix/tasks/pleroma/search/meilisearch.ex
lib/pleroma/search/meilisearch.ex

index 3704e0bdcad19f6bc6bcd084eaf13c8517397245..b5a394e34929821f87ec931c406cb04c6fa5dc16 100644 (file)
@@ -51,40 +51,9 @@ defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
           ),
           timeout: :infinity
         )
+        |> Stream.map(&Pleroma.Search.Meilisearch.object_to_search_data/1)
+        |> Stream.filter(fn o -> not is_nil(o) end)
         |> Stream.chunk_every(chunk_size)
-        |> Stream.map(fn objects ->
-          Enum.map(objects, fn object ->
-            data = object.data
-
-            content_str =
-              case data["content"] do
-                [nil | rest] -> to_string(rest)
-                str -> str
-              end
-
-            {:ok, published, _} = DateTime.from_iso8601(data["published"])
-
-            content =
-              with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str),
-                   trimmed <- String.trim(scrubbed) do
-                trimmed
-              end
-
-            # Only index if there is anything in the string. If there is a single symbol,
-            # it's probably a dot from mastodon posts with just the picture
-            if String.length(content) > 1 do
-              %{
-                id: object.id,
-                content: content,
-                ap: data["id"],
-                published: published |> DateTime.to_unix()
-              }
-            else
-              nil
-            end
-          end)
-          |> Enum.filter(fn o -> not is_nil(o) end)
-        end)
         |> Stream.transform(0, fn objects, acc ->
           new_acc = acc + Enum.count(objects)
 
index 87fdeaf5e0b30c54ba6f363a916e86557eef1692..10468e36c0bc9839f878b4ef918ae8449d399731 100644 (file)
@@ -39,28 +39,46 @@ defmodule Pleroma.Search.Meilisearch do
     end
   end
 
-  def add_to_index(activity) do
-    object = activity.object
-
-    if activity.data["type"] == "Create" and not is_nil(object) and object.data["type"] == "Note" and
+  def object_to_search_data(object) do
+    if not is_nil(object) and object.data["type"] == "Note" and
          Pleroma.Constants.as_public() in object.data["to"] do
       data = object.data
 
-      endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])
+      content_str =
+        case data["content"] do
+          [nil | rest] -> to_string(rest)
+          str -> str
+        end
+
+      content =
+        with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str),
+             trimmed <- String.trim(scrubbed) do
+          trimmed
+        end
+
+      if String.length(content) > 1 do
+        {:ok, published, _} = DateTime.from_iso8601(data["published"])
+
+        %{
+          id: object.id,
+          content: content,
+          ap: data["id"],
+          published: published |> DateTime.to_unix()
+        }
+      end
+    end
+  end
 
-      {:ok, published, _} = DateTime.from_iso8601(data["published"])
+  def add_to_index(activity) do
+    maybe_search_data = object_to_search_data(activity)
+
+    if activity.data["type"] == "Create" and maybe_search_data do
+      endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])
 
       {:ok, result} =
         Pleroma.HTTP.post(
           "#{endpoint}/indexes/objects/documents",
-          Jason.encode!([
-            %{
-              id: object.id,
-              content: data["content"] |> Pleroma.HTML.filter_tags(),
-              ap: data["id"],
-              published: published |> DateTime.to_unix()
-            }
-          ])
+          Jason.encode!([maybe_search_data])
         )
 
       if not Map.has_key?(Jason.decode!(result.body), "updateId") do