suggestion: [100_000]
}
]
+ },
+ %{
+ group: :pleroma,
+ key: Pleroma.Search.Elasticsearch.Cluster,
+ type: :group,
+ description: "Elasticsearch settings.",
+ children: [
+ %{
+ key: :url,
+ type: :string,
+ description: "Elasticsearch URL.",
+ suggestion: ["http://127.0.0.1:9200/"]
+ },
+ %{
+ key: :username,
+ type: :string,
+ description: "Username to connect to ES. Set to nil if your cluster is unauthenticated.",
+ suggestion: ["elastic"]
+ },
+ %{
+ key: :password,
+ type: :string,
+ description: "Password to connect to ES. Set to nil if your cluster is unauthenticated.",
+ suggestion: ["changeme"]
+ },
+ %{
+ key: :api,
+ type: :module,
+ description:
+ "The API module used by Elasticsearch. Should always be Elasticsearch.API.HTTP",
+ suggestion: [Elasticsearch.API.HTTP]
+ },
+ %{
+ key: :json_library,
+ type: :module,
+ description:
+ "The JSON module used to encode/decode when communicating with Elasticsearch",
+ suggestion: [Jason]
+ },
+ %{
+ key: :indexes,
+ type: :map,
+ description: "The indices to set up in Elasticsearch",
+ children: [
+ %{
+ key: :activities,
+ type: :map,
+ description: "Config for the index to use for activities",
+ children: [
+ %{
+ key: :settings,
+ type: :string,
+ description:
+ "Path to the file containing index settings for the activities index. Should contain a mapping.",
+ suggestion: ["priv/es-mappings/activity.json"]
+ },
+ %{
+ key: :store,
+ type: :module,
+ description: "The internal store module",
+ suggestion: [Pleroma.Search.Elasticsearch.Store]
+ },
+ %{
+ key: :sources,
+ type: {:list, :module},
+ description: "The internal types to use for this index",
+ suggestion: [[Pleroma.Activity]]
+ },
+ %{
+ key: :bulk_page_size,
+ type: :int,
+ description: "Size for bulk put requests, mostly used on building the index",
+ suggestion: [5000]
+ },
+ %{
+ key: :bulk_wait_interval,
+ type: :int,
+ description: "Time to wait between bulk put requests (in ms)",
+ suggestion: [15_000]
+ }
+ ]
+ }
+ ]
+ }
+ ]
}
]
there is no need to actually clear the whole index, unless you want **all** of it gone. That said, the index does not hold any information
that cannot be re-created from the database, it should also generally be a lot smaller than the size of your database. Still, the size
depends on the amount of text in posts.
+
+## Elasticsearch
+
+As with meilisearch, this can be rather memory-hungry, but it is very good at what it does.
+
+To use [elasticsearch](https://www.elastic.co/), set the search module to `Pleroma.Search.Elasticsearch`:
+
+> config :pleroma, Pleroma.Search, module: Pleroma.Search.Elasticsearch
+
+You then need to set the URL and authentication credentials if relevant.
+
+> config :pleroma, Pleroma.Search.Elasticsearch.Cluster,
+> url: "http://127.0.0.1:9200/",
+> username: "elastic",
+> password: "changeme"
+
+### Initial indexing
+
+After setting up the configuration, you'll want to index all of your already existing posts. Only public posts are indexed. You'll only
+have to do it one time, but it might take a while, depending on the amount of posts your instance has seen.
+
+The sequence of actions is as follows:
+
+1. First, change the configuration to use `Pleroma.Search.Elasticsearch` as the search backend
+2. Restart your instance, at this point it can be used while the search indexing is running, though search won't return anything
+3. Start the initial indexing process (as described below with `index`),
+ and wait until the task says it sent everything from the database to index
+4. Wait until the index task exits
+
+To start the initial indexing, run the indexing task:
+
+=== "OTP"
+```sh
+./bin/pleroma_ctl search.elasticsearch index activities --cluster Pleroma.Search.Elasticsearch.Cluster
+```
+
+=== "From Source"
+```sh
+mix elasticsearch.build activities --cluster Pleroma.Search.Elasticsearch.Cluster
+```
\ No newline at end of file
--- /dev/null
+defmodule Mix.Tasks.Pleroma.Search.Elasticsearch do
+  # Thin wrapper exposing the elasticsearch-elixir build task through
+  # Pleroma's task namespace, e.g.:
+  #   mix pleroma.search.elasticsearch index activities --cluster Pleroma.Search.Elasticsearch.Cluster
+  #
+  # NOTE(review): no `use Mix.Task` here — confirm the task is still
+  # discoverable/invocable via `mix` and pleroma_ctl as documented.
+  alias Mix.Tasks.Elasticsearch.Build
+  import Mix.Pleroma
+
+  # "index" subcommand: boot the Pleroma application (config, repo, etc.)
+  # via Mix.Pleroma.start_pleroma/0, then hand the remaining CLI args
+  # (index name, --cluster, ...) straight to Elasticsearch's Build task.
+  def run(["index" | args]) do
+    start_pleroma()
+    Build.run(args)
+  end
+end
--- /dev/null
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
+  # Mix tasks for managing the Meilisearch search backend:
+  #   index     - create index settings and bulk-index all public Note objects
+  #   clear     - delete every indexed document
+  #   show-keys - list API keys using the master key
+  #   stats     - print document count and indexing status
+  require Pleroma.Constants
+
+  import Mix.Pleroma
+  import Ecto.Query
+
+  import Pleroma.Search.Meilisearch,
+    only: [meili_post: 2, meili_put: 2, meili_get: 1, meili_delete!: 1]
+
+  def run(["index"]) do
+    start_pleroma()
+
+    {:ok, version_info} = meili_get("/version")
+    meili_version = version_info["pkgVersion"]
+
+    # The ranking rule syntax was changed in 0.25.0 but nothing about that
+    # is mentioned in the changelog, so refuse to run against older servers.
+    if not Version.match?(meili_version, ">= 0.25.0") do
+      raise "Meilisearch <0.25.0 not supported"
+    end
+
+    # Rank primarily by publication date (newest first), then by relevance.
+    {:ok, _} =
+      meili_post(
+        "/indexes/objects/settings/ranking-rules",
+        [
+          "published:desc",
+          "words",
+          "exactness",
+          "proximity",
+          "typo",
+          "attribute",
+          "sort"
+        ]
+      )
+
+    # Only the post text is searchable; other document fields are metadata.
+    {:ok, _} =
+      meili_post(
+        "/indexes/objects/settings/searchable-attributes",
+        [
+          "content"
+        ]
+      )
+
+    IO.puts("Created indices. Starting to insert posts.")
+
+    chunk_size = Pleroma.Config.get([Pleroma.Search.Meilisearch, :initial_indexing_chunk_size])
+
+    # Repo.stream/2 must run inside a transaction; :infinity timeouts because
+    # a full reindex can take a very long time on large instances.
+    Pleroma.Repo.transaction(
+      fn ->
+        query =
+          from(Pleroma.Object,
+            # Only index public and unlisted posts which are notes and have some text
+            where:
+              fragment("data->>'type' = 'Note'") and
+                (fragment("data->'to' \\? ?", ^Pleroma.Constants.as_public()) or
+                   fragment("data->'cc' \\? ?", ^Pleroma.Constants.as_public())),
+            order_by: [desc: fragment("data->'published'")]
+          )
+
+        count = query |> Pleroma.Repo.aggregate(:count, :data)
+        IO.puts("Entries to index: #{count}")
+
+        Pleroma.Repo.stream(
+          query,
+          timeout: :infinity
+        )
+        |> Stream.map(&Pleroma.Search.Meilisearch.object_to_search_data/1)
+        |> Stream.filter(fn o -> not is_nil(o) end)
+        |> Stream.chunk_every(chunk_size)
+        |> Stream.transform(0, fn objects, acc ->
+          new_acc = acc + Enum.count(objects)
+
+          # Reset to the beginning of the line and rewrite it
+          IO.write("\r")
+          IO.write("Indexed #{new_acc} entries")
+
+          {[objects], new_acc}
+        end)
+        |> Stream.each(fn objects ->
+          result =
+            meili_put(
+              "/indexes/objects/documents",
+              objects
+            )
+
+          # A successful enqueue returns a body containing an update "uid";
+          # anything else is reported but does not abort the run.
+          with {:ok, res} <- result do
+            if not Map.has_key?(res, "uid") do
+              IO.puts("\nFailed to index: #{inspect(result)}")
+            end
+          else
+            e -> IO.puts("\nFailed to index due to network error: #{inspect(e)}")
+          end
+        end)
+        |> Stream.run()
+      end,
+      timeout: :infinity
+    )
+
+    IO.write("\n")
+  end
+
+  # Remove all documents; the index settings themselves are kept.
+  def run(["clear"]) do
+    start_pleroma()
+
+    meili_delete!("/indexes/objects/documents")
+  end
+
+  # List the API keys known to the Meilisearch server, authenticated with
+  # the given master key. Useful for finding the key to configure Pleroma with.
+  def run(["show-keys", master_key]) do
+    start_pleroma()
+
+    endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])
+
+    {:ok, result} =
+      Pleroma.HTTP.get(
+        Path.join(endpoint, "/keys"),
+        [{"Authorization", "Bearer #{master_key}"}]
+      )
+
+    decoded = Jason.decode!(result.body)
+
+    if decoded["results"] do
+      Enum.each(decoded["results"], fn %{"description" => desc, "key" => key} ->
+        IO.puts("#{desc}: #{key}")
+      end)
+    else
+      IO.puts("Error fetching the keys, check the master key is correct: #{inspect(decoded)}")
+    end
+  end
+
+  # Print index statistics: number of documents and whether indexing is ongoing.
+  def run(["stats"]) do
+    start_pleroma()
+
+    {:ok, result} = meili_get("/indexes/objects/stats")
+    IO.puts("Number of entries: #{result["numberOfDocuments"]}")
+    IO.puts("Indexing? #{result["isIndexing"]}")
+  end
+end