Connection pool: implement logging and telemetry events
authorrinpatch <rinpatch@sdf.org>
Wed, 15 Jul 2020 12:26:25 +0000 (15:26 +0300)
committerrinpatch <rinpatch@sdf.org>
Wed, 15 Jul 2020 12:26:25 +0000 (15:26 +0300)
lib/pleroma/application.ex
lib/pleroma/gun/connection_pool/worker_supervisor.ex
lib/pleroma/telemetry/logger.ex [new file with mode: 0644]

index cfdaf1770f516e59e6d35b0cc7252a4d393fdb29..37fcdf29370a592cea79c96956d189e6bfa26597 100644 (file)
@@ -39,6 +39,7 @@ defmodule Pleroma.Application do
     # every time the application is restarted, so we disable module
     # conflicts at runtime
     Code.compiler_options(ignore_module_conflict: true)
+    Pleroma.Telemetry.Logger.attach()
     Config.Holder.save_default()
     Pleroma.HTML.compile_scrubbers()
     Config.DeprecationWarnings.warn()
index d090c034e66d177b17f7d96beec5aabc4ff9c3a5..4b5d10d2a20396c17f60c912e23cabdd571d1b24 100644 (file)
@@ -18,8 +18,12 @@ defmodule Pleroma.Gun.ConnectionPool.WorkerSupervisor do
     case DynamicSupervisor.start_child(__MODULE__, {Pleroma.Gun.ConnectionPool.Worker, opts}) do
       {:error, :max_children} ->
         case free_pool() do
-          :ok -> start_worker(opts)
-          :error -> {:error, :pool_full}
+          :ok ->
+            start_worker(opts)
+
+          :error ->
+            :telemetry.execute([:pleroma, :connection_pool, :provision_failure], %{opts: opts})
+            {:error, :pool_full}
         end
 
       res ->
@@ -44,6 +48,14 @@ defmodule Pleroma.Gun.ConnectionPool.WorkerSupervisor do
               |> round
               |> max(1)
 
+            :telemetry.execute([:pleroma, :connection_pool, :reclaim, :start], %{}, %{
+              max_connections: max_connections,
+              reclaim_max: reclaim_max
+            })
+
+            # :ets.fun2ms(
+            # fn {_, {worker_pid, {_, used_by, crf, last_reference}}} when used_by == [] ->
+            #   {worker_pid, crf, last_reference} end)
             unused_conns =
               Registry.select(
                 @registry,
@@ -55,17 +67,35 @@ defmodule Pleroma.Gun.ConnectionPool.WorkerSupervisor do
 
             case unused_conns do
               [] ->
+                :telemetry.execute(
+                  [:pleroma, :connection_pool, :reclaim, :stop],
+                  %{reclaimed_count: 0},
+                  %{
+                    max_connections: max_connections
+                  }
+                )
+
                 exit(:no_unused_conns)
 
               unused_conns ->
-                unused_conns
-                |> Enum.sort(fn {_pid1, crf1, last_reference1}, {_pid2, crf2, last_reference2} ->
-                  crf1 <= crf2 and last_reference1 <= last_reference2
-                end)
-                |> Enum.take(reclaim_max)
+                reclaimed =
+                  unused_conns
+                  |> Enum.sort(fn {_pid1, crf1, last_reference1},
+                                  {_pid2, crf2, last_reference2} ->
+                    crf1 <= crf2 and last_reference1 <= last_reference2
+                  end)
+                  |> Enum.take(reclaim_max)
+
+                reclaimed
                 |> Enum.each(fn {pid, _, _} ->
                   DynamicSupervisor.terminate_child(__MODULE__, pid)
                 end)
+
+                :telemetry.execute(
+                  [:pleroma, :connection_pool, :reclaim, :stop],
+                  %{reclaimed_count: Enum.count(reclaimed)},
+                  %{max_connections: max_connections}
+                )
             end
           end)
 
diff --git a/lib/pleroma/telemetry/logger.ex b/lib/pleroma/telemetry/logger.ex
new file mode 100644 (file)
index 0000000..d76dd37
--- /dev/null
@@ -0,0 +1,62 @@
+defmodule Pleroma.Telemetry.Logger do
+  @moduledoc "Transforms Pleroma telemetry events to logs"
+
+  require Logger
+
+  @events [
+    [:pleroma, :connection_pool, :reclaim, :start],
+    [:pleroma, :connection_pool, :reclaim, :stop],
+    [:pleroma, :connection_pool, :provision_failure]
+  ]
+  def attach do
+    :telemetry.attach_many("pleroma-logger", @events, &handle_event/4, [])
+  end
+
+  # Passing anonymous functions instead of strings to logger is intentional,
+  # that way strings won't be concatenated if the message is going to be thrown
+  # out anyway due to higher log level configured
+
+  def handle_event(
+        [:pleroma, :connection_pool, :reclaim, :start],
+        _,
+        %{max_connections: max_connections, reclaim_max: reclaim_max},
+        _
+      ) do
+    Logger.debug(fn ->
+      "Connection pool is exhausted (reached #{max_connections} connections). Starting idle connection cleanup to reclaim as much as #{
+        reclaim_max
+      } connections"
+    end)
+  end
+
+  def handle_event(
+        [:pleroma, :connection_pool, :reclaim, :stop],
+        %{reclaimed_count: 0},
+        _,
+        _
+      ) do
+    Logger.error(fn ->
+      "Connection pool failed to reclaim any connections due to all of them being in use. It will have to drop requests for opening connections to new hosts"
+    end)
+  end
+
+  def handle_event(
+        [:pleroma, :connection_pool, :reclaim, :stop],
+        %{reclaimed_count: reclaimed_count},
+        _,
+        _
+      ) do
+    Logger.debug(fn -> "Connection pool cleaned up #{reclaimed_count} idle connections" end)
+  end
+
+  def handle_event(
+        [:pleroma, :connection_pool, :provision_failure],
+        %{opts: [key | _]},
+        _,
+        _
+      ) do
+    Logger.error(fn ->
+      "Connection pool had to refuse opening a connection to #{key} due to connection limit exhaustion"
+    end)
+  end
+end