From 88f0be96933c287b99469edcfb6483cc91fa73c8 Mon Sep 17 00:00:00 2001
From: Alexander Strizhakov <alex.strizhakov@gmail.com>
Date: Mon, 22 Apr 2019 07:19:53 +0000
Subject: [PATCH] Feature/826 healthcheck endpoint

---
 CHANGELOG.md                                  |  1 +
 docs/api/pleroma_api.md                       | 17 ++++++
 lib/healthcheck.ex                            | 60 +++++++++++++++++++
 lib/pleroma/web/router.ex                     |  1 +
 .../controllers/util_controller.ex            | 13 ++++
 test/healthcheck_test.exs                     | 22 +++++++
 test/web/twitter_api/util_controller_test.exs |  6 ++
 7 files changed, 120 insertions(+)
 create mode 100644 lib/healthcheck.ex
 create mode 100644 test/healthcheck_test.exs

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ed078bc69..f6bc41c24 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 - Configuration: `fetch_initial_posts` option
 - Configuration: `notify_email` option
 - Pleroma API: User subscribtions
+- Pleroma API: Healthcheck endpoint
 - Admin API: Endpoints for listing/revoking invite tokens
 - Admin API: Endpoints for making users follow/unfollow each other
 - Mastodon API: [Scheduled statuses](https://docs.joinmastodon.org/api/rest/scheduled-statuses/)
diff --git a/docs/api/pleroma_api.md b/docs/api/pleroma_api.md
index dbe250300..4b8062d37 100644
--- a/docs/api/pleroma_api.md
+++ b/docs/api/pleroma_api.md
@@ -197,3 +197,20 @@ See [Admin-API](Admin-API.md)
     * `remote`: BOOLEAN field, receives notifications from people on remote instances
     * `local`: BOOLEAN field, receives notifications from people on the local instance
 * Response: JSON. Returns `{"status": "success"}` if the update was successful, otherwise returns `{"error": "error_msg"}`
+
+## `/api/pleroma/healthcheck`
+### Healthcheck endpoint with additional system data.
+* Method `GET`
+* Authentication: not required
+* Params: none
+* Response: JSON. The HTTP status is `200` when the instance is healthy and `503` when it is unhealthy.
+* Example response:
+```json
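+{
+  "pool_size": 0, # configured size of the database connection pool
+  "active": 0, # database connections in the "active" state
+  "idle": 0, # database connections in any other state
+  "memory_used": 0.00, # total memory allocated by the Erlang VM, in MiB
+  "healthy": true # instance state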
+}
+```
diff --git a/lib/healthcheck.ex b/lib/healthcheck.ex
new file mode 100644
index 000000000..646fb3b9d
--- /dev/null
+++ b/lib/healthcheck.ex
@@ -0,0 +1,60 @@
+defmodule Pleroma.Healthcheck do
+  @moduledoc """
+  Collects runtime metrics about the application and derives a health status.
+
+  The instance is reported as unhealthy once the number of active database
+  connections reaches the configured `Pleroma.Repo` pool size.
+  """
+  alias Pleroma.Repo
+
+  defstruct pool_size: 0,
+            active: 0,
+            idle: 0,
+            memory_used: 0,
+            healthy: true
+
+  @type t :: %__MODULE__{
+          pool_size: non_neg_integer(),
+          active: non_neg_integer(),
+          idle: non_neg_integer(),
+          memory_used: number(),
+          healthy: boolean()
+        }
+
+  @doc "Returns VM memory usage (MiB), database connection counts and the health flag."
+  @spec system_info() :: t()
+  def system_info do
+    %__MODULE__{memory_used: Float.round(:erlang.memory(:total) / 1024 / 1024, 2)}
+    |> assign_db_info()
+    |> check_health()
+  end
+
+  # Fills in `pool_size` from the Repo config and the `active`/`idle` connection
+  # counts from `pg_stat_activity`; every state other than "active" counts as idle.
+  defp assign_db_info(healthcheck) do
+    repo_config = Application.get_env(:pleroma, Repo)
+
+    # The database name is passed as a bind parameter rather than interpolated,
+    # so a name containing a quote cannot break (or alter) the statement.
+    query =
+      "select state, count(pid) from pg_stat_activity where datname = $1 group by state;"
+
+    %{rows: rows} = Repo.query!(query, [repo_config[:database]])
+
+    {active, idle} =
+      Enum.reduce(rows, {0, 0}, fn
+        ["active", cnt], {active, idle} -> {active + cnt, idle}
+        [_state, cnt], {active, idle} -> {active, idle + cnt}
+      end)
+
+    %{healthcheck | active: active, idle: idle, pool_size: repo_config[:pool_size]}
+  end
+
+  @doc "Marks the check as unhealthy when `active` has reached `pool_size`."
+  @spec check_health(t()) :: t()
+  def check_health(%{pool_size: pool_size, active: active} = check) when active >= pool_size do
+    %{check | healthy: false}
+  end
+
+  def check_health(check), do: check
+end
diff --git a/lib/pleroma/web/router.ex b/lib/pleroma/web/router.ex
index 8b665d61b..6228b5868 100644
--- a/lib/pleroma/web/router.ex
+++ b/lib/pleroma/web/router.ex
@@ -135,6 +135,7 @@ defmodule Pleroma.Web.Router do
     post("/password_reset", UtilController, :password_reset)
     get("/emoji", UtilController, :emoji)
     get("/captcha", UtilController, :captcha)
+    get("/healthcheck", UtilController, :healthcheck)
   end
 
   scope "/api/pleroma", Pleroma.Web do
diff --git a/lib/pleroma/web/twitter_api/controllers/util_controller.ex b/lib/pleroma/web/twitter_api/controllers/util_controller.ex
index 8665e058a..197a89966 100644
--- a/lib/pleroma/web/twitter_api/controllers/util_controller.ex
+++ b/lib/pleroma/web/twitter_api/controllers/util_controller.ex
@@ -363,4 +363,17 @@ defmodule Pleroma.Web.TwitterAPI.UtilController do
   def captcha(conn, _params) do
     json(conn, Pleroma.Captcha.new())
   end
+
+  # Serves the healthcheck report as JSON, with 503 when the instance is unhealthy.
+  def healthcheck(conn, _params) do
+    report = Pleroma.Healthcheck.system_info()
+
+    if report.healthy do
+      json(conn, report)
+    else
+      conn
+      |> Plug.Conn.put_status(:service_unavailable)
+      |> json(report)
+    end
+  end
 end
diff --git a/test/healthcheck_test.exs b/test/healthcheck_test.exs
new file mode 100644
index 000000000..e05061220
--- /dev/null
+++ b/test/healthcheck_test.exs
@@ -0,0 +1,22 @@
+defmodule Pleroma.HealthcheckTest do
+  use Pleroma.DataCase
+  alias Pleroma.Healthcheck
+
+  test "system_info/0" do
+    fields = Healthcheck.system_info() |> Map.from_struct() |> Map.keys()
+
+    # Sorted explicitly so the assertion does not rely on the iteration order
+    # of map keys.
+    assert Enum.sort(fields) == [:active, :healthy, :idle, :memory_used, :pool_size]
+  end
+
+  describe "check_health/1" do
+    test "is unhealthy when active connections equal the pool size" do
+      refute Healthcheck.check_health(%Healthcheck{pool_size: 10, active: 10}).healthy
+    end
+
+    test "is healthy when active connections are below the pool size" do
+      assert Healthcheck.check_health(%Healthcheck{pool_size: 10, active: 9}).healthy
+    end
+  end
+end
diff --git a/test/web/twitter_api/util_controller_test.exs b/test/web/twitter_api/util_controller_test.exs
index c58b49ea4..56474447b 100644
--- a/test/web/twitter_api/util_controller_test.exs
+++ b/test/web/twitter_api/util_controller_test.exs
@@ -245,4 +245,10 @@ defmodule Pleroma.Web.TwitterAPI.UtilControllerTest do
       assert html_response(response, 200) =~ "Log in to follow"
     end
   end
+
+  test "GET /api/pleroma/healthcheck", %{conn: conn} do
+    # The status depends on live database load, so both outcomes are accepted.
+    %{status: status} = get(conn, "/api/pleroma/healthcheck")
+    assert status in [200, 503]
+  end
 end
-- 
2.49.0