# 5b01ee14deeb97218b0ea168c78f90153c1a736e
# [akkoma] / lib / pleroma / web / telemetry.ex
defmodule Pleroma.Web.Telemetry do
  @moduledoc """
  Supervisor for the telemetry processes plus the metric definitions they use.

  Two children are started:

    * a `:telemetry_poller` that periodically invokes `instance_stats/0`
    * a `TelemetryMetricsPrometheus.Core` reporter fed with `prometheus_metrics/0`

  `live_dashboard_metrics/0` returns the same definitions minus the
  distributions.
  """

  use Supervisor
  import Telemetry.Metrics
  alias Pleroma.Stats

  # Histogram bucket boundaries. They are interpreted in whatever `unit:` the
  # metric that uses them converts its measurement to.
  @wide_buckets [0.1, 0.2, 0.5, 1, 2.5, 5, 10, 25, 50, 100, 250, 500, 1000]
  @narrow_buckets [0.01, 0.025, 0.05, 0.1, 0.2, 0.5, 1, 2.5, 5, 10]

  @doc """
  Starts the supervisor, registered under the module name.

  `arg` is passed through to `init/1`, which ignores it.
  """
  @spec start_link(term()) :: Supervisor.on_start()
  def start_link(arg) do
    Supervisor.start_link(__MODULE__, arg, name: __MODULE__)
  end

  @impl true
  def init(_arg) do
    poller = {:telemetry_poller, measurements: periodic_measurements(), period: 10_000}

    # NOTE(review): `prometheus_metrics/0` includes `summary` definitions;
    # confirm the Prometheus reporter actually exports that metric type.
    reporter = {TelemetryMetricsPrometheus.Core, metrics: prometheus_metrics()}

    Supervisor.init([poller, reporter], strategy: :one_for_one)
  end

  @doc """
  Metric definitions handed to the Prometheus reporter: the summary and
  last-value metrics followed by the distributions.
  """
  @spec prometheus_metrics() :: [Telemetry.Metrics.t()]
  def prometheus_metrics, do: summary_metrics() ++ distribution_metrics()

  @doc """
  Metric definitions without the distributions (kept out because the Phoenix
  dashboard does not handle them well).
  """
  @spec live_dashboard_metrics() :: [Telemetry.Metrics.t()]
  def live_dashboard_metrics, do: summary_metrics()

  @doc """
  Reads the current instance statistics and publishes them as telemetry events.

  Emits `[:pleroma, :local_users]`, `[:pleroma, :domains]` and
  `[:pleroma, :local_statuses]`, each with a single `:total` measurement and
  empty metadata. Registered as the poller's periodic measurement.
  """
  @spec instance_stats() :: :ok
  def instance_stats do
    stats = Stats.get_stats()

    # Events are emitted in this order; each value is looked up right before
    # its event is sent, and a missing key raises `KeyError`.
    Enum.each(
      [
        {[:pleroma, :local_users], :user_count},
        {[:pleroma, :domains], :domain_count},
        {[:pleroma, :local_statuses], :status_count}
      ],
      fn {event_name, stat_key} ->
        :telemetry.execute(event_name, %{total: Map.fetch!(stats, stat_key)}, %{})
      end
    )
  end

  # Measurement callbacks run by the poller on every tick.
  defp periodic_measurements, do: [{__MODULE__, :instance_stats, []}]

  # A separate set of metrics for distributions because phoenix dashboard does
  # NOT handle them well.
  defp distribution_metrics do
    [
      # No explicit `event_name` here, so it is taken from the metric name.
      # NOTE(review): the measurement is converted to seconds while the buckets
      # reach 1000 - confirm that range is intended for request durations.
      distribution(
        "phoenix.router_dispatch.stop.duration",
        measurement: :duration,
        unit: {:native, :second},
        tags: [:route],
        reporter_options: [buckets: @wide_buckets]
      ),

      # Database time metrics
      distribution(
        "pleroma.repo.query.total_time",
        measurement: :total_time,
        unit: {:native, :millisecond},
        reporter_options: [buckets: @wide_buckets]
      ),
      distribution(
        "pleroma.repo.query.queue_time",
        measurement: :queue_time,
        unit: {:native, :millisecond},
        reporter_options: [buckets: @narrow_buckets]
      ),

      # Background jobs and outgoing HTTP requests
      oban_job_duration("oban_job_exception", [:oban, :job, :exception]),
      distribution(
        "tesla_request_completed",
        event_name: [:tesla, :request, :stop],
        measurement: :duration,
        tags: [:response_code],
        tag_values: &put_response_code_tag/1,
        unit: {:native, :second},
        reporter_options: [buckets: @narrow_buckets]
      ),
      oban_job_duration("oban_job_completion", [:oban, :job, :stop])
    ]
  end

  # Duration distribution for an Oban job event, tagged with the job's worker.
  defp oban_job_duration(metric_name, event_name) do
    distribution(
      metric_name,
      event_name: event_name,
      measurement: :duration,
      tags: [:worker],
      tag_values: &put_worker_tag/1,
      unit: {:native, :second},
      reporter_options: [buckets: @narrow_buckets]
    )
  end

  # Copies the worker found under `metadata.job.worker` to a top-level `:worker` tag.
  defp put_worker_tag(metadata), do: Map.put(metadata, :worker, metadata.job.worker)

  # Copies the status found under `metadata.env.status` to a top-level
  # `:response_code` tag.
  defp put_response_code_tag(metadata) do
    Map.put(metadata, :response_code, metadata.env.status)
  end

  # Every non-distribution metric, in reporting order.
  defp summary_metrics do
    phoenix_summaries() ++ repo_summaries() ++ vm_summaries() ++ instance_gauges()
  end

  # Phoenix metrics
  defp phoenix_summaries do
    [
      summary("phoenix.endpoint.stop.duration", unit: {:native, :millisecond}),
      summary("phoenix.router_dispatch.stop.duration",
        tags: [:route],
        unit: {:native, :millisecond}
      )
    ]
  end

  # Database query timings
  defp repo_summaries do
    [
      summary("pleroma.repo.query.total_time", unit: {:native, :millisecond}),
      summary("pleroma.repo.query.decode_time", unit: {:native, :millisecond}),
      summary("pleroma.repo.query.query_time", unit: {:native, :millisecond}),
      summary("pleroma.repo.query.queue_time", unit: {:native, :millisecond}),
      summary("pleroma.repo.query.idle_time", unit: {:native, :millisecond})
    ]
  end

  # VM metrics
  defp vm_summaries do
    [
      summary("vm.memory.total", unit: {:byte, :kilobyte}),
      summary("vm.total_run_queue_lengths.total"),
      summary("vm.total_run_queue_lengths.cpu"),
      summary("vm.total_run_queue_lengths.io")
    ]
  end

  # Latest values of the events published by `instance_stats/0`.
  defp instance_gauges do
    [
      last_value("pleroma.local_users.total"),
      last_value("pleroma.domains.total"),
      last_value("pleroma.local_statuses.total")
    ]
  end
end