435f557992c233fbdfa0bcb4ff8db3032e705bae
[akkoma] / lib / pleroma / web / telemetry.ex
defmodule Pleroma.Web.Telemetry do
  @moduledoc """
  Supervisor for the telemetry reporting processes, and home of the metric
  definitions they consume.

  `init/1` starts two children:

    * a `:telemetry_poller` that runs `instance_stats/0` with a period of
      `10_000` (milliseconds, per the `:telemetry_poller` documentation), and
    * a `TelemetryMetricsPrometheus` reporter that is handed
      `prometheus_metrics/0` and the Plug.Cowboy option `ip: {127, 0, 0, 1}`.

  `live_dashboard_metrics/0` returns only the summary and last-value metrics,
  without the distributions.
  """

  use Supervisor
  import Telemetry.Metrics
  alias Pleroma.Stats

  # Histogram bucket boundaries shared by the distribution metrics below.
  # Boundaries are expressed in the unit each metric converts to.
  @wide_buckets [0.1, 0.2, 0.5, 1, 2.5, 5, 10, 25, 50, 100, 250, 500, 1000]
  @narrow_buckets [0.01, 0.025, 0.05, 0.1, 0.2, 0.5, 1, 2.5, 5, 10]

  @doc """
  Starts the telemetry supervisor and registers it under this module's name.

  `arg` is forwarded to `init/1`, which ignores it.
  """
  @spec start_link(term()) :: Supervisor.on_start()
  def start_link(arg) do
    Supervisor.start_link(__MODULE__, arg, name: __MODULE__)
  end

  @impl true
  def init(_arg) do
    children = [
      {:telemetry_poller, measurements: periodic_measurements(), period: 10_000},
      {TelemetryMetricsPrometheus,
       metrics: prometheus_metrics(), plug_cowboy_opts: [ip: {127, 0, 0, 1}]}
    ]

    Supervisor.init(children, strategy: :one_for_one)
  end

  # NOTE(review): TelemetryMetricsPrometheus documents the summary metric type
  # as unsupported - confirm whether passing `summary_metrics/0` to it is
  # intentional or merely tolerated.
  @doc """
  Returns every metric definition: the summary and last-value metrics followed
  by the distribution metrics. This is the list given to the Prometheus
  reporter started in `init/1`.
  """
  @spec prometheus_metrics() :: [Telemetry.Metrics.t()]
  def prometheus_metrics, do: summary_metrics() ++ distribution_metrics()

  @doc """
  Returns the summary and last-value metrics only, leaving the distributions
  out.
  """
  @spec live_dashboard_metrics() :: [Telemetry.Metrics.t()]
  def live_dashboard_metrics, do: summary_metrics()

  @doc """
  Publishes the current instance statistics as telemetry events.

  Reads `Pleroma.Stats.get_stats/0` and emits its `user_count`, `domain_count`
  and `status_count` as the `:total` measurement of `[:pleroma, :local_users]`,
  `[:pleroma, :domains]` and `[:pleroma, :local_statuses]` respectively.

  It is public because the poller started in `init/1` invokes it through a
  `{module, function, args}` tuple.
  """
  @spec instance_stats() :: :ok
  def instance_stats do
    stats = Stats.get_stats()

    :telemetry.execute([:pleroma, :local_users], %{total: stats.user_count}, %{})
    :telemetry.execute([:pleroma, :domains], %{total: stats.domain_count}, %{})
    :telemetry.execute([:pleroma, :local_statuses], %{total: stats.status_count}, %{})
  end

  # A separate set of metrics for distributions, because Phoenix dashboard does
  # NOT handle them well. (Kept as a plain comment: `@doc` on a private function
  # is discarded by the compiler and triggers a warning.)
  defp distribution_metrics do
    [
      # NOTE(review): this metric is converted to seconds, yet it uses the same
      # boundaries (up to 1000) as the millisecond-based repo metric below -
      # confirm that range is intended.
      distribution(
        "phoenix.router_dispatch.stop.duration",
        measurement: :duration,
        unit: {:native, :second},
        tags: [:route],
        reporter_options: [buckets: @wide_buckets]
      ),

      # Database time metrics
      distribution(
        "pleroma.repo.query.total_time",
        measurement: :total_time,
        unit: {:native, :millisecond},
        reporter_options: [buckets: @wide_buckets]
      ),
      distribution(
        "pleroma.repo.query.queue_time",
        measurement: :queue_time,
        unit: {:native, :millisecond},
        reporter_options: [buckets: @narrow_buckets]
      ),

      # Oban job failures, tagged with the worker taken from the job in the
      # event metadata.
      distribution(
        "oban_job_exception",
        event_name: [:oban, :job, :exception],
        measurement: :duration,
        tags: [:worker],
        tag_values: fn metadata -> Map.put(metadata, :worker, metadata.job.worker) end,
        unit: {:native, :second},
        reporter_options: [buckets: @narrow_buckets]
      ),

      # Tesla requests, tagged with the status found on the `env` in the event
      # metadata.
      distribution(
        "tesla_request_completed",
        event_name: [:tesla, :request, :stop],
        measurement: :duration,
        tags: [:response_code],
        tag_values: fn metadata -> Map.put(metadata, :response_code, metadata.env.status) end,
        unit: {:native, :second},
        reporter_options: [buckets: @narrow_buckets]
      ),

      # Oban job completions, tagged the same way as the failures above.
      distribution(
        "oban_job_completion",
        event_name: [:oban, :job, :stop],
        measurement: :duration,
        tags: [:worker],
        tag_values: fn metadata -> Map.put(metadata, :worker, metadata.job.worker) end,
        unit: {:native, :second},
        reporter_options: [buckets: @narrow_buckets]
      )
    ]
  end

  # Summary and last-value metrics; shared by both public metric lists.
  defp summary_metrics do
    [
      # Phoenix metrics
      summary("phoenix.endpoint.stop.duration", unit: {:native, :millisecond}),
      summary("phoenix.router_dispatch.stop.duration",
        tags: [:route],
        unit: {:native, :millisecond}
      ),

      # Database metrics
      summary("pleroma.repo.query.total_time", unit: {:native, :millisecond}),
      summary("pleroma.repo.query.decode_time", unit: {:native, :millisecond}),
      summary("pleroma.repo.query.query_time", unit: {:native, :millisecond}),
      summary("pleroma.repo.query.queue_time", unit: {:native, :millisecond}),
      summary("pleroma.repo.query.idle_time", unit: {:native, :millisecond}),

      # VM metrics
      summary("vm.memory.total", unit: {:byte, :kilobyte}),
      summary("vm.total_run_queue_lengths.total"),
      summary("vm.total_run_queue_lengths.cpu"),
      summary("vm.total_run_queue_lengths.io"),

      # Instance statistics, emitted by instance_stats/0
      last_value("pleroma.local_users.total"),
      last_value("pleroma.domains.total"),
      last_value("pleroma.local_statuses.total")
    ]
  end

  # Measurements handed to the poller: each entry is a {module, function, args}
  # tuple.
  defp periodic_measurements do
    [
      {__MODULE__, :instance_stats, []}
    ]
  end
end