Merge branch 'configurable-federator' into 'develop'
[akkoma] lib/pleroma/web/federator/retry_queue.ex
# Pleroma: A lightweight social networking server
# Copyright © 2017-2019 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
4
5 defmodule Pleroma.Web.Federator.RetryQueue do
6 use GenServer
7
8 require Logger
9
10 def init(args) do
11 queue_table = :ets.new(:pleroma_retry_queue, [:bag, :protected])
12
13 {:ok, %{args | queue_table: queue_table, running_jobs: :sets.new()}}
14 end
15
16 def start_link() do
17 enabled =
18 if Mix.env() == :test, do: true, else: Pleroma.Config.get([__MODULE__, :enabled], false)
19
20 if enabled do
21 Logger.info("Starting retry queue")
22
23 linkres =
24 GenServer.start_link(
25 __MODULE__,
26 %{delivered: 0, dropped: 0, queue_table: nil, running_jobs: nil},
27 name: __MODULE__
28 )
29
30 maybe_kickoff_timer()
31 linkres
32 else
33 Logger.info("Retry queue disabled")
34 :ignore
35 end
36 end
37
38 def enqueue(data, transport, retries \\ 0) do
39 GenServer.cast(__MODULE__, {:maybe_enqueue, data, transport, retries + 1})
40 end
41
42 def get_stats() do
43 GenServer.call(__MODULE__, :get_stats)
44 end
45
46 def reset_stats() do
47 GenServer.call(__MODULE__, :reset_stats)
48 end
49
50 def get_retry_params(retries) do
51 if retries > Pleroma.Config.get([__MODULE__, :max_retries]) do
52 {:drop, "Max retries reached"}
53 else
54 {:retry, growth_function(retries)}
55 end
56 end
57
  # Milliseconds between :retry_timer_run ticks (default 1000).
  # NOTE(review): this reads [:retry_queue, :interval] while every other
  # setting in this module lives under [__MODULE__, ...] — confirm which
  # config namespace is intended before unifying; changing the key would
  # break existing deployments.
  def get_retry_timer_interval() do
    Pleroma.Config.get([:retry_queue, :interval], 1000)
  end
61
62 defp ets_count_expires(table, current_time) do
63 :ets.select_count(
64 table,
65 [
66 {
67 {:"$1", :"$2"},
68 [{:"=<", :"$1", {:const, current_time}}],
69 [true]
70 }
71 ]
72 )
73 end
74
75 defp ets_pop_n_expired(table, current_time, desired) do
76 {popped, _continuation} =
77 :ets.select(
78 table,
79 [
80 {
81 {:"$1", :"$2"},
82 [{:"=<", :"$1", {:const, current_time}}],
83 [:"$_"]
84 }
85 ],
86 desired
87 )
88
89 popped
90 |> List.foldl(true, fn e, acc ->
91 :ets.delete_object(table, e)
92 acc
93 end)
94
95 popped
96 end
97
98 def maybe_start_job(running_jobs, queue_table) do
99 # we don't want to hit the ets or the DateTime more times than we have to
100 # could optimize slightly further by not using the count, and instead grabbing
101 # up to N objects early...
102 current_time = DateTime.to_unix(DateTime.utc_now())
103 n_running_jobs = :sets.size(running_jobs)
104
105 if n_running_jobs < Pleroma.Config.get([__MODULE__, :max_jobs]) do
106 n_ready_jobs = ets_count_expires(queue_table, current_time)
107
108 if n_ready_jobs > 0 do
109 # figure out how many we could start
110 available_job_slots = Pleroma.Config.get([__MODULE__, :max_jobs]) - n_running_jobs
111 start_n_jobs(running_jobs, queue_table, current_time, available_job_slots)
112 else
113 running_jobs
114 end
115 else
116 running_jobs
117 end
118 end
119
120 defp start_n_jobs(running_jobs, _queue_table, _current_time, 0) do
121 running_jobs
122 end
123
124 defp start_n_jobs(running_jobs, queue_table, current_time, available_job_slots)
125 when available_job_slots > 0 do
126 candidates = ets_pop_n_expired(queue_table, current_time, available_job_slots)
127
128 candidates
129 |> List.foldl(running_jobs, fn {_, e}, rj ->
130 {:ok, pid} = Task.start(fn -> worker(e) end)
131 mref = Process.monitor(pid)
132 :sets.add_element(mref, rj)
133 end)
134 end
135
136 def worker({:send, data, transport, retries}) do
137 case transport.publish_one(data) do
138 {:ok, _} ->
139 GenServer.cast(__MODULE__, :inc_delivered)
140 :delivered
141
142 {:error, _reason} ->
143 enqueue(data, transport, retries)
144 :retry
145 end
146 end
147
148 def handle_call(:get_stats, _from, %{delivered: delivery_count, dropped: drop_count} = state) do
149 {:reply, %{delivered: delivery_count, dropped: drop_count}, state}
150 end
151
152 def handle_call(:reset_stats, _from, %{delivered: delivery_count, dropped: drop_count} = state) do
153 {:reply, %{delivered: delivery_count, dropped: drop_count},
154 %{state | delivered: 0, dropped: 0}}
155 end
156
157 def handle_cast(:reset_stats, state) do
158 {:noreply, %{state | delivered: 0, dropped: 0}}
159 end
160
161 def handle_cast(
162 {:maybe_enqueue, data, transport, retries},
163 %{dropped: drop_count, queue_table: queue_table, running_jobs: running_jobs} = state
164 ) do
165 case get_retry_params(retries) do
166 {:retry, timeout} ->
167 :ets.insert(queue_table, {timeout, {:send, data, transport, retries}})
168 running_jobs = maybe_start_job(running_jobs, queue_table)
169 {:noreply, %{state | running_jobs: running_jobs}}
170
171 {:drop, message} ->
172 Logger.debug(message)
173 {:noreply, %{state | dropped: drop_count + 1}}
174 end
175 end
176
177 def handle_cast(:kickoff_timer, state) do
178 retry_interval = get_retry_timer_interval()
179 Process.send_after(__MODULE__, :retry_timer_run, retry_interval)
180 {:noreply, state}
181 end
182
183 def handle_cast(:inc_delivered, %{delivered: delivery_count} = state) do
184 {:noreply, %{state | delivered: delivery_count + 1}}
185 end
186
187 def handle_cast(:inc_dropped, %{dropped: drop_count} = state) do
188 {:noreply, %{state | dropped: drop_count + 1}}
189 end
190
191 def handle_info({:send, data, transport, retries}, %{delivered: delivery_count} = state) do
192 case transport.publish_one(data) do
193 {:ok, _} ->
194 {:noreply, %{state | delivered: delivery_count + 1}}
195
196 {:error, _reason} ->
197 enqueue(data, transport, retries)
198 {:noreply, state}
199 end
200 end
201
202 def handle_info(
203 :retry_timer_run,
204 %{queue_table: queue_table, running_jobs: running_jobs} = state
205 ) do
206 maybe_kickoff_timer()
207 running_jobs = maybe_start_job(running_jobs, queue_table)
208 {:noreply, %{state | running_jobs: running_jobs}}
209 end
210
211 def handle_info({:DOWN, ref, :process, _pid, _reason}, state) do
212 %{running_jobs: running_jobs, queue_table: queue_table} = state
213 running_jobs = :sets.del_element(ref, running_jobs)
214 running_jobs = maybe_start_job(running_jobs, queue_table)
215 {:noreply, %{state | running_jobs: running_jobs}}
216 end
217
218 def handle_info(unknown, state) do
219 Logger.debug("RetryQueue: don't know what to do with #{inspect(unknown)}, ignoring")
220 {:noreply, state}
221 end
222
  # Compile-time switch on Mix.env(): under test the returned timestamp is
  # one second in the past, so queued entries are immediately eligible.
  if Mix.env() == :test do
    defp growth_function(_retries) do
      # NOTE(review): the config value is read but unused here — presumably
      # kept so test setups exercise the config path; confirm before removing.
      _shutit = Pleroma.Config.get([__MODULE__, :initial_timeout])
      DateTime.to_unix(DateTime.utc_now()) - 1
    end
  else
    # Cubic backoff: due at now + initial_timeout * retries^3 (seconds).
    defp growth_function(retries) do
      round(Pleroma.Config.get([__MODULE__, :initial_timeout]) * :math.pow(retries, 3)) +
        DateTime.to_unix(DateTime.utc_now())
    end
  end
234
235 defp maybe_kickoff_timer() do
236 GenServer.cast(__MODULE__, :kickoff_timer)
237 end
238 end