lib/pleroma/web/federator/retry_queue.ex
# Pleroma: A lightweight social networking server
# Copyright © 2017-2019 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only

defmodule Pleroma.Web.Federator.RetryQueue do
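  @moduledoc """
  In-memory retry queue for failed federation deliveries.

  Jobs are kept in an ETS table keyed by the unix timestamp of their next
  attempt. A periodic `:retry_timer_run` tick starts up to `@max_jobs`
  concurrent delivery tasks; failed deliveries are re-enqueued with a cubic
  backoff until `@max_retries` is exceeded, after which they are dropped.
  """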
  use GenServer

  require Logger

  # initial retry delay, in seconds
  @initial_timeout 30
  @max_retries 5

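  # maximum number of concurrently running delivery jobs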
  @max_jobs 20

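  # GenServer callback: create the queue table (a protected ETS bag keyed by
  # the scheduled timestamp) and an empty set for monitor refs of running jobs.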
  def init(args) do
    queue_table = :ets.new(:pleroma_retry_queue, [:bag, :protected])

    {:ok, %{args | queue_table: queue_table, running_jobs: :sets.new()}}
  end

  def start_link() do
    enabled =
      if Mix.env() == :test, do: true, else: Pleroma.Config.get([:retry_queue, :enabled], false)

    if enabled do
      Logger.info("Starting retry queue")

      linkres =
        GenServer.start_link(
          __MODULE__,
          %{delivered: 0, dropped: 0, queue_table: nil, running_jobs: nil},
          name: __MODULE__
        )

      maybe_kickoff_timer()
      linkres
    else
      Logger.info("Retry queue disabled")
      :ignore
    end
  end

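  # Public API: (re)queue `data` for delivery over `transport`; the retry
  # counter is incremented before the job is handed to the server.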
  def enqueue(data, transport, retries \\ 0) do
    GenServer.cast(__MODULE__, {:maybe_enqueue, data, transport, retries + 1})
  end

  def get_stats() do
    GenServer.call(__MODULE__, :get_stats)
  end

  def reset_stats() do
    GenServer.call(__MODULE__, :reset_stats)
  end

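  # Returns {:retry, next_run_timestamp} for a job that should be retried, or
  # {:drop, reason} once it has exceeded @max_retries.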
  def get_retry_params(retries) do
    if retries > @max_retries do
      {:drop, "Max retries reached"}
    else
      {:retry, growth_function(retries)}
    end
  end

  def get_retry_timer_interval() do
    Pleroma.Config.get([:retry_queue, :interval], 1000)
  end

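  # Match spec: count entries whose key ("$1", the scheduled unix timestamp)
  # is less than or equal to current_time, i.e. entries that are due.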
  defp ets_count_expires(table, current_time) do
    :ets.select_count(
      table,
      [
        {
          {:"$1", :"$2"},
          [{:"=<", :"$1", {:const, current_time}}],
          [true]
        }
      ]
    )
  end

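  # Take up to `desired` due entries out of the table: select them, delete each
  # selected object, and return them. Callers check ets_count_expires/2 first,
  # so :ets.select/3 is expected to find at least one match here.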
  defp ets_pop_n_expired(table, current_time, desired) do
    {popped, _continuation} =
      :ets.select(
        table,
        [
          {
            {:"$1", :"$2"},
            [{:"=<", :"$1", {:const, current_time}}],
            [:"$_"]
          }
        ],
        desired
      )

    Enum.each(popped, fn entry -> :ets.delete_object(table, entry) end)

    popped
  end

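  # Start as many due jobs as free slots allow (at most @max_jobs run at once)
  # and return the possibly updated set of monitor refs for running jobs.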
  def maybe_start_job(running_jobs, queue_table) do
    # we don't want to hit the ets or the DateTime more times than we have to
    # could optimize slightly further by not using the count, and instead grabbing
    # up to N objects early...
    current_time = DateTime.to_unix(DateTime.utc_now())
    n_running_jobs = :sets.size(running_jobs)

    if n_running_jobs < @max_jobs do
      n_ready_jobs = ets_count_expires(queue_table, current_time)

      if n_ready_jobs > 0 do
        # figure out how many we could start
        available_job_slots = @max_jobs - n_running_jobs
        start_n_jobs(running_jobs, queue_table, current_time, available_job_slots)
      else
        running_jobs
      end
    else
      running_jobs
    end
  end

  defp start_n_jobs(running_jobs, _queue_table, _current_time, 0) do
    running_jobs
  end

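  # Pop due entries and start one monitored Task per entry, folding each new
  # monitor ref into the running-jobs set.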
  defp start_n_jobs(running_jobs, queue_table, current_time, available_job_slots)
       when available_job_slots > 0 do
    candidates = ets_pop_n_expired(queue_table, current_time, available_job_slots)

    candidates
    |> List.foldl(running_jobs, fn {_, e}, rj ->
      {:ok, pid} = Task.start(fn -> worker(e) end)
      mref = Process.monitor(pid)
      :sets.add_element(mref, rj)
    end)
  end

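  # Task body for a single delivery attempt: report success back to the queue,
  # or re-enqueue the payload with its current retry count on failure.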
  def worker({:send, data, transport, retries}) do
    case transport.publish_one(data) do
      {:ok, _} ->
        GenServer.cast(__MODULE__, :inc_delivered)
        :delivered

      {:error, _reason} ->
        enqueue(data, transport, retries)
        :retry
    end
  end

  def handle_call(:get_stats, _from, %{delivered: delivery_count, dropped: drop_count} = state) do
    {:reply, %{delivered: delivery_count, dropped: drop_count}, state}
  end

  def handle_call(:reset_stats, _from, %{delivered: delivery_count, dropped: drop_count} = state) do
    {:reply, %{delivered: delivery_count, dropped: drop_count},
     %{state | delivered: 0, dropped: 0}}
  end

  def handle_cast(:reset_stats, state) do
    {:noreply, %{state | delivered: 0, dropped: 0}}
  end

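  # Store the job under its next-run timestamp and try to start work right
  # away, or count it as dropped once its retries are exhausted.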
  def handle_cast(
        {:maybe_enqueue, data, transport, retries},
        %{dropped: drop_count, queue_table: queue_table, running_jobs: running_jobs} = state
      ) do
    case get_retry_params(retries) do
      {:retry, timeout} ->
        :ets.insert(queue_table, {timeout, {:send, data, transport, retries}})
        running_jobs = maybe_start_job(running_jobs, queue_table)
        {:noreply, %{state | running_jobs: running_jobs}}

      {:drop, message} ->
        Logger.debug(message)
        {:noreply, %{state | dropped: drop_count + 1}}
    end
  end

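  # Schedule the next :retry_timer_run tick after the configured interval.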
  def handle_cast(:kickoff_timer, state) do
    retry_interval = get_retry_timer_interval()
    Process.send_after(__MODULE__, :retry_timer_run, retry_interval)
    {:noreply, state}
  end

  def handle_cast(:inc_delivered, %{delivered: delivery_count} = state) do
    {:noreply, %{state | delivered: delivery_count + 1}}
  end

  def handle_cast(:inc_dropped, %{dropped: drop_count} = state) do
    {:noreply, %{state | dropped: drop_count + 1}}
  end

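  # Delivery request arriving as a plain message: publish it inline and
  # re-enqueue it on failure.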
  def handle_info({:send, data, transport, retries}, %{delivered: delivery_count} = state) do
    case transport.publish_one(data) do
      {:ok, _} ->
        {:noreply, %{state | delivered: delivery_count + 1}}

      {:error, _reason} ->
        enqueue(data, transport, retries)
        {:noreply, state}
    end
  end

  def handle_info(
        :retry_timer_run,
        %{queue_table: queue_table, running_jobs: running_jobs} = state
      ) do
    maybe_kickoff_timer()
    running_jobs = maybe_start_job(running_jobs, queue_table)
    {:noreply, %{state | running_jobs: running_jobs}}
  end

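  # A delivery Task exited: forget its monitor ref and check whether more
  # queued work can be started.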
  def handle_info({:DOWN, ref, :process, _pid, _reason}, state) do
    %{running_jobs: running_jobs, queue_table: queue_table} = state
    running_jobs = :sets.del_element(ref, running_jobs)
    running_jobs = maybe_start_job(running_jobs, queue_table)
    {:noreply, %{state | running_jobs: running_jobs}}
  end

  def handle_info(unknown, state) do
    Logger.debug("RetryQueue: don't know what to do with #{inspect(unknown)}, ignoring")
    {:noreply, state}
  end

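  # Backoff schedule: the next attempt is due at now + @initial_timeout *
  # retries^3 seconds; in the test environment jobs become due immediately.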
  if Mix.env() == :test do
    defp growth_function(_retries) do
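      # reference @initial_timeout so the compiler does not emit an "unused
      # module attribute" warning when only this test clause is compiled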
      _shutit = @initial_timeout
      DateTime.to_unix(DateTime.utc_now()) - 1
    end
  else
    defp growth_function(retries) do
      round(@initial_timeout * :math.pow(retries, 3)) + DateTime.to_unix(DateTime.utc_now())
    end
  end

  defp maybe_kickoff_timer() do
    GenServer.cast(__MODULE__, :kickoff_timer)
  end
end