Merge branch 'develop' into 'remove-twitter-api'
[akkoma] / lib / pleroma / reverse_proxy / reverse_proxy.ex
# Pleroma: A lightweight social networking server
# Copyright © 2017-2020 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
4
defmodule Pleroma.ReverseProxy do
  @keep_req_headers ~w(accept user-agent accept-encoding cache-control if-modified-since) ++
                      ~w(if-unmodified-since if-none-match if-range range)
  @resp_cache_headers ~w(etag date last-modified)
  @keep_resp_headers @resp_cache_headers ++
                       ~w(content-type content-disposition content-encoding content-range) ++
                       ~w(accept-ranges vary)
  @default_cache_control_header "public, max-age=1209600"
  @valid_resp_codes [200, 206, 304]
  @max_read_duration :timer.seconds(30)
  @max_body_length :infinity
  @failed_request_ttl :timer.seconds(60)
  @methods ~w(GET HEAD)

  # The moduledoc is declared after the attributes above because it interpolates them.
  @moduledoc """
  A reverse proxy.

      Pleroma.ReverseProxy.call(conn, url, options)

  It is not meant to be added into a plug pipeline, but to be called from another plug or controller.

  Supports `#{inspect(@methods)}` HTTP methods, and only allows `#{inspect(@valid_resp_codes)}` status codes.

  Responses are chunked to the client while downloading from the upstream.

  Some request / response headers are preserved:

  * request: `#{inspect(@keep_req_headers)}`
  * response: `#{inspect(@keep_resp_headers)}`

  Options:

  * `redirect_on_failure` (default `false`). Redirects the client to the real remote URL if there's any HTTP
  errors. Any error during body processing will not be redirected as the response is chunked. This may expose
  remote URL, clients IPs, ….

  * `max_body_length` (default `#{inspect(@max_body_length)}`): limits the content length to be approximately the
  specified length. It is validated with the `content-length` header and also verified when proxying.

  * `max_read_duration` (default `#{inspect(@max_read_duration)}` ms): the total time the connection is allowed to
  read from the remote upstream.

  * `failed_request_ttl` (default `#{inspect(@failed_request_ttl)}` ms): the time the failed request is cached and cannot be retried.

  * `inline_content_types`:
    * `true` will not alter `content-disposition` (up to the upstream),
    * `false` will add `content-disposition: attachment` to any request,
    * a list of whitelisted content types

  * `keep_user_agent` will forward the client's user-agent to the upstream. This may be useful if the upstream is
  doing content transformation (encoding, …) depending on the request.

  * `req_headers`, `resp_headers` additional headers.

  * `http`: options for [hackney](https://github.com/benoitc/hackney) or [gun](https://github.com/ninenines/gun).

  """
  @default_options [pool: :media]

  @inline_content_types [
    "image/gif",
    "image/jpeg",
    "image/jpg",
    "image/png",
    "image/svg+xml",
    "audio/mpeg",
    "audio/mp3",
    "video/webm",
    "video/mp4",
    "video/quicktime"
  ]

  require Logger
  import Plug.Conn

  @type option() ::
          {:keep_user_agent, boolean}
          | {:max_read_duration, :timer.time() | :infinity}
          | {:max_body_length, non_neg_integer() | :infinity}
          | {:failed_request_ttl, :timer.time() | :infinity}
          | {:http, keyword()}
          | {:req_headers, [{String.t(), String.t()}]}
          | {:resp_headers, [{String.t(), String.t()}]}
          | {:inline_content_types, boolean() | [String.t()]}
          | {:redirect_on_failure, boolean()}

  @doc """
  Proxies `conn` to `url` and returns the halted connection.

  Only the methods in `#{inspect(@methods)}` are proxied; any other method is answered with a 400.
  URLs that recently failed (tracked in the `:failed_proxy_url_cache` cache) are not retried and are
  answered with an error or a redirect, depending on the `redirect_on_failure` option.

  See the module documentation for the supported options.
  """
  @spec call(Plug.Conn.t(), url :: String.t(), [option()]) :: Plug.Conn.t()
  def call(_conn, _url, _opts \\ [])

  def call(conn = %{method: method}, url, opts) when method in @methods do
    client_opts = Keyword.merge(@default_options, Keyword.get(opts, :http, []))

    req_headers = build_req_headers(conn.req_headers, opts)

    # Used as fallback file name when the response is forced to be an attachment.
    opts =
      if filename = Pleroma.Web.MediaProxy.filename(url) do
        Keyword.put_new(opts, :attachment_name, filename)
      else
        opts
      end

    max_body_length = Keyword.get(opts, :max_body_length, @max_body_length)

    with {:ok, nil} <- Cachex.get(:failed_proxy_url_cache, url),
         {:ok, code, headers, client} <- request(method, url, req_headers, client_opts),
         :ok <- enforce_header_length(client, headers, max_body_length) do
      response(conn, client, url, code, headers, opts)
    else
      # The URL is flagged as failed in the cache: do not hit the upstream again.
      {:ok, true} ->
        error_or_redirect(conn, url, 500, "Request failed", opts)

      # Body-less reply (the client returns a 3-tuple, e.g. for HEAD requests).
      {:ok, code, headers} ->
        conn
        |> head_response(url, code, headers, opts)
        |> halt()

      {:error, {:invalid_http_response, code}} ->
        Logger.error("#{__MODULE__}: request to #{inspect(url)} failed with HTTP status #{code}")
        track_failed_url(url, code, opts)

        error_or_redirect(
          conn,
          url,
          code,
          "Request failed: " <> Plug.Conn.Status.reason_phrase(code),
          opts
        )

      {:error, error} ->
        Logger.error("#{__MODULE__}: request to #{inspect(url)} failed: #{inspect(error)}")
        track_failed_url(url, error, opts)

        error_or_redirect(conn, url, 500, "Request failed", opts)
    end
  end

  def call(conn, _, _) do
    conn
    |> send_resp(400, Plug.Conn.Status.reason_phrase(400))
    |> halt()
  end

  # Performs the upstream request and normalises the result:
  #
  #   * `{:ok, code, headers, client}` - accepted status, body still to be streamed from `client`
  #   * `{:ok, code, headers}` - accepted status, no body to stream
  #   * `{:error, {:invalid_http_response, code}}` - status outside of @valid_resp_codes
  #   * `{:error, reason}` - transport error reported by the client
  #
  # Response header names are downcased so that later lookups can be done on lowercase keys.
  defp request(method, url, headers, opts) do
    Logger.debug("#{__MODULE__} #{method} #{url} #{inspect(headers)}")
    method = method |> String.downcase() |> String.to_existing_atom()

    case client().request(method, url, headers, "", opts) do
      {:ok, code, headers, client} when code in @valid_resp_codes ->
        {:ok, code, downcase_headers(headers), client}

      {:ok, code, headers} when code in @valid_resp_codes ->
        {:ok, code, downcase_headers(headers)}

      {:ok, code, _, client} ->
        # The body will never be read: release the upstream connection right away.
        client().close(client)
        {:error, {:invalid_http_response, code}}

      # Body-less reply with a status we do not accept; without this clause such a reply
      # would raise a CaseClauseError.
      {:ok, code, _} ->
        {:error, {:invalid_http_response, code}}

      {:error, error} ->
        {:error, error}
    end
  end

  # Validates the announced `content-length` against `limit`. On failure the upstream
  # connection is closed, since its body will not be consumed.
  defp enforce_header_length(client, headers, limit) do
    case header_length_constraint(headers, limit) do
      :ok ->
        :ok

      {:error, _} = error ->
        client().close(client)
        error
    end
  end

  # Sends the filtered response headers, then streams the upstream body to the client
  # as a chunked response. Always returns a halted conn; the upstream connection is
  # closed when streaming stops on an error.
  defp response(conn, client, url, status, headers, opts) do
    result =
      conn
      |> put_resp_headers(build_resp_headers(headers, opts))
      |> send_chunked(status)
      |> chunk_reply(client, opts)

    case result do
      {:ok, conn} ->
        halt(conn)

      {:error, :closed, conn} ->
        # Reported without logging, unlike the other streaming errors.
        client().close(client)
        halt(conn)

      {:error, error, conn} ->
        Logger.warn(
          "#{__MODULE__} request to #{url} failed while reading/chunking: #{inspect(error)}"
        )

        client().close(client)
        halt(conn)
    end
  end

  defp chunk_reply(conn, client, opts) do
    chunk_reply(conn, client, opts, 0, 0)
  end

  # Streaming loop. `sent_so_far` is the number of body bytes read from the upstream,
  # `duration` is the accumulated read time in milliseconds (or `:no_duration_limit`).
  # Returns `{:ok, conn}` once the upstream is done, `{:error, reason, conn}` otherwise.
  defp chunk_reply(conn, client, opts, sent_so_far, duration) do
    with {:ok, duration} <-
           check_read_duration(
             duration,
             Keyword.get(opts, :max_read_duration, @max_read_duration)
           ),
         {:ok, data, client} <- client().stream_body(client),
         {:ok, duration} <- increase_read_duration(duration),
         sent_so_far = sent_so_far + byte_size(data),
         :ok <-
           body_size_constraint(
             sent_so_far,
             Keyword.get(opts, :max_body_length, @max_body_length)
           ),
         {:ok, conn} <- chunk(conn, data) do
      chunk_reply(conn, client, opts, sent_so_far, duration)
    else
      :done -> {:ok, conn}
      {:error, error} -> {:error, error, conn}
    end
  end

  # Replies with the filtered upstream headers and an empty body.
  defp head_response(conn, _url, code, headers, opts) do
    conn
    |> put_resp_headers(build_resp_headers(headers, opts))
    |> send_resp(code, "")
  end

  # Depending on the `redirect_on_failure` option, either redirects the client to the
  # remote `url` or replies with `code` and `body`. The returned conn is halted.
  defp error_or_redirect(conn, url, code, body, opts) do
    if Keyword.get(opts, :redirect_on_failure, false) do
      conn
      |> Phoenix.Controller.redirect(external: url)
      |> halt()
    else
      conn
      |> send_resp(code, body)
      |> halt()
    end
  end

  defp downcase_headers(headers) do
    Enum.map(headers, fn {k, v} -> {String.downcase(k), v} end)
  end

  # Returns the media type of the response (parameters such as `; charset=...` removed),
  # defaulting to "application/octet-stream". The value is trimmed and downcased so that
  # it can be compared against the lowercase entries of the inline whitelist.
  defp get_content_type(headers) do
    {_, content_type} =
      List.keyfind(headers, "content-type", 0, {"content-type", "application/octet-stream"})

    [media_type | _] = String.split(content_type, ";")

    media_type
    |> String.trim()
    |> String.downcase()
  end

  defp put_resp_headers(conn, headers) do
    Enum.reduce(headers, conn, fn {k, v}, conn ->
      put_resp_header(conn, k, v)
    end)
  end

  # Keeps the client request headers listed in @keep_req_headers, then appends the
  # caller-supplied `:req_headers`.
  defp build_req_headers(headers, opts) do
    headers
    |> downcase_headers()
    |> Enum.filter(fn {k, _} -> k in @keep_req_headers end)
    |> Kernel.++(Keyword.get(opts, :req_headers, []))
    |> put_user_agent(opts)
  end

  # NOTE(review): when `keep_user_agent` is true the user-agent header is replaced with
  # the instance's own user-agent; when false (the default) the client's user-agent is
  # forwarded as-is, because "user-agent" is part of @keep_req_headers. This looks
  # inverted compared to the option's description in the moduledoc. The behaviour is
  # kept unchanged here since existing callers may rely on it -- confirm the intent.
  defp put_user_agent(headers, opts) do
    if Keyword.get(opts, :keep_user_agent, false) do
      List.keystore(
        headers,
        "user-agent",
        0,
        {"user-agent", Pleroma.Application.user_agent()}
      )
    else
      headers
    end
  end

  # Keeps the upstream response headers listed in @keep_resp_headers, sets our
  # cache-control and content-disposition, then appends the caller-supplied `:resp_headers`.
  defp build_resp_headers(headers, opts) do
    headers
    |> Enum.filter(fn {k, _} -> k in @keep_resp_headers end)
    |> build_resp_cache_headers(opts)
    |> build_resp_content_disposition_header(opts)
    |> Kernel.++(Keyword.get(opts, :resp_headers, []))
  end

  # Always sets our own cache-control, as Plug defaults to
  # "max-age=0, private, must-revalidate". The upstream cache-control never reaches this
  # point: it is not part of @keep_resp_headers.
  defp build_resp_cache_headers(headers, _opts) do
    List.keystore(
      headers,
      "cache-control",
      0,
      {"cache-control", @default_cache_control_header}
    )
  end

  # Forces `content-disposition: attachment` unless the content type is allowed inline
  # by the `inline_content_types` option. The file name comes from the upstream
  # content-disposition header when it has a quoted `filename`, else from the
  # `:attachment_name` option, else "attachment".
  defp build_resp_content_disposition_header(headers, opts) do
    inline_types = Keyword.get(opts, :inline_content_types, @inline_content_types)
    content_type = get_content_type(headers)

    if attachment?(inline_types, content_type) do
      name = upstream_filename(headers) || Keyword.get(opts, :attachment_name, "attachment")
      disposition = "attachment; filename=\"#{name}\""

      List.keystore(headers, "content-disposition", 0, {"content-disposition", disposition})
    else
      headers
    end
  end

  # `false` forces attachment, a list is a whitelist of inline types, anything else
  # (e.g. `true`) leaves the disposition up to the upstream.
  defp attachment?(false, _content_type), do: true

  defp attachment?(inline_types, content_type) when is_list(inline_types),
    do: content_type not in inline_types

  defp attachment?(_inline_types, _content_type), do: false

  # Extracts the quoted `filename` of the upstream content-disposition header,
  # or returns nil when the header or the file name is missing.
  defp upstream_filename(headers) do
    with {_, value} when is_binary(value) <- List.keyfind(headers, "content-disposition", 0),
         [name | _] <-
           Regex.run(~r/filename="((?:[^"\\]|\\.)*)"/u, value, capture: :all_but_first) do
      name
    else
      _ -> nil
    end
  end

  # Checks the `content-length` header against `limit`. A missing or unparsable header
  # is accepted: the size is verified again while streaming.
  defp header_length_constraint(headers, limit) when is_integer(limit) and limit > 0 do
    with {_, size} <- List.keyfind(headers, "content-length", 0),
         {size, _} <- Integer.parse(size),
         true <- size <= limit do
      :ok
    else
      false ->
        {:error, :body_too_large}

      _ ->
        :ok
    end
  end

  defp header_length_constraint(_, _), do: :ok

  # Uses the same bound as header_length_constraint/2: a body of exactly `limit` bytes
  # is accepted, so a response that passed the header check is not cut mid-stream.
  defp body_size_constraint(size, limit) when is_integer(limit) and limit > 0 and size > limit do
    {:error, :body_too_large}
  end

  defp body_size_constraint(_, _), do: :ok

  # Returns `{:ok, {duration, started_at}}` while the accumulated read `duration` is
  # within `max`, `{:error, :read_duration_exceeded}` otherwise. Without a positive
  # integer limit, `{:ok, :no_duration_limit}` is threaded through the loop instead.
  defp check_read_duration(duration, max)
       when is_integer(duration) and is_integer(max) and max > 0 do
    if duration > max do
      {:error, :read_duration_exceeded}
    else
      # Monotonic time: elapsed time must not be affected by wall-clock adjustments.
      {:ok, {duration, System.monotonic_time(:millisecond)}}
    end
  end

  defp check_read_duration(_, _), do: {:ok, :no_duration_limit}

  # Adds the time elapsed since `started` to the accumulated duration.
  defp increase_read_duration({previous_duration, started})
       when is_integer(previous_duration) and is_integer(started) do
    duration = System.monotonic_time(:millisecond) - started
    {:ok, previous_duration + duration}
  end

  defp increase_read_duration(_), do: {:ok, :no_duration_limit}

  defp client, do: Pleroma.ReverseProxy.Client

  # Flags `url` as failed in the cache. Failures listed below are stored with `ttl: nil`
  # instead of the `failed_request_ttl` option.
  # NOTE(review): presumably a nil ttl means the entry never expires -- confirm against
  # the cache configuration.
  defp track_failed_url(url, error, opts) do
    ttl =
      if error in [:body_too_large, 400, 204] do
        nil
      else
        Keyword.get(opts, :failed_request_ttl, @failed_request_ttl)
      end

    Cachex.put(:failed_proxy_url_cache, url, true, ttl: ttl)
  end
end