Merge branch 'fix/hackney-global-options' into 'develop'
[akkoma] / lib / pleroma / reverse_proxy / reverse_proxy.ex
1 # Pleroma: A lightweight social networking server
2 # Copyright © 2017-2019 Pleroma Authors <https://pleroma.social/>
3 # SPDX-License-Identifier: AGPL-3.0-only
4
5 defmodule Pleroma.ReverseProxy do
6 alias Pleroma.HTTP
7
# Client request headers that are passed on to the upstream request.
@keep_req_headers [
  "accept",
  "user-agent",
  "accept-encoding",
  "cache-control",
  "if-modified-since",
  "if-unmodified-since",
  "if-none-match",
  "if-range",
  "range"
]

# Upstream response headers that count as "caching headers".
@resp_cache_headers ["etag", "date", "last-modified", "cache-control"]

# Upstream response headers that are passed back to the client.
@keep_resp_headers @resp_cache_headers ++
                     [
                       "content-type",
                       "content-disposition",
                       "content-encoding",
                       "content-range",
                       "accept-ranges",
                       "vary"
                     ]

# `cache-control` value used when upstream sends no caching header at all.
@default_cache_control_header "public, max-age=1209600"

# Upstream status codes that are proxied; anything else is treated as an error.
@valid_resp_codes [200, 206, 304]

# Default for the `max_read_duration` option, in milliseconds.
@max_read_duration :timer.seconds(30)

# Default for the `max_body_length` option.
@max_body_length :infinity

# HTTP methods accepted by `call/3`.
@methods ["GET", "HEAD"]
19
20 @moduledoc """
21 A reverse proxy.
22
23 Pleroma.ReverseProxy.call(conn, url, options)
24
25 It is not meant to be added into a plug pipeline, but to be called from another plug or controller.
26
27 Supports `#{inspect(@methods)}` HTTP methods, and only allows `#{inspect(@valid_resp_codes)}` status codes.
28
29 Responses are chunked to the client while downloading from the upstream.
30
31 Some request / response headers are preserved:
32
33 * request: `#{inspect(@keep_req_headers)}`
34 * response: `#{inspect(@keep_resp_headers)}`
35
36 If no caching headers (`#{inspect(@resp_cache_headers)}`) are returned by upstream, `cache-control` will be
37 set to `#{inspect(@default_cache_control_header)}`.
38
39 Options:
40
41 * `redirect_on_failure` (default `false`). Redirects the client to the real remote URL if there are any HTTP
42 errors. Errors during body processing are not redirected, as the response is already being chunked. This may
43 expose the remote URL, client IPs, ….
44
45 * `max_body_length` (default `#{inspect(@max_body_length)}`): limits the content length to be approximately the
46 specified length. It is validated with the `content-length` header and also verified when proxying.
47
48 * `max_read_duration` (default `#{inspect(@max_read_duration)}` ms): the total time the connection is allowed to
49 read from the remote upstream.
50
51 * `inline_content_types`:
52 * `true` will not alter `content-disposition` (up to the upstream),
53 * `false` will add `content-disposition: attachment` to any response,
54 * a list of whitelisted content types
55
56 * `keep_user_agent` will forward the client's user-agent to the upstream. This may be useful if the upstream is
57 doing content transformation (encoding, …) depending on the request.
58
59 * `req_headers`, `resp_headers` additional headers.
60
61 * `http`: options for [hackney](https://github.com/benoitc/hackney).
62
63 """
# Hackney options merged over the global ones for every proxied request.
@default_hackney_options [{:pool, :media}]

# Default whitelist for the `inline_content_types` option: content types that
# are served without forcing `content-disposition: attachment`.
@inline_content_types ~w(image/gif image/jpeg image/jpg image/png image/svg+xml) ++
                        ~w(audio/mpeg audio/mp3) ++
                        ~w(video/webm video/mp4 video/quicktime)
78
79 require Logger
80 import Plug.Conn
81
# Options accepted by `call/3`; see the module documentation for their meaning.
# `:http` is a keyword list of hackney options (the previous `[]` spec only
# admitted the empty list).
@type option() ::
        {:keep_user_agent, boolean}
        | {:max_read_duration, :timer.time() | :infinity}
        | {:max_body_length, non_neg_integer() | :infinity}
        | {:http, keyword()}
        | {:req_headers, [{String.t(), String.t()}]}
        | {:resp_headers, [{String.t(), String.t()}]}
        | {:inline_content_types, boolean() | [String.t()]}
        | {:redirect_on_failure, boolean()}
91
@doc """
Proxies the request held by `conn` to `url` and replies on `conn`.

Only the methods in `@methods` are proxied; any other method is answered with
a 400. The returned connection is always halted.

  * A response with a body is streamed to the client in chunks.
  * A body-less upstream reply (`{:ok, code, headers}`) is answered with an
    empty body.
  * An upstream status outside `@valid_resp_codes`, a transport error or a
    `content-length` above `max_body_length` results in an error response, or
    in a redirect to `url` when `redirect_on_failure` is set.
"""
@spec call(Plug.Conn.t(), url :: String.t(), [option()]) :: Plug.Conn.t()
def call(_conn, _url, _opts \\ [])

def call(%{method: method} = conn, url, opts) when method in @methods do
  # Global hackney options < module defaults < per-call `:http` options.
  hackney_opts =
    []
    |> Pleroma.HTTP.Connection.hackney_options()
    |> Keyword.merge(@default_hackney_options)
    |> Keyword.merge(Keyword.get(opts, :http, []))
    |> HTTP.process_request_options()

  req_headers = build_req_headers(conn.req_headers, opts)

  # Remember a file name derived from the URL, unless the caller supplied one.
  opts =
    if filename = Pleroma.Web.MediaProxy.filename(url) do
      Keyword.put_new(opts, :attachment_name, filename)
    else
      opts
    end

  case request(method, url, req_headers, hackney_opts) do
    {:ok, code, headers, client} ->
      case header_length_constraint(headers, Keyword.get(opts, :max_body_length)) do
        :ok ->
          response(conn, client, url, code, headers, opts)

        {:error, error} ->
          # The body will never be read, so the upstream connection has to be
          # released explicitly instead of being left open.
          client().close(client)

          Logger.error("#{__MODULE__}: request to #{inspect(url)} failed: #{inspect(error)}")

          conn
          |> error_or_redirect(url, 500, "Request failed", opts)
          |> halt()
      end

    {:ok, code, headers} ->
      conn
      |> head_response(url, code, headers, opts)
      |> halt()

    {:error, {:invalid_http_response, code}} ->
      Logger.error("#{__MODULE__}: request to #{inspect(url)} failed with HTTP status #{code}")

      conn
      |> error_or_redirect(
        url,
        code,
        "Request failed: " <> Plug.Conn.Status.reason_phrase(code),
        opts
      )
      |> halt()

    {:error, error} ->
      Logger.error("#{__MODULE__}: request to #{inspect(url)} failed: #{inspect(error)}")

      conn
      |> error_or_redirect(url, 500, "Request failed", opts)
      |> halt()
  end
end

def call(conn, _, _) do
  conn
  |> send_resp(400, Plug.Conn.Status.reason_phrase(400))
  |> halt()
end
145
# Performs the upstream request through the HTTP client module.
#
# Returns:
#   * `{:ok, code, headers, client}` - accepted status, body still to be streamed
#   * `{:ok, code, headers}`         - accepted status, reply without a body
#   * `{:error, {:invalid_http_response, code}}` - status not in `@valid_resp_codes`
#   * `{:error, reason}`             - error reported by the client
#
# Header names in the returned list are lower-cased.
defp request(method, url, headers, hackney_opts) do
  Logger.debug("#{__MODULE__} #{method} #{url} #{inspect(headers)}")
  method = method |> String.downcase() |> String.to_existing_atom()

  case client().request(method, url, headers, "", hackney_opts) do
    {:ok, code, headers, client} when code in @valid_resp_codes ->
      {:ok, code, downcase_headers(headers), client}

    {:ok, code, headers} when code in @valid_resp_codes ->
      {:ok, code, downcase_headers(headers)}

    {:ok, code, _, client} ->
      # The body of a rejected response is never read; close the client so the
      # upstream connection is released.
      client().close(client)
      {:error, {:invalid_http_response, code}}

    {:ok, code, _} ->
      # Body-less reply with a rejected status: previously unmatched, which
      # raised a CaseClauseError instead of reporting the upstream status.
      {:error, {:invalid_http_response, code}}

    {:error, error} ->
      {:error, error}
  end
end
164
# Sends the filtered upstream headers, then streams the upstream body to the
# client as a chunked response. The connection is halted in every outcome; on
# a streaming error the upstream client is closed, and any error other than
# `:closed` is logged.
defp response(conn, client, url, status, headers, opts) do
  resp_headers = build_resp_headers(headers, opts)

  chunked_conn =
    conn
    |> put_resp_headers(resp_headers)
    |> send_chunked(status)

  case chunk_reply(chunked_conn, client, opts) do
    {:ok, finished_conn} ->
      halt(finished_conn)

    {:error, reason, failed_conn} ->
      if reason != :closed do
        Logger.warn(
          "#{__MODULE__} request to #{url} failed while reading/chunking: #{inspect(reason)}"
        )
      end

      client().close(client)
      halt(failed_conn)
  end
end
189
# Streams the upstream body to the client, starting with nothing sent and no
# read time accumulated.
defp chunk_reply(conn, client, opts) do
  chunk_reply(conn, client, opts, 0, 0)
end

# One iteration reads a chunk from upstream and forwards it to the client,
# then recurses.
#
#   * `sent_so_far` - number of body bytes read before this iteration
#   * `duration`    - accumulated read duration, as tracked by
#                     `check_read_duration/2` and `increase_read_duration/1`
#
# Returns `{:ok, conn}` once upstream reports `:done`, or
# `{:error, reason, conn}` when a limit is exceeded or reading/sending fails.
defp chunk_reply(conn, client, opts, sent_so_far, duration) do
  with {:ok, duration} <-
         check_read_duration(
           duration,
           Keyword.get(opts, :max_read_duration, @max_read_duration)
         ),
       {:ok, data} <- client().stream_body(client),
       {:ok, duration} <- increase_read_duration(duration),
       sent_so_far = sent_so_far + byte_size(data),
       # The option is `:max_body_length` (as documented and as used for the
       # `content-length` check); the old `:max_body_size` key was never set,
       # so the limit was not enforced while streaming.
       :ok <- body_size_constraint(sent_so_far, Keyword.get(opts, :max_body_length)),
       {:ok, conn} <- chunk(conn, data) do
    chunk_reply(conn, client, opts, sent_so_far, duration)
  else
    :done -> {:ok, conn}
    {:error, error} -> {:error, error, conn}
  end
end
211
# Replies with the filtered upstream headers and an empty body. The URL
# argument is accepted but not used.
defp head_response(conn, _url, code, headers, opts) do
  resp_headers = build_resp_headers(headers, opts)

  send_resp(put_resp_headers(conn, resp_headers), code, "")
end
217
# Ends the request after a failure: redirects the client to `url` when the
# `redirect_on_failure` option is set, otherwise replies with `code` and
# `body`. The connection is halted either way.
defp error_or_redirect(conn, url, code, body, opts) do
  redirect? = Keyword.get(opts, :redirect_on_failure, false)

  replied =
    if redirect? do
      Phoenix.Controller.redirect(conn, external: url)
    else
      send_resp(conn, code, body)
    end

  halt(replied)
end
229
# Lower-cases the name of every `{name, value}` header; values are untouched.
defp downcase_headers(headers) do
  lowercase_name = fn {name, value} -> {String.downcase(name), value} end

  Enum.map(headers, lowercase_name)
end
235
# Returns the `content-type` header value up to the first ";" (parameters are
# dropped), or "application/octet-stream" when the header is absent.
defp get_content_type(headers) do
  raw_value =
    case List.keyfind(headers, "content-type", 0) do
      {_, value} -> value
      nil -> "application/octet-stream"
    end

  raw_value
  |> String.split(";")
  |> hd()
end
243
# Applies each `{name, value}` pair to the connection with `put_resp_header/3`,
# in list order.
defp put_resp_headers(conn, headers) do
  Enum.reduce(headers, conn, fn {name, value}, acc ->
    put_resp_header(acc, name, value)
  end)
end
249
# Builds the header list for the upstream request: the client headers listed
# in `@keep_req_headers` (names lower-cased), followed by the `req_headers`
# option.
#
# With `keep_user_agent` set, the `user-agent` entry is replaced by (or added
# as) `Pleroma.Application.user_agent()`.
# NOTE(review): the module documentation describes `keep_user_agent` as
# forwarding the *client's* user-agent, which is not what this does - confirm
# which of the two is intended.
defp build_req_headers(headers, opts) do
  kept =
    headers
    |> downcase_headers()
    |> Enum.filter(fn {name, _} -> name in @keep_req_headers end)

  merged = kept ++ Keyword.get(opts, :req_headers, [])

  if Keyword.get(opts, :keep_user_agent, false) do
    user_agent = {"user-agent", Pleroma.Application.user_agent()}
    List.keystore(merged, "user-agent", 0, user_agent)
  else
    merged
  end
end
269
# Builds the header list sent to the client: the upstream headers listed in
# `@keep_resp_headers`, adjusted for caching and content disposition, with the
# `resp_headers` option appended last.
defp build_resp_headers(headers, opts) do
  kept = Enum.filter(headers, fn {name, _} -> name in @keep_resp_headers end)

  adjusted =
    kept
    |> build_resp_cache_headers(opts)
    |> build_resp_content_disposition_header(opts)

  adjusted ++ Keyword.get(opts, :resp_headers, [])
end
277
# Makes sure a `cache-control` header is present:
#
#   * caching headers including `cache-control` -> headers are left as they are
#   * caching headers but no `cache-control`    -> `cache-control: public`
#   * no caching header at all                  -> `@default_cache_control_header`
defp build_resp_cache_headers(headers, _opts) do
  cache_control? = List.keymember?(headers, "cache-control", 0)
  any_cache_header? = Enum.any?(headers, fn {name, _} -> name in @resp_cache_headers end)

  cache_control =
    cond do
      any_cache_header? and cache_control? ->
        nil

      any_cache_header? ->
        # A caching header is present but no cache-control -- it has to be set explicitly
        # to public as Plug defaults to "max-age=0, private, must-revalidate"
        "public"

      true ->
        @default_cache_control_header
    end

  if cache_control do
    List.keystore(headers, "cache-control", 0, {"cache-control", cache_control})
  else
    headers
  end
end
300
# Forces `content-disposition: attachment` depending on the
# `inline_content_types` option (default `@inline_content_types`):
#
#   * `false`          -> always an attachment
#   * a list of types  -> an attachment unless the content type is in the list
#   * anything else    -> headers are returned unchanged
#
# The attachment file name is the quoted `filename` of the upstream
# `content-disposition` header when one can be extracted, otherwise the
# `attachment_name` option, otherwise "attachment".
defp build_resp_content_disposition_header(headers, opts) do
  content_type = get_content_type(headers)

  attachment? =
    case Keyword.get(opts, :inline_content_types, @inline_content_types) do
      false -> true
      allowed when is_list(allowed) -> !Enum.member?(allowed, content_type)
      _ -> false
    end

  if attachment? do
    name =
      with {_, upstream_disposition} <- List.keyfind(headers, "content-disposition", 0),
           [filename | _] <-
             Regex.run(
               ~r/filename="((?:[^"\\]|\\.)*)"/u,
               upstream_disposition || "",
               capture: :all_but_first
             ) do
        filename
      else
        # Header missing, or no quoted filename in it.
        _ -> Keyword.get(opts, :attachment_name, "attachment")
      end

    disposition = "attachment; filename=\"#{name}\""

    List.keystore(headers, "content-disposition", 0, {"content-disposition", disposition})
  else
    headers
  end
end
338
# Checks the `content-length` header against a positive integer `limit`.
#
# Returns `{:error, :body_too_large}` only when the header parses to a number
# greater than `limit`; a missing or unparsable header, or any other kind of
# limit, yields `:ok`.
defp header_length_constraint(headers, limit) when is_integer(limit) and limit > 0 do
  case List.keyfind(headers, "content-length", 0) do
    {_, raw_length} ->
      case Integer.parse(raw_length) do
        {length, _} when length > limit -> {:error, :body_too_large}
        _ -> :ok
      end

    _ ->
      :ok
  end
end

defp header_length_constraint(_, _), do: :ok
354
# Checks the number of body bytes read so far against a positive integer
# `limit`.
#
# Returns `{:error, :body_too_large}` once `size` exceeds `limit`. A body of
# exactly `limit` bytes is accepted, in line with `header_length_constraint/2`,
# which accepts `content-length <= limit`; the previous `>=` comparison
# rejected such a body while streaming. Any other kind of limit (`nil`,
# `:infinity`, ...) yields `:ok`.
defp body_size_constraint(size, limit) when is_integer(limit) and limit > 0 and size > limit do
  {:error, :body_too_large}
end

defp body_size_constraint(_, _), do: :ok
360
# Read-duration bookkeeping used while streaming the upstream body.
#
# `check_read_duration/2` is called before a read with the duration
# accumulated so far (milliseconds) and the allowed maximum:
#
#   * `{:error, :read_duration_exceeded}` when the duration is above the maximum
#   * `{:ok, {duration, started}}` otherwise, where `started` is the clock
#     reading taken right before the read
#   * `{:ok, :no_duration_limit}` when no integer limit applies (for example
#     `max_read_duration: :infinity`)
#
# `increase_read_duration/1` is called after the read with the value wrapped
# in that `{:ok, _}` and returns `{:ok, new_duration}` (or
# `{:ok, :no_duration_limit}` again).
#
# Both fallbacks used to return a three-element tuple, which matched neither
# the `{:ok, duration}` pattern nor any `else` clause of the calling `with`
# and therefore raised whenever no integer limit was configured.
#
# A monotonic clock is used so that wall-clock adjustments cannot distort the
# measured time.
defp check_read_duration(duration, max)
     when is_integer(duration) and is_integer(max) and max > 0 do
  if duration > max do
    {:error, :read_duration_exceeded}
  else
    {:ok, {duration, System.monotonic_time(:millisecond)}}
  end
end

defp check_read_duration(_, _), do: {:ok, :no_duration_limit}

defp increase_read_duration({previous_duration, started})
     when is_integer(previous_duration) and is_integer(started) do
  elapsed = System.monotonic_time(:millisecond) - started
  {:ok, previous_duration + elapsed}
end

defp increase_read_duration(_), do: {:ok, :no_duration_limit}
381
# The module used to talk to the upstream server.
defp client do
  Pleroma.ReverseProxy.Client
end
383 end