Merge remote-tracking branch 'upstream/develop' into admin-create-users
[akkoma] / lib / pleroma / reverse_proxy.ex
1 # Pleroma: A lightweight social networking server
2 # Copyright © 2017-2019 Pleroma Authors <https://pleroma.social/>
3 # SPDX-License-Identifier: AGPL-3.0-only
4
5 defmodule Pleroma.ReverseProxy do
6 alias Pleroma.HTTP
7
# Request headers copied from the client request onto the upstream request.
@keep_req_headers ~w(
  accept user-agent accept-encoding cache-control if-modified-since
  if-unmodified-since if-none-match if-range range
)

# Upstream response headers that count as caching information.
@resp_cache_headers ~w(etag date last-modified cache-control)

# Upstream response headers relayed to the client (cache headers included).
@keep_resp_headers @resp_cache_headers ++
                     ~w(
                       content-type content-disposition content-encoding content-range
                       accept-ranges vary
                     )

# Fallback `cache-control` value: 1_209_600 seconds, i.e. 14 days.
@default_cache_control_header "public, max-age=1209600"

# Upstream status codes that are proxied; anything else is treated as a failure.
@valid_resp_codes [200, 206, 304]

# Default limits, overridable through the `max_read_duration` / `max_body_length` options.
@max_read_duration :timer.seconds(30)
@max_body_length :infinity

# HTTP methods accepted by `call/3`.
@methods ~w(GET HEAD)
19
20 @moduledoc """
21 A reverse proxy.
22
23 Pleroma.ReverseProxy.call(conn, url, options)
24
25 It is not meant to be added into a plug pipeline, but to be called from another plug or controller.
26
27 Supports `#{inspect(@methods)}` HTTP methods, and only allows `#{inspect(@valid_resp_codes)}` status codes.
28
29 Responses are chunked to the client while downloading from the upstream.
30
31 Some request / response headers are preserved:
32
33 * request: `#{inspect(@keep_req_headers)}`
34 * response: `#{inspect(@keep_resp_headers)}`
35
36 If no caching headers (`#{inspect(@resp_cache_headers)}`) are returned by upstream, `cache-control` will be
37 set to `#{inspect(@default_cache_control_header)}`.
38
39 Options:
40
41 * `redirect_on_failure` (default `false`). Redirects the client to the real remote URL if there are any HTTP
42 errors. Any error during body processing will not be redirected as the response is chunked. This may expose
43 the remote URL, client IPs, ….
44
45 * `max_body_length` (default `#{inspect(@max_body_length)}`): limits the content length to be approximately the
46 specified length. It is validated with the `content-length` header and also verified when proxying.
47
48 * `max_read_duration` (default `#{inspect(@max_read_duration)}` ms): the total time the connection is allowed to
49 read from the remote upstream.
50
51 * `inline_content_types`:
52 * `true` will not alter `content-disposition` (up to the upstream),
53 * `false` will add `content-disposition: attachment` to any response,
54 * a list of whitelisted content types
55
56 * `keep_user_agent` will forward the client's user-agent to the upstream. This may be useful if the upstream is
57 doing content transformation (encoding, …) depending on the request.
58
59 * `req_headers`, `resp_headers` additional headers.
60
61 * `http`: options for [hackney](https://github.com/benoitc/hackney).
62
63 """
# HTTP client module used for upstream requests. Being a module attribute, the
# value is read once, when this module is compiled.
@hackney Pleroma.Config.get(:hackney, :hackney)

# Base hackney options; the caller's `:http` option is merged on top of these.
@default_hackney_options []

# Default whitelist of content types that may be displayed inline. Any other
# content type gets a `content-disposition: attachment` header.
@inline_content_types ~w(
  image/gif
  image/jpeg
  image/jpg
  image/png
  image/svg+xml
  audio/mpeg
  audio/mp3
  video/webm
  video/mp4
  video/quicktime
)
80
81 require Logger
82 import Plug.Conn
83
# Options accepted by `call/3`. `:http` carries a keyword list of hackney
# options (the previous `[]` type only admitted the empty list).
@type option() ::
        {:keep_user_agent, boolean}
        | {:max_read_duration, :timer.time() | :infinity}
        | {:max_body_length, non_neg_integer() | :infinity}
        | {:http, keyword()}
        | {:req_headers, [{String.t(), String.t()}]}
        | {:resp_headers, [{String.t(), String.t()}]}
        | {:inline_content_types, boolean() | [String.t()]}
        | {:redirect_on_failure, boolean()}
93
@doc """
Proxies the request held by `conn` to `url` and relays the upstream answer to the client.

Only `GET` and `HEAD` requests are proxied; any other method is answered with a `400`.
See the module documentation for the supported `opts`.

The returned connection is always halted.
"""
@spec call(Plug.Conn.t(), url :: String.t(), [option()]) :: Plug.Conn.t()
def call(_conn, _url, _opts \\ [])

def call(conn = %{method: method}, url, opts) when method in @methods do
  hackney_opts =
    @default_hackney_options
    |> Keyword.merge(Keyword.get(opts, :http, []))
    |> HTTP.process_request_options()

  req_headers = build_req_headers(conn.req_headers, opts)

  # Remember the file name carried by the URL (if any) so that a forced
  # `content-disposition: attachment` header can fall back to it.
  opts =
    if filename = Pleroma.Web.MediaProxy.filename(url) do
      Keyword.put_new(opts, :attachment_name, filename)
    else
      opts
    end

  max_body_length = Keyword.get(opts, :max_body_length, @max_body_length)

  with {:ok, code, headers, client} <- request(method, url, req_headers, hackney_opts),
       :ok <- header_length_constraint(headers, max_body_length) do
    response(conn, client, url, code, headers, opts)
  else
    # Body-less answer (no client reference to stream from).
    {:ok, code, headers} ->
      head_response(conn, url, code, headers, opts)
      |> halt()

    {:error, {:invalid_http_response, code}} ->
      Logger.error("#{__MODULE__}: request to #{inspect(url)} failed with HTTP status #{code}")

      conn
      |> error_or_redirect(url, code, invalid_response_message(code), opts)
      |> halt()

    {:error, error} ->
      Logger.error("#{__MODULE__}: request to #{inspect(url)} failed: #{inspect(error)}")

      conn
      |> error_or_redirect(url, 500, "Request failed", opts)
      |> halt()
  end
end

def call(conn, _, _) do
  conn
  |> send_resp(400, Plug.Conn.Status.reason_phrase(400))
  |> halt()
end

# Builds the error body for a rejected upstream status. The upstream may answer
# with a status code Plug has no reason phrase for, in which case
# `Plug.Conn.Status.reason_phrase/1` raises; fall back to a generic message
# instead of crashing the request.
defp invalid_response_message(code) do
  "Request failed: " <> Plug.Conn.Status.reason_phrase(code)
rescue
  ArgumentError -> "Request failed"
end
146
# Performs the upstream request through the configured HTTP client.
#
# Returns:
#   * `{:ok, code, headers, client}` - accepted status, body still to be streamed,
#   * `{:ok, code, headers}` - accepted status, no body to stream,
#   * `{:error, {:invalid_http_response, code}}` - status not in `@valid_resp_codes`,
#   * `{:error, reason}` - the client failed to perform the request.
#
# Header names in the success tuples are downcased.
defp request(method, url, headers, hackney_opts) do
  Logger.debug("#{__MODULE__} #{method} #{url} #{inspect(headers)}")
  # `call/3` only lets the methods in `@methods` through, so the atom already exists.
  method = method |> String.downcase() |> String.to_existing_atom()

  case @hackney.request(method, url, headers, "", hackney_opts) do
    {:ok, code, headers, client} when code in @valid_resp_codes ->
      {:ok, code, downcase_headers(headers), client}

    {:ok, code, headers} when code in @valid_resp_codes ->
      {:ok, code, downcase_headers(headers)}

    {:ok, code, _headers, _client} ->
      {:error, {:invalid_http_response, code}}

    # Body-less answers with a rejected status used to fall through every
    # clause and raise a CaseClauseError.
    {:ok, code, _headers} ->
      {:error, {:invalid_http_response, code}}

    {:error, error} ->
      {:error, error}
  end
end
165
# Streams the upstream body to the client as a chunked response with the given
# status and the filtered upstream headers. The connection is halted in every
# case; on a streaming error the upstream client is closed, and the error is
# logged unless it is `:closed`.
defp response(conn, client, url, status, headers, opts) do
  resp_headers = build_resp_headers(headers, opts)

  chunked_conn =
    conn
    |> put_resp_headers(resp_headers)
    |> send_chunked(status)

  case chunk_reply(chunked_conn, client, opts) do
    {:ok, finished_conn} ->
      halt(finished_conn)

    {:error, error, failed_conn} ->
      if error != :closed do
        Logger.warn(
          "#{__MODULE__} request to #{url} failed while reading/chunking: #{inspect(error)}"
        )
      end

      :hackney.close(client)
      halt(failed_conn)
  end
end
190
# Streams the upstream body to the client chunk by chunk.
#
# Returns `{:ok, conn}` once the upstream signals `:done`, or
# `{:error, reason, conn}` when reading, a limit check or sending a chunk fails.
defp chunk_reply(conn, client, opts) do
  chunk_reply(conn, client, opts, 0, 0)
end

# `sent_so_far` is the number of bytes read from the upstream so far,
# `duration` the accumulated time spent reading from it.
defp chunk_reply(conn, client, opts, sent_so_far, duration) do
  max_read_duration = Keyword.get(opts, :max_read_duration, @max_read_duration)
  # The option is `:max_body_length` (as documented and as used for the
  # `content-length` check); reading `:max_body_size` here meant the limit was
  # never enforced while streaming.
  max_body_length = Keyword.get(opts, :max_body_length, @max_body_length)

  with {:ok, duration} <- check_read_duration(duration, max_read_duration),
       {:ok, data} <- @hackney.stream_body(client),
       {:ok, duration} <- increase_read_duration(duration),
       sent_so_far = sent_so_far + byte_size(data),
       :ok <- body_size_constraint(sent_so_far, max_body_length),
       {:ok, conn} <- chunk(conn, data) do
    chunk_reply(conn, client, opts, sent_so_far, duration)
  else
    :done -> {:ok, conn}
    {:error, error} -> {:error, error, conn}
  end
end
212
# Answers with the upstream status and filtered headers, and an empty body.
defp head_response(conn, _url, code, headers, opts) do
  resp_headers = build_resp_headers(headers, opts)
  conn_with_headers = put_resp_headers(conn, resp_headers)

  send_resp(conn_with_headers, code, "")
end
218
# Failure answer: redirects the client to `url` when the `redirect_on_failure`
# option is set, otherwise sends `body` with status `code`. The connection is
# halted either way.
defp error_or_redirect(conn, url, code, body, opts) do
  redirect? = Keyword.get(opts, :redirect_on_failure, false)

  answered =
    if redirect? do
      Phoenix.Controller.redirect(conn, external: url)
    else
      send_resp(conn, code, body)
    end

  halt(answered)
end
230
# Lowercases every header name; header values are left untouched.
defp downcase_headers(headers) do
  Enum.map(headers, fn {name, value} -> {String.downcase(name), value} end)
end
236
# Returns the media type of the `content-type` header without its parameters
# (e.g. `"image/png"` for `"image/png; charset=utf-8"`), defaulting to
# `"application/octet-stream"` when the header is absent.
#
# Expects header names to be downcased already.
defp get_content_type(headers) do
  {_, content_type} =
    List.keyfind(headers, "content-type", 0, {"content-type", "application/octet-stream"})

  # Optional whitespace is allowed around the `;` separator; trim it so the
  # result compares equal to whitelist entries.
  content_type
  |> String.split(";", parts: 2)
  |> hd()
  |> String.trim()
end
244
# Sets each `{name, value}` pair of `headers` as a response header on `conn`,
# in list order.
defp put_resp_headers(conn, headers) do
  List.foldl(headers, conn, fn {name, value}, acc ->
    put_resp_header(acc, name, value)
  end)
end
250
# Builds the headers sent to the upstream: the client's headers restricted to
# `@keep_req_headers` (names downcased), followed by the `req_headers` option.
defp build_req_headers(headers, opts) do
  forwarded =
    headers
    |> downcase_headers()
    |> Enum.filter(fn {name, _value} -> name in @keep_req_headers end)

  merged = forwarded ++ Keyword.get(opts, :req_headers, [])

  # NOTE(review): the moduledoc says `keep_user_agent` forwards the client's
  # user-agent, yet when the option is set this replaces it with
  # `Pleroma.Application.user_agent()`, and leaves the client's value in place
  # otherwise. Behaviour kept as is; confirm which one is intended.
  case Keyword.get(opts, :keep_user_agent, false) do
    disabled when disabled in [nil, false] ->
      merged

    _enabled ->
      List.keystore(
        merged,
        "user-agent",
        0,
        {"user-agent", Pleroma.Application.user_agent()}
      )
  end
end
270
# Builds the headers sent to the client: upstream headers restricted to
# `@keep_resp_headers`, completed with cache and content-disposition headers,
# followed by the `resp_headers` option.
defp build_resp_headers(headers, opts) do
  kept = Enum.filter(headers, fn {name, _value} -> name in @keep_resp_headers end)
  extra = Keyword.get(opts, :resp_headers, [])

  kept
  |> build_resp_cache_headers(opts)
  |> build_resp_content_disposition_header(opts)
  |> Kernel.++(extra)
end
278
# Makes sure the response carries a `cache-control` header:
#
#   * upstream sent `cache-control`: headers are returned unchanged,
#   * upstream sent another caching header only: `cache-control: public`,
#   * upstream sent no caching header: `@default_cache_control_header`.
defp build_resp_cache_headers(headers, _opts) do
  any_cache_header? = Enum.any?(headers, fn {name, _value} -> name in @resp_cache_headers end)
  cache_control? = List.keymember?(headers, "cache-control", 0)

  case {any_cache_header?, cache_control?} do
    {true, true} ->
      headers

    {true, false} ->
      # A caching header is present but no cache-control -- it has to be set explicitly
      # to public as Plug defaults to "max-age=0, private, must-revalidate".
      List.keystore(headers, "cache-control", 0, {"cache-control", "public"})

    {false, _} ->
      List.keystore(
        headers,
        "cache-control",
        0,
        {"cache-control", @default_cache_control_header}
      )
  end
end
301
# Forces `content-disposition: attachment` when the `inline_content_types`
# option is `false`, or is a list that does not contain the response content
# type. Otherwise the headers are returned unchanged.
#
# The attachment file name is the quoted `filename` of the upstream
# `content-disposition` header when there is one, else the `attachment_name`
# option, else `"attachment"`.
defp build_resp_content_disposition_header(headers, opts) do
  inline_types = Keyword.get(opts, :inline_content_types, @inline_content_types)
  content_type = get_content_type(headers)

  force_attachment? =
    inline_types == false or (is_list(inline_types) and content_type not in inline_types)

  if force_attachment? do
    filename =
      with {{"content-disposition", upstream_disposition}, _rest} <-
             List.keytake(headers, "content-disposition", 0),
           [captured | _] <-
             Regex.run(
               ~r/filename="((?:[^"\\]|\\.)*)"/u,
               upstream_disposition || "",
               capture: :all_but_first
             ) do
        captured
      else
        _ -> Keyword.get(opts, :attachment_name, "attachment")
      end

    List.keystore(
      headers,
      "content-disposition",
      0,
      {"content-disposition", "attachment; filename=\"#{filename}\""}
    )
  else
    headers
  end
end
339
# Checks the upstream `content-length` header against `limit`.
#
# Returns `{:error, :body_too_large}` only when the header is present, parses
# as an integer and exceeds a positive integer `limit`; `:ok` in every other
# case (no limit, missing or unparsable header).
defp header_length_constraint(headers, limit) when is_integer(limit) and limit > 0 do
  case List.keyfind(headers, "content-length", 0) do
    {_, raw_length} ->
      case Integer.parse(raw_length) do
        {declared, _rest} when declared > limit -> {:error, :body_too_large}
        _ -> :ok
      end

    _ ->
      :ok
  end
end

defp header_length_constraint(_, _), do: :ok
355
# Checks the number of bytes received so far against `limit`.
#
# Returns `{:error, :body_too_large}` once `size` exceeds a positive integer
# `limit`, `:ok` otherwise (including when there is no integer limit). A body of
# exactly `limit` bytes is accepted, consistently with the `content-length`
# check, which accepts `size <= limit`.
defp body_size_constraint(size, limit) when is_integer(limit) and limit > 0 and size > limit do
  {:error, :body_too_large}
end

defp body_size_constraint(_, _), do: :ok
361
# Read-duration bookkeeping used while streaming the upstream body.
#
# `check_read_duration/2` is called before each read: with an integer `duration`
# and a positive integer `max` it returns `{:error, :read_duration_exceeded}`
# once `duration` is above `max`, else `{:ok, {duration, started}}` where
# `started` is the current monotonic time in milliseconds. Without an integer
# limit it returns `{:ok, :no_duration_limit}`.
#
# `increase_read_duration/1` is called after each read with the value carried
# by the `:ok` tuple above and returns `{:ok, new_duration}`.
#
# Both always answer with a 2-tuple on success: the streaming loop matches on
# `{:ok, duration}`, so the former 3-tuple fallbacks raised a WithClauseError
# whenever no integer limit was configured.
defp check_read_duration(duration, max)
     when is_integer(duration) and is_integer(max) and max > 0 do
  if duration > max do
    {:error, :read_duration_exceeded}
  else
    # Monotonic time: elapsed time must not be affected by wall-clock adjustments.
    {:ok, {duration, :erlang.monotonic_time(:millisecond)}}
  end
end

defp check_read_duration(_, _), do: {:ok, :no_duration_limit}

defp increase_read_duration({previous_duration, started})
     when is_integer(previous_duration) and is_integer(started) do
  elapsed = :erlang.monotonic_time(:millisecond) - started
  {:ok, previous_duration + elapsed}
end

defp increase_read_duration(_), do: {:ok, :no_duration_limit}
382 end