Merge remote-tracking branch 'origin/develop' into sixohsix/pleroma-post_expiration
[akkoma] / lib / pleroma / reverse_proxy / reverse_proxy.ex
1 # Pleroma: A lightweight social networking server
2 # Copyright © 2017-2019 Pleroma Authors <https://pleroma.social/>
3 # SPDX-License-Identifier: AGPL-3.0-only
4
5 defmodule Pleroma.ReverseProxy do
6 alias Pleroma.HTTP
7
# Request headers that are forwarded from the client to the upstream.
@keep_req_headers ~w(
  accept user-agent accept-encoding cache-control if-modified-since
  if-unmodified-since if-none-match if-range range
)
# Upstream response headers that count as caching information.
@resp_cache_headers ~w(etag date last-modified cache-control)
# Upstream response headers that are relayed to the client.
@keep_resp_headers @resp_cache_headers ++
                     ~w(
                       content-type content-disposition content-encoding content-range
                       accept-ranges vary
                     )
# Used when the upstream sends none of the caching headers (1209600 s = 14 days).
@default_cache_control_header "public, max-age=1209600"
# Upstream status codes that are relayed; any other status is treated as a failure.
@valid_resp_codes [200, 206, 304]
# Defaults for the `max_read_duration` and `max_body_length` options.
@max_read_duration :timer.seconds(30)
@max_body_length :infinity
# HTTP methods accepted by `call/3`.
@methods ~w(GET HEAD)
19
@moduledoc """
A reverse proxy.

    Pleroma.ReverseProxy.call(conn, url, options)

It is not meant to be added into a plug pipeline, but to be called from another plug or controller.

Supports `#{inspect(@methods)}` HTTP methods, and only allows `#{inspect(@valid_resp_codes)}` status codes.

Responses are chunked to the client while downloading from the upstream.

Some request / response headers are preserved:

* request: `#{inspect(@keep_req_headers)}`
* response: `#{inspect(@keep_resp_headers)}`

If no caching headers (`#{inspect(@resp_cache_headers)}`) are returned by upstream, `cache-control` will be
set to `#{inspect(@default_cache_control_header)}`.

Options:

* `redirect_on_failure` (default `false`). Redirects the client to the real remote URL if there are any HTTP
  errors. Any error during body processing will not be redirected as the response is chunked. This may expose
  the remote URL, client IPs, ….

* `max_body_length` (default `#{inspect(@max_body_length)}`): limits the content length to be approximately the
  specified length. It is validated with the `content-length` header and also verified when proxying.

* `max_read_duration` (default `#{inspect(@max_read_duration)}` ms): the total time the connection is allowed to
  read from the remote upstream.

* `inline_content_types`:
  * `true` will not alter `content-disposition` (up to the upstream),
  * `false` will add `content-disposition: attachment` to any response,
  * a list of whitelisted content types

* `keep_user_agent` (default `false`): when `true`, the `user-agent` sent to the upstream is replaced by
  Pleroma's own (`Pleroma.Application.user_agent/0`). Otherwise the client's `user-agent` is forwarded like
  the other preserved request headers, which may be useful if the upstream is doing content transformation
  (encoding, …) depending on the request.

* `req_headers`, `resp_headers` additional headers.

* `http`: options for [hackney](https://github.com/benoitc/hackney).

"""
# Base hackney options for proxied requests: they go through the `:media` pool.
@default_hackney_options [{:pool, :media}]

# Content types allowed inline when the `inline_content_types` option is not given;
# responses with any other content type get `content-disposition: attachment`.
@inline_content_types ~w(
  image/gif image/jpeg image/jpg image/png image/svg+xml
  audio/mpeg audio/mp3
  video/webm video/mp4 video/quicktime
)
78
79 require Logger
80 import Plug.Conn
81
@typedoc """
A single option accepted by `call/3`. See the module documentation for the meaning of each key.
"""
# `:http` is typed as `keyword()`: in a typespec the literal `[]` only denotes the empty list,
# which would make every non-empty hackney option list a spec violation.
@type option() ::
        {:keep_user_agent, boolean}
        | {:max_read_duration, :timer.time() | :infinity}
        | {:max_body_length, non_neg_integer() | :infinity}
        | {:http, keyword()}
        | {:req_headers, [{String.t(), String.t()}]}
        | {:resp_headers, [{String.t(), String.t()}]}
        | {:inline_content_types, boolean() | [String.t()]}
        | {:redirect_on_failure, boolean()}
91
@doc """
Proxies the request held by `conn` to `url` and relays the upstream response to the client.

Only the methods listed in the module documentation are proxied; any other method is answered
with a `400`. Upstream failures are answered through the `redirect_on_failure` policy.
The returned connection is halted.
"""
@spec call(Plug.Conn.t(), url :: String.t(), [option()]) :: Plug.Conn.t()
def call(_conn, _url, _opts \\ [])

def call(%{method: method} = conn, url, opts) when method in @methods do
  # Precedence (lowest to highest): global connection options, module defaults, caller's `:http`.
  hackney_opts =
    []
    |> Pleroma.HTTP.Connection.hackney_options()
    |> Keyword.merge(@default_hackney_options)
    |> Keyword.merge(Keyword.get(opts, :http, []))
    |> HTTP.process_request_options()

  upstream_req_headers = build_req_headers(conn.req_headers, opts)

  # The filename derived from the URL is only a fallback: an explicit `:attachment_name` wins.
  opts =
    case Pleroma.Web.MediaProxy.filename(url) do
      missing when missing in [nil, false] -> opts
      filename -> Keyword.put_new(opts, :attachment_name, filename)
    end

  max_length = Keyword.get(opts, :max_body_length, @max_body_length)

  with {:ok, code, headers, client} <- request(method, url, upstream_req_headers, hackney_opts),
       :ok <- header_length_constraint(headers, max_length) do
    response(conn, client, url, code, headers, opts)
  else
    # Response without a body reference: answer with headers only.
    {:ok, code, headers} ->
      conn
      |> head_response(url, code, headers, opts)
      |> halt()

    {:error, {:invalid_http_response, code}} ->
      Logger.error("#{__MODULE__}: request to #{inspect(url)} failed with HTTP status #{code}")

      reason = "Request failed: " <> Plug.Conn.Status.reason_phrase(code)

      conn
      |> error_or_redirect(url, code, reason, opts)
      |> halt()

    {:error, error} ->
      Logger.error("#{__MODULE__}: request to #{inspect(url)} failed: #{inspect(error)}")

      conn
      |> error_or_redirect(url, 500, "Request failed", opts)
      |> halt()
  end
end

def call(conn, _, _) do
  bad_request = Plug.Conn.Status.reason_phrase(400)

  conn
  |> send_resp(400, bad_request)
  |> halt()
end
149
# Performs the upstream request through the configured client and normalises the result.
#
# Returns:
#   * `{:ok, code, headers, client}` - accepted status; the body is still to be streamed from `client`
#   * `{:ok, code, headers}` - accepted status, response without a body reference
#   * `{:error, {:invalid_http_response, code}}` - status not in `@valid_resp_codes`
#   * `{:error, reason}` - the client could not perform the request
#
# Header names are downcased on success so later lookups can use lowercase keys.
defp request(method, url, headers, hackney_opts) do
  Logger.debug("#{__MODULE__} #{method} #{url} #{inspect(headers)}")
  # `call/3` only lets the methods in `@methods` through, so the atom already exists.
  method = method |> String.downcase() |> String.to_existing_atom()

  case client().request(method, url, headers, "", hackney_opts) do
    {:ok, code, headers, client} when code in @valid_resp_codes ->
      {:ok, code, downcase_headers(headers), client}

    {:ok, code, headers} when code in @valid_resp_codes ->
      {:ok, code, downcase_headers(headers)}

    {:ok, code, _headers, _client} ->
      {:error, {:invalid_http_response, code}}

    # A body-less response with a rejected status previously matched no clause and raised
    # `CaseClauseError`; report it like any other invalid status.
    {:ok, code, _headers} ->
      {:error, {:invalid_http_response, code}}

    {:error, error} ->
      {:error, error}
  end
end
168
# Starts a chunked reply with the filtered upstream headers and streams the body to the client.
# The upstream client is closed whenever streaming stops with an error; the conn is always halted.
defp response(conn, client, url, status, headers, opts) do
  resp_headers = build_resp_headers(headers, opts)

  chunked_conn =
    conn
    |> put_resp_headers(resp_headers)
    |> send_chunked(status)

  case chunk_reply(chunked_conn, client, opts) do
    {:ok, conn} ->
      halt(conn)

    {:error, reason, conn} ->
      # `:closed` is not logged; every other reason is.
      if reason != :closed do
        Logger.warn(
          "#{__MODULE__} request to #{url} failed while reading/chunking: #{inspect(reason)}"
        )
      end

      client().close(client)
      halt(conn)
  end
end
193
# Streams the upstream body to the client, starting with nothing sent and no time spent.
defp chunk_reply(conn, client, opts), do: chunk_reply(conn, client, opts, 0, 0)

# One iteration of the streaming loop: check the time budget, read a chunk from upstream, account
# for the time the read took, check the size budget, forward the chunk, then recurse.
# Returns `{:ok, conn}` once upstream reports `:done`, or `{:error, reason, conn}`.
defp chunk_reply(conn, client, opts, sent_so_far, duration) do
  max_duration = Keyword.get(opts, :max_read_duration, @max_read_duration)
  max_length = Keyword.get(opts, :max_body_length, @max_body_length)

  with {:ok, timer} <- check_read_duration(duration, max_duration),
       {:ok, data} <- client().stream_body(client),
       {:ok, spent} <- increase_read_duration(timer),
       total_sent = sent_so_far + byte_size(data),
       :ok <- body_size_constraint(total_sent, max_length),
       {:ok, conn} <- chunk(conn, data) do
    chunk_reply(conn, client, opts, total_sent, spent)
  else
    :done -> {:ok, conn}
    {:error, reason} -> {:error, reason, conn}
  end
end
219
# Replies with the filtered upstream headers and an empty body.
defp head_response(conn, _url, code, headers, opts) do
  resp_headers = build_resp_headers(headers, opts)
  conn_with_headers = put_resp_headers(conn, resp_headers)

  send_resp(conn_with_headers, code, "")
end
225
# Answers a failed upstream request: redirects the client to `url` when the
# `redirect_on_failure` option is set, otherwise sends `code` with `body`. Always halts.
defp error_or_redirect(conn, url, code, body, opts) do
  replied =
    case Keyword.get(opts, :redirect_on_failure, false) do
      off when off in [nil, false] -> send_resp(conn, code, body)
      _on -> Phoenix.Controller.redirect(conn, external: url)
    end

  halt(replied)
end
237
# Lowercases the name of every `{name, value}` header; values are left untouched.
defp downcase_headers(headers) do
  lowercase_name = fn {name, value} -> {String.downcase(name), value} end

  Enum.map(headers, lowercase_name)
end
243
# Returns the media type of the `content-type` header without its parameters (the part before
# the first `;`), or "application/octet-stream" when the header is absent.
# Expects header names to be lowercase already.
defp get_content_type(headers) do
  fallback = {"content-type", "application/octet-stream"}
  {_, raw_content_type} = List.keyfind(headers, "content-type", 0, fallback)

  raw_content_type
  |> String.split(";", parts: 2)
  |> hd()
end
251
# Sets every `{name, value}` pair of `headers` as a response header on `conn`, in list order.
defp put_resp_headers(conn, headers) do
  Enum.reduce(headers, conn, fn {name, value}, acc ->
    put_resp_header(acc, name, value)
  end)
end
257
# Builds the headers sent upstream: the client's headers restricted to `@keep_req_headers`,
# followed by the caller-supplied `:req_headers`.
# With `keep_user_agent: true` the `user-agent` entry is set to `Pleroma.Application.user_agent()`;
# otherwise whatever `user-agent` survived the filter is sent unchanged.
defp build_req_headers(headers, opts) do
  kept =
    headers
    |> downcase_headers()
    |> Enum.filter(fn {name, _} -> name in @keep_req_headers end)

  upstream_headers = kept ++ Keyword.get(opts, :req_headers, [])

  if Keyword.get(opts, :keep_user_agent, false) do
    own_user_agent = {"user-agent", Pleroma.Application.user_agent()}
    List.keystore(upstream_headers, "user-agent", 0, own_user_agent)
  else
    upstream_headers
  end
end
277
# Builds the headers sent to the client: upstream headers restricted to `@keep_resp_headers`,
# completed with cache and content-disposition headers, followed by the caller-supplied
# `:resp_headers`.
defp build_resp_headers(headers, opts) do
  kept = Enum.filter(headers, fn {name, _} -> name in @keep_resp_headers end)

  completed =
    kept
    |> build_resp_cache_headers(opts)
    |> build_resp_content_disposition_header(opts)

  completed ++ Keyword.get(opts, :resp_headers, [])
end
285
# Makes sure the response carries a `cache-control` header:
#   * caching headers including `cache-control` -> headers are returned unchanged
#   * caching headers without `cache-control` -> `cache-control: public` is added
#   * no caching header at all -> `@default_cache_control_header` is used
defp build_resp_cache_headers(headers, _opts) do
  any_cache_header? = Enum.any?(headers, fn {name, _} -> name in @resp_cache_headers end)
  cache_control? = List.keymember?(headers, "cache-control", 0)

  case {any_cache_header?, cache_control?} do
    {true, true} ->
      headers

    {true, false} ->
      # There's a caching header present but no cache-control -- we need to explicitly override it
      # to public as Plug defaults to "max-age=0, private, must-revalidate"
      List.keystore(headers, "cache-control", 0, {"cache-control", "public"})

    {false, _} ->
      default = {"cache-control", @default_cache_control_header}
      List.keystore(headers, "cache-control", 0, default)
  end
end
308
# Forces `content-disposition: attachment` on responses whose content type must not be shown inline.
#
# The `:inline_content_types` option is `true` (never force), `false` (always force) or a list of
# content types allowed inline (defaults to `@inline_content_types`). When forcing, any upstream
# `content-disposition` header is replaced, keeping its filename when one can be extracted.
defp build_resp_content_disposition_header(headers, opts) do
  inline = Keyword.get(opts, :inline_content_types, @inline_content_types)
  content_type = get_content_type(headers)

  attachment? =
    cond do
      is_list(inline) -> not Enum.member?(inline, content_type)
      inline == false -> true
      true -> false
    end

  if attachment? do
    disposition = "attachment; filename=\"#{attachment_name(headers, opts)}\""

    List.keystore(headers, "content-disposition", 0, {"content-disposition", disposition})
  else
    headers
  end
end

# Picks the attachment filename: the quoted `filename="…"` of the upstream `content-disposition`
# header when there is one, else the `:attachment_name` option, else "attachment".
# Uses `with` instead of rescuing `MatchError`, so only the expected "no header" / "no filename"
# cases fall back to the default and unrelated match errors are no longer swallowed.
defp attachment_name(headers, opts) do
  with {_, value} when is_binary(value) <- List.keyfind(headers, "content-disposition", 0),
       [name | _] <-
         Regex.run(~r/filename="((?:[^"\\]|\\.)*)"/u, value, capture: :all_but_first) do
    name
  else
    _ -> Keyword.get(opts, :attachment_name, "attachment")
  end
end
346
# Checks the upstream `content-length` header against `limit`.
# Returns `{:error, :body_too_large}` only when the header parses to an integer greater than
# `limit`; a missing or unparsable header, or a non-positive / non-integer limit, yields `:ok`.
defp header_length_constraint(headers, limit) when is_integer(limit) and limit > 0 do
  with {_, raw_length} <- List.keyfind(headers, "content-length", 0),
       {declared, _rest} <- Integer.parse(raw_length) do
    if declared <= limit, do: :ok, else: {:error, :body_too_large}
  else
    _ -> :ok
  end
end

defp header_length_constraint(_headers, _limit), do: :ok
362
# Checks the number of bytes streamed so far against `limit`.
# Returns `{:error, :body_too_large}` once `size` exceeds a positive integer `limit`, else `:ok`.
#
# The comparison is strict so it agrees with `header_length_constraint/2`, which accepts a
# `content-length` equal to the limit: with `>=` a body of exactly `limit` bytes passed the
# header check and was then cut off while streaming.
defp body_size_constraint(size, limit) when is_integer(limit) and limit > 0 and size > limit do
  {:error, :body_too_large}
end

defp body_size_constraint(_size, _limit), do: :ok
368
# Read-time budget helpers used by the body streaming loop.
#
# `check_read_duration/2` is called before each upstream read with the time already spent (ms):
#   * `{:error, :read_duration_exceeded}` when the budget `max` is exhausted
#   * `{:ok, {duration, started}}` otherwise, `started` being the timestamp of this read
#   * `{:ok, :no_duration_limit}` when no positive integer limit applies (e.g. `:infinity`)
#
# `increase_read_duration/1` is called after the read with that value and returns
# `{:ok, total_ms}`, or `{:ok, :no_duration_limit}` again when no limit applies.
#
# The no-limit clauses used to return a 3-tuple that matched neither `{:ok, duration}` nor any
# `else` clause of the caller's `with`, so disabling the limit raised `WithClauseError`.
# Elapsed time is taken from the monotonic clock so wall-clock adjustments cannot distort it.
defp check_read_duration(duration, max)
     when is_integer(duration) and is_integer(max) and max > 0 do
  if duration > max do
    {:error, :read_duration_exceeded}
  else
    {:ok, {duration, :erlang.monotonic_time(:millisecond)}}
  end
end

defp check_read_duration(_duration, _max), do: {:ok, :no_duration_limit}

defp increase_read_duration({previous_duration, started})
     when is_integer(previous_duration) and is_integer(started) do
  elapsed = :erlang.monotonic_time(:millisecond) - started
  {:ok, previous_duration + elapsed}
end

defp increase_read_duration(_), do: {:ok, :no_duration_limit}
389
# The HTTP client module every upstream call (`request`, `stream_body`, `close`) goes through.
defp client do
  Pleroma.ReverseProxy.Client
end
391 end