Merge branch 'develop' into feature/polls-2-electric-boogalo
[akkoma] / lib / pleroma / reverse_proxy.ex
1 # Pleroma: A lightweight social networking server
2 # Copyright © 2017-2019 Pleroma Authors <https://pleroma.social/>
3 # SPDX-License-Identifier: AGPL-3.0-only
4
5 defmodule Pleroma.ReverseProxy do
6 alias Pleroma.HTTP
7
# Client request headers that are forwarded to the upstream.
@keep_req_headers ~w(
  accept user-agent accept-encoding cache-control if-modified-since
  if-unmodified-since if-none-match if-range range
)

# Upstream response headers that count as "caching headers".
@resp_cache_headers ~w(etag date last-modified cache-control)

# Upstream response headers that are passed back to the client.
@keep_resp_headers @resp_cache_headers ++
                     ~w(
                       content-type content-disposition content-encoding content-range
                       accept-ranges vary
                     )

# `cache-control` value used when the upstream sent no caching header at all.
@default_cache_control_header "public, max-age=1209600"

# Upstream status codes that are accepted; anything else is treated as a failure.
@valid_resp_codes [200, 206, 304]

# Default for the `max_read_duration` option, in milliseconds.
@max_read_duration :timer.seconds(30)

# Default for the `max_body_length` option, as advertised in the module documentation.
@max_body_length :infinity

# HTTP methods accepted by `call/3`.
@methods ["GET", "HEAD"]
19
20 @moduledoc """
21 A reverse proxy.
22
23 Pleroma.ReverseProxy.call(conn, url, options)
24
25 It is not meant to be added into a plug pipeline, but to be called from another plug or controller.
26
27 Supports `#{inspect(@methods)}` HTTP methods, and only allows `#{inspect(@valid_resp_codes)}` status codes.
28
29 Responses are chunked to the client while downloading from the upstream.
30
31 Some request / response headers are preserved:
32
33 * request: `#{inspect(@keep_req_headers)}`
34 * response: `#{inspect(@keep_resp_headers)}`
35
36 If no caching headers (`#{inspect(@resp_cache_headers)}`) are returned by upstream, `cache-control` will be
37 set to `#{inspect(@default_cache_control_header)}`.
38
39 Options:
40
41 * `redirect_on_failure` (default `false`). Redirects the client to the real remote URL if there are any HTTP
42 errors. Errors during body processing are not redirected, as the response is already being chunked. This may
43 expose the remote URL, clients' IPs, ….
44
45 * `max_body_length` (default `#{inspect(@max_body_length)}`): limits the content length to be approximately the
46 specified length. It is validated with the `content-length` header and also verified when proxying.
47
48 * `max_read_duration` (default `#{inspect(@max_read_duration)}` ms): the total time the connection is allowed to
49 read from the remote upstream.
50
51 * `inline_content_types`:
52 * `true` will not alter `content-disposition` (up to the upstream),
53 * `false` will add `content-disposition: attachment` to any response,
54 * a list of whitelisted content types
55
56 * `keep_user_agent` will forward the client's user-agent to the upstream. This may be useful if the upstream is
57 doing content transformation (encoding, …) depending on the request.
58
59 * `req_headers`, `resp_headers` additional headers.
60
61 * `http`: options for [hackney](https://github.com/benoitc/hackney).
62
63 """
# Base hackney options; the caller's `http` option is merged on top of these.
@default_hackney_options []

# Default whitelist for the `inline_content_types` option.
@inline_content_types ~w(
  image/gif
  image/jpeg
  image/jpg
  image/png
  image/svg+xml
  audio/mpeg
  audio/mp3
  video/webm
  video/mp4
  video/quicktime
)
78
79 require Logger
80 import Plug.Conn
81
# Options accepted by `call/3`.
#
# `:http` is a keyword list of hackney options (the previous `[]` only typed the
# empty list). `:attachment_name` is the fallback file name used when a
# `content-disposition: attachment` header has to be generated; `call/3` fills
# it in from the URL when the caller did not provide one.
@type option() ::
        {:keep_user_agent, boolean()}
        | {:max_read_duration, :timer.time() | :infinity}
        | {:max_body_length, non_neg_integer() | :infinity}
        | {:http, keyword()}
        | {:req_headers, [{String.t(), String.t()}]}
        | {:resp_headers, [{String.t(), String.t()}]}
        | {:inline_content_types, boolean() | [String.t()]}
        | {:redirect_on_failure, boolean()}
        | {:attachment_name, String.t()}
91
@doc """
Proxies the request in `conn` to `url` and returns the halted connection.

Only the methods in `@methods` are proxied; any other method is answered with
a `400`. On success the upstream response is streamed to the client in chunks
(or sent with an empty body when the upstream returned no body reference, as
for `HEAD`). On failure the client receives an error response, or a redirect
to `url` when the `redirect_on_failure` option is set.

See the module documentation for the supported options.
"""
@spec call(Plug.Conn.t(), url :: String.t(), [option()]) :: Plug.Conn.t()
def call(_conn, _url, _opts \\ [])

def call(%{method: method} = conn, url, opts) when method in @methods do
  hackney_opts =
    @default_hackney_options
    |> Keyword.merge(Keyword.get(opts, :http, []))
    |> HTTP.process_request_options()

  req_headers = build_req_headers(conn.req_headers, opts)

  # Remember a file name derived from the URL; it is only used if a
  # `content-disposition: attachment` header has to be generated later.
  opts =
    if filename = Pleroma.Web.MediaProxy.filename(url) do
      Keyword.put_new(opts, :attachment_name, filename)
    else
      opts
    end

  max_body_length = Keyword.get(opts, :max_body_length, @max_body_length)

  case request(method, url, req_headers, hackney_opts) do
    {:ok, code, headers, client} ->
      case header_length_constraint(headers, max_body_length) do
        :ok ->
          response(conn, client, url, code, headers, opts)

        {:error, error} ->
          # The upstream connection is already open at this point. Close it
          # before bailing out, otherwise it is never released.
          :hackney.close(client)

          Logger.error("#{__MODULE__}: request to #{inspect(url)} failed: #{inspect(error)}")

          conn
          |> error_or_redirect(url, 500, "Request failed", opts)
          |> halt()
      end

    # No client reference means there is no body to stream (e.g. HEAD).
    {:ok, code, headers} ->
      conn
      |> head_response(url, code, headers, opts)
      |> halt()

    {:error, {:invalid_http_response, code}} ->
      Logger.error("#{__MODULE__}: request to #{inspect(url)} failed with HTTP status #{code}")

      conn
      |> error_or_redirect(
        url,
        code,
        "Request failed: " <> Plug.Conn.Status.reason_phrase(code),
        opts
      )
      |> halt()

    {:error, error} ->
      Logger.error("#{__MODULE__}: request to #{inspect(url)} failed: #{inspect(error)}")

      conn
      |> error_or_redirect(url, 500, "Request failed", opts)
      |> halt()
  end
end

def call(conn, _, _) do
  conn
  |> send_resp(400, Plug.Conn.Status.reason_phrase(400))
  |> halt()
end
144
# Performs the upstream request through hackney.
#
# Returns:
#   * `{:ok, code, headers, client}` - accepted status, body still to be streamed
#   * `{:ok, code, headers}`         - accepted status, no body reference
#   * `{:error, {:invalid_http_response, code}}` - status not in `@valid_resp_codes`
#   * `{:error, reason}`             - hackney error
#
# Header names are downcased in the successful results.
defp request(method, url, headers, hackney_opts) do
  Logger.debug("#{__MODULE__} #{method} #{url} #{inspect(headers)}")
  method = method |> String.downcase() |> String.to_existing_atom()

  case :hackney.request(method, url, headers, "", hackney_opts) do
    {:ok, code, headers, client} when code in @valid_resp_codes ->
      {:ok, code, downcase_headers(headers), client}

    {:ok, code, headers} when code in @valid_resp_codes ->
      {:ok, code, downcase_headers(headers)}

    {:ok, code, _headers, client} ->
      # The body of a rejected response is never read, so close the client
      # here instead of leaving the connection open.
      :hackney.close(client)
      {:error, {:invalid_http_response, code}}

    # Bodiless responses with a rejected status previously matched no clause
    # and raised a CaseClauseError.
    {:ok, code, _headers} ->
      {:error, {:invalid_http_response, code}}

    {:error, error} ->
      {:error, error}
  end
end
163
# Sends the filtered upstream headers, then streams the upstream body to the
# client as a chunked response. Always returns a halted conn; on any streaming
# error the upstream client is closed.
defp response(conn, client, url, status, headers, opts) do
  resp_headers = build_resp_headers(headers, opts)

  chunked_conn =
    conn
    |> put_resp_headers(resp_headers)
    |> send_chunked(status)

  case chunk_reply(chunked_conn, client, opts) do
    {:ok, conn} ->
      halt(conn)

    {:error, reason, conn} ->
      # `:closed` is closed silently; every other reason is logged first.
      if reason != :closed do
        Logger.warn(
          "#{__MODULE__} request to #{url} failed while reading/chunking: #{inspect(reason)}"
        )
      end

      :hackney.close(client)
      halt(conn)
  end
end
188
# Streams the upstream body to the client, starting with nothing sent and no
# time spent reading.
defp chunk_reply(conn, client, opts) do
  chunk_reply(conn, client, opts, 0, 0)
end

# One iteration of the streaming loop: check the read-time budget, read a chunk
# from upstream, add the time spent reading, check the running body size, and
# forward the chunk to the client.
#
# Returns `{:ok, conn}` once the upstream body is exhausted, or
# `{:error, reason, conn}` when a limit is hit or reading/sending fails.
defp chunk_reply(conn, client, opts, sent_so_far, duration) do
  max_read_duration = Keyword.get(opts, :max_read_duration, @max_read_duration)

  # The documented option is `max_body_length`; the previous lookup of
  # `:max_body_size` never found a value, so the limit was not enforced here.
  max_body_length = Keyword.get(opts, :max_body_length, @max_body_length)

  with {:ok, duration} <- check_read_duration(duration, max_read_duration),
       {:ok, data} <- :hackney.stream_body(client),
       {:ok, duration} <- increase_read_duration(duration),
       sent_so_far = sent_so_far + byte_size(data),
       :ok <- body_size_constraint(sent_so_far, max_body_length),
       {:ok, conn} <- chunk(conn, data) do
    chunk_reply(conn, client, opts, sent_so_far, duration)
  else
    :done -> {:ok, conn}
    {:error, error} -> {:error, error, conn}
  end
end
210
# Replies with the filtered upstream headers and an empty body. The URL
# argument is accepted but not used.
defp head_response(conn, _url, code, headers, opts) do
  resp_headers = build_resp_headers(headers, opts)

  conn
  |> put_resp_headers(resp_headers)
  |> send_resp(code, "")
end
216
# Failure reply: redirects the client to the remote URL when the
# `redirect_on_failure` option is set, otherwise sends `code` with `body`.
# The returned conn is halted in both cases.
defp error_or_redirect(conn, url, code, body, opts) do
  replied =
    if Keyword.get(opts, :redirect_on_failure, false) do
      Phoenix.Controller.redirect(conn, external: url)
    else
      send_resp(conn, code, body)
    end

  halt(replied)
end
228
# Lower-cases every header name; values are left untouched.
defp downcase_headers(headers) do
  Enum.map(headers, fn {name, value} -> {String.downcase(name), value} end)
end
234
# Returns the bare media type of the `content-type` header (parameters such as
# `; charset=...` are dropped), or "application/octet-stream" when the header
# is absent.
#
# The result is trimmed and lower-cased: media types are case-insensitive and
# may be followed by whitespace before the `;`, and the value is compared
# against a lower-case whitelist.
defp get_content_type(headers) do
  {_, content_type} =
    List.keyfind(headers, "content-type", 0, {"content-type", "application/octet-stream"})

  content_type
  |> String.split(";", parts: 2)
  |> hd()
  |> String.trim()
  |> String.downcase()
end
242
# Applies every `{name, value}` pair to the conn, in order, with `put_resp_header/3`.
defp put_resp_headers(conn, headers) do
  Enum.reduce(headers, conn, fn {name, value}, acc ->
    put_resp_header(acc, name, value)
  end)
end
248
# Builds the header list sent to the upstream: the client's headers restricted
# to `@keep_req_headers`, followed by the caller-supplied `req_headers`.
#
# `keep_user_agent` (default `false`) follows the module documentation: when
# set, the client's user-agent is forwarded as is; otherwise the user-agent is
# replaced by (or set to) the server's own. The previous code had the two
# branches swapped.
defp build_req_headers(headers, opts) do
  extra_headers = Keyword.get(opts, :req_headers, [])

  kept_headers =
    headers
    |> downcase_headers()
    |> Enum.filter(fn {name, _} -> name in @keep_req_headers end)

  req_headers = kept_headers ++ extra_headers

  if Keyword.get(opts, :keep_user_agent, false) do
    req_headers
  else
    List.keystore(
      req_headers,
      "user-agent",
      0,
      {"user-agent", Pleroma.Application.user_agent()}
    )
  end
end
268
# Builds the headers returned to the client: upstream headers restricted to
# `@keep_resp_headers`, with cache-control and content-disposition adjusted,
# followed by the caller-supplied `resp_headers`.
defp build_resp_headers(headers, opts) do
  extra_headers = Keyword.get(opts, :resp_headers, [])

  kept_headers =
    headers
    |> Enum.filter(fn {name, _} -> name in @keep_resp_headers end)
    |> build_resp_cache_headers(opts)
    |> build_resp_content_disposition_header(opts)

  kept_headers ++ extra_headers
end
276
# Makes sure the response carries a `cache-control` header:
#   * upstream sent one                    -> headers are returned unchanged
#   * upstream sent other caching headers  -> "public"
#   * upstream sent no caching header      -> `@default_cache_control_header`
defp build_resp_cache_headers(headers, _opts) do
  cache_control? = List.keymember?(headers, "cache-control", 0)
  any_cache_header? = Enum.any?(headers, fn {name, _} -> name in @resp_cache_headers end)

  case {any_cache_header?, cache_control?} do
    {true, true} ->
      headers

    {true, false} ->
      # A caching header is present but no cache-control -- we need to explicitly override it
      # to public as Plug defaults to "max-age=0, private, must-revalidate"
      List.keystore(headers, "cache-control", 0, {"cache-control", "public"})

    {false, _} ->
      List.keystore(
        headers,
        "cache-control",
        0,
        {"cache-control", @default_cache_control_header}
      )
  end
end
299
# Forces `content-disposition: attachment` when the response's content type is
# not allowed inline, according to the `inline_content_types` option:
#   * a list  -> attachment unless the content type is in the list
#   * `false` -> always attachment
#   * other   -> headers are returned unchanged
#
# The attachment file name is the quoted `filename="..."` of the upstream
# content-disposition header when there is one, otherwise the
# `attachment_name` option, otherwise "attachment".
defp build_resp_content_disposition_header(headers, opts) do
  allowed = Keyword.get(opts, :inline_content_types, @inline_content_types)
  content_type = get_content_type(headers)

  attachment? =
    cond do
      is_list(allowed) && !Enum.member?(allowed, content_type) -> true
      allowed == false -> true
      true -> false
    end

  if attachment? do
    # Plain pattern matching replaces the previous try/rescue on MatchError:
    # a missing header or a header without a quoted filename falls through
    # to the configured default name.
    name =
      with {_, value} when is_binary(value) <-
             List.keyfind(headers, "content-disposition", 0),
           [filename | _] <-
             Regex.run(
               ~r/filename="((?:[^"\\]|\\.)*)"/u,
               value,
               capture: :all_but_first
             ) do
        filename
      else
        _ -> Keyword.get(opts, :attachment_name, "attachment")
      end

    disposition = "attachment; filename=\"#{name}\""

    List.keystore(headers, "content-disposition", 0, {"content-disposition", disposition})
  else
    headers
  end
end
337
# Checks the `content-length` header against a positive integer limit.
# Returns `{:error, :body_too_large}` only when the header is present, parses
# as an integer and exceeds the limit; `:ok` in every other case, including
# when no positive integer limit is given.
defp header_length_constraint(headers, limit) when is_integer(limit) and limit > 0 do
  case List.keyfind(headers, "content-length", 0) do
    {_, raw_length} ->
      case Integer.parse(raw_length) do
        {length, _rest} when length > limit -> {:error, :body_too_large}
        _ -> :ok
      end

    _ ->
      :ok
  end
end

defp header_length_constraint(_, _), do: :ok
353
# Checks the number of body bytes seen so far against a positive integer limit.
#
# A body of exactly `limit` bytes is accepted, matching
# `header_length_constraint/2`, which accepts `content-length <= limit`. The
# previous `>=` comparison rejected it.
defp body_size_constraint(size, limit) when is_integer(limit) and limit > 0 and size > limit do
  {:error, :body_too_large}
end

defp body_size_constraint(_, _), do: :ok
359
# Gate run before each upstream read.
#
# With an integer `duration` (milliseconds spent reading so far) and a positive
# integer `max`, returns `{:error, :read_duration_exceeded}` once the budget is
# used up, otherwise `{:ok, {duration, started}}` where `started` is the
# monotonic timestamp at which the upcoming read begins.
#
# In every other case (e.g. `max` is `:infinity`) no limit applies and
# `{:ok, :no_duration_limit}` is returned. The previous 3-tuple matched none of
# the `with` patterns in `chunk_reply/5` and raised a WithClauseError.
defp check_read_duration(duration, max)
     when is_integer(duration) and is_integer(max) and max > 0 do
  if duration > max do
    {:error, :read_duration_exceeded}
  else
    # Monotonic time keeps the measurement immune to wall-clock adjustments.
    {:ok, {duration, :erlang.monotonic_time(:millisecond)}}
  end
end

defp check_read_duration(_, _), do: {:ok, :no_duration_limit}

# Adds the time elapsed since `started` to the accumulated read duration and
# returns `{:ok, total_ms}`. The `:no_duration_limit` marker (or anything else
# that is not a `{duration, started}` pair) yields `{:ok, :no_duration_limit}`.
defp increase_read_duration({previous_duration, started})
     when is_integer(previous_duration) and is_integer(started) do
  elapsed = :erlang.monotonic_time(:millisecond) - started
  {:ok, previous_duration + elapsed}
end

defp increase_read_duration(_), do: {:ok, :no_duration_limit}
380 end