64c3c3a19407e4fbf6391af2a5a6a7b85d74ca6c
[akkoma] / lib / pleroma / reverse_proxy.ex
1 defmodule Pleroma.ReverseProxy do
# Request headers copied from the incoming client request to the upstream request.
@keep_req_headers [
  "accept",
  "user-agent",
  "accept-encoding",
  "cache-control",
  "if-modified-since",
  "if-none-match",
  "range"
]

# Upstream response headers that count as "caching headers".
@resp_cache_headers ["etag", "date", "last-modified", "cache-control"]

# Upstream response headers passed through to the client (caching headers included).
@keep_resp_headers @resp_cache_headers ++
                     [
                       "content-type",
                       "content-disposition",
                       "content-length",
                       "accept-ranges",
                       "vary"
                     ]

# `cache-control` value used when the upstream sent none of the caching headers.
@default_cache_control_header "public, max-age=1209600"

# Upstream status codes that are proxied; anything else is treated as a failure.
@valid_resp_codes [200, 206, 304]

# Defaults for the `max_read_duration` (milliseconds) and `max_body_length` options.
@max_read_duration :timer.minutes(2)
@max_body_length :infinity

# HTTP methods accepted by `call/3`.
@methods ["GET", "HEAD"]
11
12 @moduledoc """
13 A reverse proxy.
14
15 Pleroma.ReverseProxy.call(conn, url, options)
16
17 It is not meant to be added into a plug pipeline, but to be called from another plug or controller.
18
19 Supports `#{inspect(@methods)}` HTTP methods, and only allows `#{inspect(@valid_resp_codes)}` status codes.
20
21 Responses are chunked to the client while downloading from the upstream.
22
23 Some request / response headers are preserved:
24
25 * request: `#{inspect(@keep_req_headers)}`
26 * response: `#{inspect(@keep_resp_headers)}`
27
28 If no caching headers (`#{inspect(@resp_cache_headers)}`) are returned by upstream, `cache-control` will be
29 set to `#{inspect(@default_cache_control_header)}`.
30
31 Options:
32
33 * `redirect_on_failure` (default `false`). Redirects the client to the real remote URL if there are any HTTP
34 errors. Errors during body processing cannot be redirected because the response is already being chunked. This
35 may expose the remote URL, client IPs, ….
36
37 * `max_body_length` (default `#{inspect(@max_body_length)}`): limits the content length to be approximately the
38 specified length. It is validated with the `content-length` header and also verified when proxying.
39
40 * `max_read_duration` (default `#{inspect(@max_read_duration)}` ms): the total time the connection is allowed to
41 read from the remote upstream.
42
43 * `inline_content_types`:
44 * `true` will not alter `content-disposition` (up to the upstream),
45 * `false` will add `content-disposition: attachment` to any response,
46 * a list of whitelisted content types
47
48 * `keep_user_agent` will forward the client's user-agent to the upstream. This may be useful if the upstream is
49 doing content transformation (encoding, …) depending on the request.
50
51 * `req_headers`, `resp_headers` additional headers.
52
53 * `http`: options for [hackney](https://github.com/benoitc/hackney).
54
55 """
# HTTP client modules, read from the `:pleroma` application environment while this
# module is compiled; they fall back to `:hackney` and `HTTPoison` when unset.
@hackney Application.get_env(:pleroma, :hackney, :hackney)
@httpoison Application.get_env(:pleroma, :httpoison, HTTPoison)

# Base hackney options; the caller's `:http` option is merged on top of these.
@default_hackney_options [follow_redirect: true]

# Content types served without a forced `content-disposition: attachment` when the
# `inline_content_types` option is not given.
@inline_content_types ~w(
  image/gif
  image/jpeg
  image/jpg
  image/png
  image/svg+xml
  audio/mpeg
  audio/mp3
  video/webm
  video/mp4
  video/quicktime
)
73
74 require Logger
75 import Plug.Conn
76
# Options accepted by `call/3`.
#
# `:http` is a keyword list of hackney options (the literal `[]` in a typespec only
# admits the empty list). `:attachment_name` is the filename used when a
# `content-disposition: attachment` header is generated; `call/3` fills it in from
# the URL when the caller does not provide it.
@type option() ::
        {:keep_user_agent, boolean()}
        | {:max_read_duration, :timer.time() | :infinity}
        | {:max_body_length, non_neg_integer() | :infinity}
        | {:http, keyword()}
        | {:req_headers, [{String.t(), String.t()}]}
        | {:resp_headers, [{String.t(), String.t()}]}
        | {:inline_content_types, boolean() | [String.t()]}
        | {:redirect_on_failure, boolean()}
        | {:attachment_name, String.t()}
86
@doc """
Proxies `url` to the client behind `conn` and returns the halted connection.

Only the methods in `@methods` are proxied; any other method is answered with
`400`. A successful upstream reply with a body is streamed in chunks, a reply
without a body (HEAD) is sent as an empty response. Upstream failures are answered
through `error_or_redirect/5`, i.e. either an error response or, with
`redirect_on_failure: true`, a redirect to `url`.

See the module documentation for the supported `opts`.
"""
@spec call(Plug.Conn.t(), url :: String.t(), [option()]) :: Plug.Conn.t()
def call(conn, url, opts \\ [])

def call(conn = %{method: method}, url, opts) when method in @methods do
  hackney_opts =
    @default_hackney_options
    |> Keyword.merge(Keyword.get(opts, :http, []))
    |> @httpoison.process_request_options()

  req_headers = build_req_headers(conn.req_headers, opts)

  # Remember the file name derived from the URL unless the caller supplied one.
  opts =
    if filename = Pleroma.Web.MediaProxy.filename(url) do
      Keyword.put_new(opts, :attachment_name, filename)
    else
      opts
    end

  case request(method, url, req_headers, hackney_opts) do
    {:ok, code, headers, client} ->
      case header_lenght_constraint(headers, Keyword.get(opts, :max_body_length)) do
        :ok ->
          response(conn, client, url, code, headers, opts)

        {:error, error} ->
          # The body will never be read, so release the upstream connection now.
          :hackney.close(client)
          request_failed(conn, url, error, opts)
      end

    # Reply without a body reference (HEAD): nothing to stream.
    {:ok, code, headers} ->
      conn
      |> head_response(url, code, headers, opts)
      |> halt()

    {:error, error} ->
      request_failed(conn, url, error, opts)
  end
end

def call(conn, _, _) do
  conn
  |> send_resp(400, Plug.Conn.Status.reason_phrase(400))
  |> halt()
end

# Logs an upstream failure and answers the client. `error_or_redirect/5` halts the
# connection in both of its branches, so no extra `halt/1` is needed here.
defp request_failed(conn, url, {:invalid_http_response, code}, opts) do
  Logger.error("#{__MODULE__}: request to #{inspect(url)} failed with HTTP status #{code}")
  error_or_redirect(conn, url, code, failure_body(code), opts)
end

defp request_failed(conn, url, error, opts) do
  Logger.error("#{__MODULE__}: request to #{inspect(url)} failed: #{inspect(error)}")
  error_or_redirect(conn, url, 500, "Request failed", opts)
end

# Builds the response body for a rejected upstream status. Upstreams may answer with
# codes Plug knows no reason phrase for (e.g. CDN-specific 52x codes), in which case
# `Plug.Conn.Status.reason_phrase/1` raises; fall back to the bare message then.
defp failure_body(code) do
  "Request failed: " <> Plug.Conn.Status.reason_phrase(code)
rescue
  ArgumentError -> "Request failed"
end
137
# Performs the upstream request through the configured hackney module.
#
# Returns:
#   * `{:ok, code, headers, client}` - allowed status with a body to stream,
#   * `{:ok, code, headers}` - allowed status without a body reference (HEAD),
#   * `{:error, {:invalid_http_response, code}}` - status not in `@valid_resp_codes`,
#   * `{:error, reason}` - the request itself failed.
#
# Header names are downcased in the success cases.
defp request(method, url, headers, hackney_opts) do
  Logger.debug("#{__MODULE__} #{method} #{url} #{inspect(headers)}")
  # `call/3` only lets "GET" and "HEAD" through, so both atoms already exist.
  method = method |> String.downcase() |> String.to_existing_atom()

  case @hackney.request(method, url, headers, "", hackney_opts) do
    {:ok, code, headers, client} when code in @valid_resp_codes ->
      {:ok, code, downcase_headers(headers), client}

    {:ok, code, headers} when code in @valid_resp_codes ->
      {:ok, code, downcase_headers(headers)}

    {:ok, code, _headers, client} ->
      # The body of a rejected response is never read; close the client so the
      # upstream connection is not left checked out.
      :hackney.close(client)
      {:error, {:invalid_http_response, code}}

    # Bodiless reply (HEAD) with a status that is not allowed.
    {:ok, code, _headers} ->
      {:error, {:invalid_http_response, code}}

    {:error, error} ->
      {:error, error}
  end
end
156
# Sends the filtered upstream headers, then streams the upstream body to the client
# as a chunked response. Always returns the halted connection; when streaming fails
# the hackney client is closed, and every failure except `:closed` is logged.
defp response(conn, client, url, status, headers, opts) do
  resp_headers = build_resp_headers(headers, opts)

  chunked_conn =
    conn
    |> put_resp_headers(resp_headers)
    |> send_chunked(status)

  case chunk_reply(chunked_conn, client, opts) do
    {:ok, finished_conn} ->
      halt(finished_conn)

    {:error, reason, partial_conn} ->
      # `:closed` is not logged; any other failure is.
      if reason != :closed do
        Logger.warn(
          "#{__MODULE__} request to #{url} failed while reading/chunking: #{inspect(reason)}"
        )
      end

      :hackney.close(client)
      halt(partial_conn)
  end
end
181
# Streams the upstream body to the client, starting with zero bytes sent and zero
# milliseconds spent reading.
defp chunk_reply(conn, client, opts) do
  chunk_reply(conn, client, opts, 0, 0)
end

# One iteration of the streaming loop: check the read-time budget, read the next
# part of the upstream body, account for the time and bytes used, enforce the
# `max_body_length` option and forward the data to the client.
#
# Returns `{:ok, conn}` once the upstream body is exhausted, or
# `{:error, reason, conn}` as soon as any step fails.
defp chunk_reply(conn, client, opts, sent_so_far, duration) do
  max_read_duration = Keyword.get(opts, :max_read_duration, @max_read_duration)
  # The option is named `max_body_length` (see the moduledoc and `call/3`).
  max_body_length = Keyword.get(opts, :max_body_length, @max_body_length)

  with {:ok, duration} <- check_read_duration(duration, max_read_duration),
       {:ok, data} <- @hackney.stream_body(client),
       {:ok, duration} <- increase_read_duration(duration),
       sent_so_far = sent_so_far + byte_size(data),
       :ok <- body_size_constraint(sent_so_far, max_body_length),
       {:ok, conn} <- chunk(conn, data) do
    chunk_reply(conn, client, opts, sent_so_far, duration)
  else
    :done -> {:ok, conn}
    {:error, error} -> {:error, error, conn}
  end
end
203
# Answers a bodiless upstream reply: filtered upstream headers, upstream status
# code and an empty body.
defp head_response(conn, _url, code, headers, opts) do
  resp_headers = build_resp_headers(headers, opts)
  send_resp(put_resp_headers(conn, resp_headers), code, "")
end
209
# Answers a failed upstream request and halts the connection: with a truthy
# `redirect_on_failure` option the client is redirected to `url`, otherwise it
# receives `code` with `body`.
defp error_or_redirect(conn, url, code, body, opts) do
  replied =
    case Keyword.get(opts, :redirect_on_failure, false) do
      disabled when disabled in [nil, false] -> send_resp(conn, code, body)
      _enabled -> Phoenix.Controller.redirect(conn, external: url)
    end

  halt(replied)
end
221
# Returns `headers` with every header name lowercased; values are left untouched.
defp downcase_headers(headers) do
  Enum.into(headers, [], fn {name, value} -> {String.downcase(name), value} end)
end
227
# Applies each `{name, value}` pair to the connection, in list order, through
# `put_resp_header/3`.
defp put_resp_headers(conn, headers) do
  List.foldl(headers, conn, fn {name, value}, acc ->
    put_resp_header(acc, name, value)
  end)
end
233
# Builds the header list sent upstream: the client headers listed in
# `@keep_req_headers` (names downcased), followed by the `req_headers` option.
#
# NOTE(review): with `keep_user_agent: true` the `user-agent` header is replaced by
# `Pleroma.Application.user_agent()`, while the moduledoc says the option forwards
# the client's user-agent. Behaviour is left as is; confirm which one is intended.
defp build_req_headers(headers, opts) do
  forwarded =
    for {name, _value} = header <- downcase_headers(headers),
        name in @keep_req_headers,
        do: header

  merged = forwarded ++ Keyword.get(opts, :req_headers, [])

  if Keyword.get(opts, :keep_user_agent, false) do
    own_agent = {"user-agent", Pleroma.Application.user_agent()}
    List.keystore(merged, "user-agent", 0, own_agent)
  else
    merged
  end
end
254
# Builds the header list sent to the client: the upstream headers listed in
# `@keep_resp_headers`, the cache-control fallback, the content-disposition rule,
# and finally the `resp_headers` option appended at the end.
defp build_resp_headers(headers, opts) do
  kept = Enum.filter(headers, fn {name, _value} -> name in @keep_resp_headers end)
  with_cache = build_resp_cache_headers(kept, opts)
  with_disposition = build_resp_content_disposition_header(with_cache, opts)

  with_disposition ++ Keyword.get(opts, :resp_headers, [])
end
263
# Sets `cache-control` to `@default_cache_control_header` when none of the headers
# in `@resp_cache_headers` is present; otherwise returns the headers unchanged.
# The options argument is accepted but not used.
defp build_resp_cache_headers(headers, _opts) do
  case Enum.find(headers, fn {name, _value} -> name in @resp_cache_headers end) do
    nil ->
      List.keystore(headers, "cache-control", 0, {"cache-control", @default_cache_control_header})

    _cache_header ->
      headers
  end
end
273
# Forces `content-disposition: attachment` according to the `inline_content_types`
# option (default `@inline_content_types`):
#
#   * a list    - attachment unless the response content type is in the list,
#   * `false`   - always attachment,
#   * otherwise - headers are returned unchanged.
#
# A missing `content-type` header is treated as `application/octet-stream`. The
# list is checked against both the raw header value and the bare media type, so a
# value carrying parameters (`image/png; charset=binary`) still matches `image/png`.
defp build_resp_content_disposition_header(headers, opts) do
  opt = Keyword.get(opts, :inline_content_types, @inline_content_types)

  {_, content_type} =
    List.keyfind(headers, "content-type", 0, {"content-type", "application/octet-stream"})

  # Media type without parameters, e.g. "image/png" for "image/png; charset=binary".
  media_type =
    content_type
    |> String.split(";", parts: 2)
    |> hd()
    |> String.trim()

  attachment? =
    cond do
      is_list(opt) ->
        not (Enum.member?(opt, content_type) or Enum.member?(opt, media_type))

      opt == false ->
        true

      true ->
        false
    end

  if attachment? do
    # NOTE(review): the file name is emitted unquoted; names containing spaces or
    # separators may need a quoted-string form - confirm against callers.
    disposition = "attachment; filename=" <> Keyword.get(opts, :attachment_name, "attachment")
    List.keystore(headers, "content-disposition", 0, {"content-disposition", disposition})
  else
    headers
  end
end
294
# Checks the upstream `content-length` header against a positive integer limit.
# Returns `{:error, :body_too_large}` only when the header parses to a number
# greater than the limit; a missing or unparsable header passes.
defp header_lenght_constraint(headers, limit) when is_integer(limit) and limit > 0 do
  case List.keyfind(headers, "content-length", 0) do
    {_name, raw_length} ->
      case Integer.parse(raw_length) do
        {length, _rest} when length > limit -> {:error, :body_too_large}
        _within_limit_or_unparsable -> :ok
      end

    _missing ->
      :ok
  end
end

# Any other limit (e.g. `:infinity` or `nil`) disables the check.
defp header_lenght_constraint(_, _), do: :ok
310
# Checks the number of bytes received so far against a positive integer limit.
# A body of exactly `limit` bytes is accepted, matching the `content-length` check
# in `header_lenght_constraint/2`, which also accepts a length equal to the limit.
defp body_size_constraint(size, limit) when is_integer(limit) and limit > 0 and size > limit do
  {:error, :body_too_large}
end

# Any other limit (e.g. `:infinity` or `nil`) disables the check.
defp body_size_constraint(_, _), do: :ok
316
# Verifies the read-time budget before the next upstream read.
#
# With an integer `duration` (milliseconds spent so far) and a positive integer
# `max`, returns `{:error, :read_duration_exceeded}` once the budget is used up, or
# `{:ok, {duration, started}}` where `started` is the monotonic timestamp taken
# just before the read.
defp check_read_duration(duration, max)
     when is_integer(duration) and is_integer(max) and max > 0 do
  if duration > max do
    {:error, :read_duration_exceeded}
  else
    Logger.debug("Duration #{inspect(duration)}")
    # Monotonic time: a wall-clock adjustment must not distort the elapsed time.
    {:ok, {duration, System.monotonic_time(:millisecond)}}
  end
end

# No usable limit (e.g. `:infinity`): hand back a marker in the same `{:ok, value}`
# shape as the limited case so callers can match both with one pattern.
defp check_read_duration(_, _), do: {:ok, :no_duration_limit}

# Adds the time elapsed since `started` to the duration accumulated so far.
defp increase_read_duration({previous_duration, started})
     when is_integer(previous_duration) and is_integer(started) do
  duration = System.monotonic_time(:millisecond) - started
  {:ok, previous_duration + duration}
end

# Without a limit there is nothing to accumulate; keep passing the marker along.
defp increase_read_duration(_) do
  {:ok, :no_duration_limit}
end
338 end