Parse correctly content-type & do not forward content-length
[akkoma] / lib / pleroma / reverse_proxy.ex
1 defmodule Pleroma.ReverseProxy do
# Client request headers that are copied onto the upstream request.
@keep_req_headers [
  "accept",
  "user-agent",
  "accept-encoding",
  "cache-control",
  "if-modified-since",
  "if-none-match",
  "range"
]

# Upstream response headers that count as caching information.
@resp_cache_headers ["etag", "date", "last-modified", "cache-control"]

# Upstream response headers that are copied onto the proxied response.
@keep_resp_headers @resp_cache_headers ++
                     ["content-type", "content-disposition", "accept-ranges", "vary"]

# `cache-control` value applied when upstream sent none of the caching headers.
@default_cache_control_header "public, max-age=1209600"

# Upstream status codes that are proxied; any other code is reported as an error.
@valid_resp_codes [200, 206, 304]

# Default for the `max_read_duration` option, in milliseconds.
@max_read_duration :timer.minutes(2)

# Default for the `max_body_length` option.
@max_body_length :infinity

# HTTP methods accepted by `call/3`.
@methods ["GET", "HEAD"]
11
12 @moduledoc """
13 A reverse proxy.
14
15 Pleroma.ReverseProxy.call(conn, url, options)
16
17 It is not meant to be added into a plug pipeline, but to be called from another plug or controller.
18
19 Supports `#{inspect(@methods)}` HTTP methods, and only allows `#{inspect(@valid_resp_codes)}` status codes.
20
21 Responses are chunked to the client while downloading from the upstream.
22
Some request / response headers are preserved:
24
25 * request: `#{inspect(@keep_req_headers)}`
26 * response: `#{inspect(@keep_resp_headers)}`
27
28 If no caching headers (`#{inspect(@resp_cache_headers)}`) are returned by upstream, `cache-control` will be
29 set to `#{inspect(@default_cache_control_header)}`.
30
31 Options:
32
* `redirect_on_failure` (default `false`): redirects the client to the real remote URL if there is any HTTP
  error. Errors during body processing are not redirected, because the response is already being chunked.
  Redirecting may expose the remote URL, client IPs, ….
36
37 * `max_body_length` (default `#{inspect(@max_body_length)}`): limits the content length to be approximately the
38 specified length. It is validated with the `content-length` header and also verified when proxying.
39
40 * `max_read_duration` (default `#{inspect(@max_read_duration)}` ms): the total time the connection is allowed to
41 read from the remote upstream.
42
43 * `inline_content_types`:
44 * `true` will not alter `content-disposition` (up to the upstream),
45 * `false` will add `content-disposition: attachment` to any request,
46 * a list of whitelisted content types
47
48 * `keep_user_agent` will forward the client's user-agent to the upstream. This may be useful if the upstream is
49 doing content transformation (encoding, …) depending on the request.
50
51 * `req_headers`, `resp_headers` additional headers.
52
53 * `http`: options for [hackney](https://github.com/benoitc/hackney).
54
55 """
# HTTP client modules, read from the :pleroma application environment while the
# module is being compiled; they fall back to :hackney and HTTPoison.
@hackney Application.get_env(:pleroma, :hackney, :hackney)
@httpoison Application.get_env(:pleroma, :httpoison, HTTPoison)

# Base hackney options; the `http` option given to `call/3` is merged over them.
@default_hackney_options [follow_redirect: true]
60
# Content types that are served inline when the `inline_content_types` option
# is not given; every other type gets `content-disposition: attachment`.
@inline_content_types ~w(
  image/gif
  image/jpeg
  image/jpg
  image/png
  image/svg+xml
  audio/mpeg
  audio/mp3
  video/webm
  video/mp4
  video/quicktime
)
73
74 require Logger
75 import Plug.Conn
76
@typedoc """
A single option accepted by `call/3`. See the module documentation for the meaning of each key.

`:attachment_name` is the file name used in the `content-disposition` header when the response is
sent as an attachment; when it is not given, `call/3` derives it from the URL.
"""
@type option() ::
        {:keep_user_agent, boolean()}
        | {:max_read_duration, :timer.time() | :infinity}
        | {:max_body_length, non_neg_integer() | :infinity}
        | {:http, keyword()}
        | {:req_headers, [{String.t(), String.t()}]}
        | {:resp_headers, [{String.t(), String.t()}]}
        | {:inline_content_types, boolean() | [String.t()]}
        | {:redirect_on_failure, boolean()}
        | {:attachment_name, String.t()}
86
@doc """
Proxies the request in `conn` to `url` and replies on `conn`.

Only the methods in `@methods` are proxied; any other method is answered with a 400.
The returned connection is always halted.

  * A reply with a body is streamed to the client in chunks.
  * A reply without a client reference (the shape used for bodyless replies) is answered
    with the filtered upstream headers and an empty body.
  * An upstream status outside `@valid_resp_codes`, or a transport error, is answered with
    an error response, or with a redirect to `url` when `redirect_on_failure` is set.
"""
@spec call(Plug.Conn.t(), url :: String.t(), [option()]) :: Plug.Conn.t()
def call(conn, url, opts \\ [])

def call(conn = %{method: method}, url, opts) when method in @methods do
  hackney_opts =
    @default_hackney_options
    |> Keyword.merge(Keyword.get(opts, :http, []))
    |> @httpoison.process_request_options()

  req_headers = build_req_headers(conn.req_headers, opts)

  # A caller-supplied :attachment_name wins over the name derived from the URL.
  opts =
    if filename = Pleroma.Web.MediaProxy.filename(url) do
      Keyword.put_new(opts, :attachment_name, filename)
    else
      opts
    end

  # NOTE(review): when the content-length check fails, the client returned by
  # request/4 is not closed here — confirm the connection is reclaimed elsewhere.
  with {:ok, code, headers, client} <- request(method, url, req_headers, hackney_opts),
       :ok <- header_lenght_constraint(headers, Keyword.get(opts, :max_body_length)) do
    response(conn, client, url, code, headers, opts)
  else
    {:ok, code, headers} ->
      conn
      |> head_response(url, code, headers, opts)
      |> halt()

    {:error, {:invalid_http_response, code}} ->
      Logger.error("#{__MODULE__}: request to #{inspect(url)} failed with HTTP status #{code}")

      conn
      |> error_or_redirect(url, code, "Request failed: " <> safe_reason_phrase(code), opts)
      |> halt()

    {:error, error} ->
      Logger.error("#{__MODULE__}: request to #{inspect(url)} failed: #{inspect(error)}")

      conn
      |> error_or_redirect(url, 500, "Request failed", opts)
      |> halt()
  end
end

def call(conn, _, _) do
  conn
  |> send_resp(400, Plug.Conn.Status.reason_phrase(400))
  |> halt()
end

# Returns the reason phrase for `code`, or a generic label when Plug does not
# know the status code. Upstreams can answer with non-standard codes, and
# Plug.Conn.Status.reason_phrase/1 raises ArgumentError for those.
defp safe_reason_phrase(code) do
  Plug.Conn.Status.reason_phrase(code)
rescue
  ArgumentError -> "HTTP status #{code}"
end
137
# Sends the upstream request through the configured hackney module.
#
# Returns:
#   * `{:ok, code, headers, client}` - accepted status, body still to be streamed
#   * `{:ok, code, headers}`         - accepted status, reply without a client reference
#   * `{:error, {:invalid_http_response, code}}` - status not in @valid_resp_codes
#   * `{:error, reason}`             - the request itself failed
#
# Header names in the returned headers are downcased.
defp request(method, url, headers, hackney_opts) do
  Logger.debug("#{__MODULE__} #{method} #{url} #{inspect(headers)}")
  method = method |> String.downcase() |> String.to_existing_atom()

  case @hackney.request(method, url, headers, "", hackney_opts) do
    {:ok, code, headers, client} when code in @valid_resp_codes ->
      {:ok, code, downcase_headers(headers), client}

    {:ok, code, headers} when code in @valid_resp_codes ->
      {:ok, code, downcase_headers(headers)}

    {:ok, code, _headers, _client} ->
      {:error, {:invalid_http_response, code}}

    # A reply without a client reference can also carry a rejected status;
    # without this clause it would raise CaseClauseError.
    {:ok, code, _headers} ->
      {:error, {:invalid_http_response, code}}

    {:error, error} ->
      {:error, error}
  end
end
156
# Sends the filtered upstream headers, then streams the upstream body to the
# client as a chunked response. The returned connection is halted. When
# streaming stops with an error the hackney client is closed; any error other
# than `:closed` is also logged.
defp response(conn, client, url, status, headers, opts) do
  resp_headers = build_resp_headers(headers, opts)

  chunked_conn =
    conn
    |> put_resp_headers(resp_headers)
    |> send_chunked(status)

  case chunk_reply(chunked_conn, client, opts) do
    {:ok, conn} ->
      halt(conn)

    {:error, reason, conn} ->
      if reason != :closed do
        Logger.warn(
          "#{__MODULE__} request to #{url} failed while reading/chunking: #{inspect(reason)}"
        )
      end

      :hackney.close(client)
      halt(conn)
  end
end
181
# Streams the upstream body to `conn`, starting with zero bytes sent and zero
# read time spent.
defp chunk_reply(conn, client, opts) do
  chunk_reply(conn, client, opts, 0, 0)
end

# Reads one chunk from upstream and forwards it, then recurses until upstream
# reports `:done`. Returns `{:ok, conn}` or `{:error, reason, conn}`.
#
# `sent_so_far` is the number of body bytes read so far and `duration` the
# accumulated read time; both are checked against their limits on every chunk.
defp chunk_reply(conn, client, opts, sent_so_far, duration) do
  max_read_duration = Keyword.get(opts, :max_read_duration, @max_read_duration)
  # The option is named :max_body_length (the same key call/3 uses for the
  # content-length check); reading any other key leaves the limit unenforced.
  max_body_length = Keyword.get(opts, :max_body_length, @max_body_length)

  with {:ok, duration} <- check_read_duration(duration, max_read_duration),
       {:ok, data} <- @hackney.stream_body(client),
       {:ok, duration} <- increase_read_duration(duration),
       sent_so_far = sent_so_far + byte_size(data),
       :ok <- body_size_constraint(sent_so_far, max_body_length),
       {:ok, conn} <- chunk(conn, data) do
    chunk_reply(conn, client, opts, sent_so_far, duration)
  else
    :done -> {:ok, conn}
    {:error, error} -> {:error, error, conn}
  end
end
203
# Answers a reply that has no body to stream: sets the filtered upstream
# headers and sends `code` with an empty body.
defp head_response(conn, _url, code, headers, opts) do
  resp_headers = build_resp_headers(headers, opts)
  conn_with_headers = put_resp_headers(conn, resp_headers)

  send_resp(conn_with_headers, code, "")
end
209
# Reports a failed upstream request: redirects the client to `url` when the
# `redirect_on_failure` option is set, otherwise sends `code` with `body`.
# The returned connection is halted.
defp error_or_redirect(conn, url, code, body, opts) do
  redirect? = Keyword.get(opts, :redirect_on_failure, false)

  replied =
    if redirect? do
      Phoenix.Controller.redirect(conn, external: url)
    else
      send_resp(conn, code, body)
    end

  halt(replied)
end
221
# Downcases the name of every `{name, value}` header; values are left untouched.
defp downcase_headers(headers) do
  Enum.map(headers, &downcase_header_name/1)
end

# Downcases the name of a single `{name, value}` header.
defp downcase_header_name({name, value}), do: {String.downcase(name), value}
227
# Returns the bare media type from the `content-type` header: parameters such
# as `; charset=utf-8` are dropped, surrounding whitespace is trimmed and the
# type is downcased (media types are case-insensitive), so it can be compared
# against a lowercase whitelist. Falls back to "application/octet-stream" when
# the header is absent. Expects header names to be downcased already.
defp get_content_type(headers) do
  {_, content_type} =
    List.keyfind(headers, "content-type", 0, {"content-type", "application/octet-stream"})

  content_type
  |> String.split(";", parts: 2)
  |> hd()
  |> String.trim()
  |> String.downcase()
end
233
# Sets every `{name, value}` pair of the header list on the connection, in
# list order, and returns the updated connection.
defp put_resp_headers(conn, []), do: conn

defp put_resp_headers(conn, [{name, value} | remaining]) do
  conn
  |> put_resp_header(name, value)
  |> put_resp_headers(remaining)
end
239
# Builds the headers sent to the upstream.
#
# Client headers are downcased and reduced to those in @keep_req_headers, then
# the `req_headers` option is appended. With `keep_user_agent: true` the
# client's user-agent is forwarded as received; otherwise (the default) it is
# replaced by the instance's own user-agent, as described in the module
# documentation.
defp build_req_headers(headers, opts) do
  kept =
    headers
    |> downcase_headers()
    |> Enum.filter(fn {name, _value} -> name in @keep_req_headers end)

  headers = kept ++ Keyword.get(opts, :req_headers, [])

  if Keyword.get(opts, :keep_user_agent, false) do
    headers
  else
    List.keystore(
      headers,
      "user-agent",
      0,
      {"user-agent", Pleroma.Application.user_agent()}
    )
  end
end
260
# Builds the headers sent back to the client: upstream headers reduced to
# @keep_resp_headers, completed with the cache and content-disposition
# headers, followed by the `resp_headers` option.
defp build_resp_headers(headers, opts) do
  proxied_headers =
    headers
    |> Enum.filter(fn {name, _value} -> name in @keep_resp_headers end)
    |> build_resp_cache_headers(opts)
    |> build_resp_content_disposition_header(opts)

  proxied_headers ++ Keyword.get(opts, :resp_headers, [])
end
269
# Leaves the headers untouched when at least one of @resp_cache_headers is
# present; otherwise sets `cache-control` to @default_cache_control_header.
defp build_resp_cache_headers(headers, _opts) do
  case Enum.find(headers, fn {name, _value} -> name in @resp_cache_headers end) do
    nil ->
      List.keystore(headers, "cache-control", 0, {"cache-control", @default_cache_control_header})

    _cache_header ->
      headers
  end
end
279
# Forces `content-disposition: attachment` when the response content type must
# not be displayed inline.
#
# The `inline_content_types` option (default @inline_content_types) decides:
#   * a list  - attachment unless the content type is in the list
#   * `false` - always attachment
#   * anything else - headers are left as sent by upstream
#
# The file name comes from the `attachment_name` option (default "attachment")
# and is emitted as a quoted string, so names containing spaces, `;` or quotes
# still produce a well-formed header.
defp build_resp_content_disposition_header(headers, opts) do
  opt = Keyword.get(opts, :inline_content_types, @inline_content_types)

  content_type = get_content_type(headers)

  attachment? =
    cond do
      is_list(opt) -> content_type not in opt
      opt == false -> true
      true -> false
    end

  if attachment? do
    filename =
      opts
      |> Keyword.get(:attachment_name, "attachment")
      |> quote_filename()

    disposition = "attachment; filename=" <> filename
    List.keystore(headers, "content-disposition", 0, {"content-disposition", disposition})
  else
    headers
  end
end

# Wraps `name` in double quotes, backslash-escaping embedded backslashes and
# double quotes.
defp quote_filename(name) do
  escaped =
    name
    |> String.replace("\\", "\\\\")
    |> String.replace("\"", "\\\"")

  "\"" <> escaped <> "\""
end
299
# Checks the `content-length` header against `limit`.
#
# Returns `{:error, :body_too_large}` only when the header is present, parses
# as an integer and exceeds a positive integer `limit`. A missing or unparsable
# header, or any other kind of limit, yields `:ok`.
defp header_lenght_constraint(headers, limit) when is_integer(limit) and limit > 0 do
  case List.keyfind(headers, "content-length", 0) do
    {_name, raw_length} ->
      case Integer.parse(raw_length) do
        {length, _rest} when length > limit -> {:error, :body_too_large}
        _within_limit_or_unparsable -> :ok
      end

    _absent ->
      :ok
  end
end

defp header_lenght_constraint(_, _), do: :ok
315
# Checks the number of body bytes read so far against `limit`.
#
# Returns `{:error, :body_too_large}` once `size` exceeds a positive integer
# `limit`. A body of exactly `limit` bytes is accepted, in line with the
# content-length check, which accepts `size <= limit`. Any other kind of limit
# (`:infinity`, `nil`, ...) disables the check.
defp body_size_constraint(size, limit) when is_integer(limit) and limit > 0 and size > limit do
  {:error, :body_too_large}
end

defp body_size_constraint(_, _), do: :ok
321
# Read-duration bookkeeping for the chunking loop. Each iteration calls
# check_read_duration/2 before reading a chunk and increase_read_duration/1
# after it; both return `{:ok, value}` so the loop can match them uniformly.

# Verifies that the accumulated read time (ms) is still within `max`.
#
# With an integer `duration` and a positive integer `max`, returns
# `{:error, :read_duration_exceeded}` once the duration is over the limit,
# otherwise `{:ok, {duration, started_at}}`, where `started_at` is the current
# system time in milliseconds. With any other arguments (for example
# `max = :infinity`) no limit applies and `{:ok, :no_duration_limit}` is returned.
defp check_read_duration(duration, max)
     when is_integer(duration) and is_integer(max) and max > 0 do
  if duration > max do
    {:error, :read_duration_exceeded}
  else
    Logger.debug("Duration #{inspect(duration)}")
    {:ok, {duration, :erlang.system_time(:millisecond)}}
  end
end

defp check_read_duration(_, _), do: {:ok, :no_duration_limit}

# Adds the time elapsed since `started` to the previously accumulated duration.
# When no limit is tracked, the `:no_duration_limit` marker is passed through.
defp increase_read_duration({previous_duration, started})
     when is_integer(previous_duration) and is_integer(started) do
  duration = :erlang.system_time(:millisecond) - started
  {:ok, previous_duration + duration}
end

defp increase_read_duration(_) do
  {:ok, :no_duration_limit}
end
343 end