fcf3181bf530550cb18120a6b873b0772f6b67db
[akkoma] / lib / pleroma / instances / instance.ex
1 # Pleroma: A lightweight social networking server
2 # Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
3 # SPDX-License-Identifier: AGPL-3.0-only
4
5 defmodule Pleroma.Instances.Instance do
6 @moduledoc "Instance."
7
8 @cachex Pleroma.Config.get([:cachex, :provider], Cachex)
9
10 alias Pleroma.Instances
11 alias Pleroma.Instances.Instance
12 alias Pleroma.Repo
13 alias Pleroma.User
14 alias Pleroma.Workers.BackgroundWorker
15
16 use Ecto.Schema
17
18 import Ecto.Query
19 import Ecto.Changeset
20
21 require Logger
22
# Persisted state kept for each known instance, one row per host.
schema "instances" do
  # Lookup key used by the queries in this module; required and unique (see `changeset/2`).
  field(:host, :string)

  # Written by `set_unreachable/2`, reset to `nil` by `set_reachable/1`.
  field(:unreachable_since, :naive_datetime_usec)

  # Scraped metadata together with the time it was last written by the metadata update.
  field(:favicon, :string)
  field(:metadata_updated_at, :naive_datetime)
  field(:nodeinfo, :map, default: %{})

  timestamps()
end
32
@doc """
Delegates to `Pleroma.Instances.host/1`.

The result is what this module stores in, and compares against, the `host` column.
"""
defdelegate host(url_or_host), to: Pleroma.Instances
34
@doc """
Builds a changeset for an instance record.

Casts `:host`, `:unreachable_since`, `:favicon`, `:nodeinfo` and
`:metadata_updated_at` from `params`, requires `:host` to be present and
registers the unique constraint on `:host`.
"""
def changeset(struct, params \\ %{}) do
  permitted = [:host, :unreachable_since, :favicon, :nodeinfo, :metadata_updated_at]

  validated =
    struct
    |> cast(params, permitted)
    |> validate_required([:host])

  unique_constraint(validated, :host)
end
41
@doc """
Returns a map of the reachable entries of `urls_or_hosts`.

Keys are the binary entries exactly as they were passed in; values are the
stored `unreachable_since` timestamp, or `nil` when none is recorded for the
entry's host. An entry is kept when it has no timestamp or when its timestamp
is more recent than `Instances.reachability_datetime_threshold/0`. Non-binary
entries never appear in the result.
"""
def filter_reachable([]), do: %{}

def filter_reachable(urls_or_hosts) when is_list(urls_or_hosts) do
  lookup_hosts =
    urls_or_hosts
    |> Enum.map(fn item -> item && host(item) end)
    |> Enum.reject(fn resolved -> to_string(resolved) == "" end)

  host_query =
    from(i in Instance,
      where: i.host in ^lookup_hosts,
      select: {i.host, i.unreachable_since}
    )

  since_by_host = host_query |> Repo.all() |> Map.new()

  threshold = Instances.reachability_datetime_threshold()

  urls_or_hosts
  |> Enum.filter(&is_binary/1)
  |> Enum.flat_map(fn entry ->
    since = Map.get(since_by_host, host(entry))

    # Dropped only when a timestamp exists and it is not newer than the threshold.
    if since && NaiveDateTime.compare(since, threshold) != :gt do
      []
    else
      [{entry, since}]
    end
  end)
  |> Map.new()
end
73
@doc """
Tells whether the instance behind `url_or_host` is currently considered reachable.

A host counts as unreachable only when a record exists for it whose
`unreachable_since` is at or before
`Instances.reachability_datetime_threshold/0`. Hosts without a record, without
a timestamp, or with a more recent timestamp are reachable.

Non-binary input cannot name a host, so no unreachability record can exist
for it and `true` is returned, mirroring the catch-all clauses of
`set_reachable/1` and `set_unreachable/2`.
"""
def reachable?(url_or_host) when is_binary(url_or_host) do
  instance_host = host(url_or_host)
  threshold = Instances.reachability_datetime_threshold()

  stale_record =
    Repo.one(
      from(i in Instance,
        where: i.host == ^instance_host and i.unreachable_since <= ^threshold,
        select: true
      )
    )

  !stale_record
end

# The original second clause repeated the binary guard and could never match;
# it is now the fallback it was evidently meant to be.
def reachable?(_), do: true
86
@doc """
Clears the `unreachable_since` marker of the instance behind `url_or_host`.

Returns `{:ok, instance}` after updating an existing record, `nil` when no
record exists for the host, and `{:error, nil}` for non-binary input. A failed
update raises a `MatchError`.
"""
def set_reachable(url_or_host) when is_binary(url_or_host) do
  case Repo.get_by(Instance, %{host: host(url_or_host)}) do
    %Instance{} = record ->
      {:ok, _instance} =
        record
        |> changeset(%{unreachable_since: nil})
        |> Repo.update()

    missing ->
      # Nothing stored for this host: hand the lookup result back untouched.
      missing
  end
end

def set_reachable(_), do: {:error, nil}
98
@doc """
Records that the instance behind `url_or_host` has been unreachable since
`unreachable_since` (defaulting to the current UTC time).

  * No record for the host: a new one is inserted.
  * The record already has a timestamp that is not later than the new one:
    the record is returned unchanged as `{:ok, record}`, so the earliest
    known timestamp wins.
  * Otherwise the record is updated with the new timestamp.

Returns the result of the insert/update, or `{:error, nil}` for non-binary
`url_or_host`.
"""
def set_unreachable(url_or_host, unreachable_since \\ nil)

def set_unreachable(url_or_host, unreachable_since) when is_binary(url_or_host) do
  since = parse_datetime(unreachable_since) || NaiveDateTime.utc_now()
  instance_host = host(url_or_host)

  case Repo.get_by(Instance, %{host: instance_host}) do
    nil ->
      %Instance{}
      |> changeset(%{unreachable_since: since, host: instance_host})
      |> Repo.insert()

    %Instance{unreachable_since: recorded} = record ->
      if recorded && NaiveDateTime.compare(recorded, since) != :gt do
        {:ok, record}
      else
        record
        |> changeset(%{unreachable_since: since})
        |> Repo.update()
      end
  end
end

def set_unreachable(_, _), do: {:error, nil}
126
@doc """
Lists `{host, unreachable_since}` tuples for every instance whose
`unreachable_since` is older than `Instances.reachability_datetime_threshold/0`,
ordered by `unreachable_since`.
"""
def get_consistently_unreachable do
  threshold = Instances.reachability_datetime_threshold()

  stale_instances =
    from(i in Instance,
      where: ^threshold > i.unreachable_since,
      order_by: [asc: i.unreachable_since],
      select: {i.host, i.unreachable_since}
    )

  Repo.all(stale_instances)
end
137
# Normalises the `unreachable_since` argument of `set_unreachable/2`.
#
# An ISO 8601 string is parsed into a `NaiveDateTime`; a string that cannot be
# parsed yields `nil`, so the caller's `|| NaiveDateTime.utc_now()` fallback
# applies. Any non-binary value (a `NaiveDateTime`, `nil`, ...) is returned
# unchanged.
#
# `NaiveDateTime.from_iso8601/1` returns a tagged tuple; it has to be unwrapped
# here, otherwise the always-truthy tuple leaks into the changeset and into
# `NaiveDateTime.compare/2`.
defp parse_datetime(datetime) when is_binary(datetime) do
  case NaiveDateTime.from_iso8601(datetime) do
    {:ok, parsed} -> parsed
    {:error, _reason} -> nil
  end
end

defp parse_datetime(datetime), do: datetime
143
@doc """
Tells whether the metadata of an instance should be scraped again.

Returns `true` for `nil`, for a record without `metadata_updated_at`, and for
a record whose `metadata_updated_at` lies more than 86 400 seconds (one day)
before the current UTC time.
"""
def needs_update(nil), do: true

def needs_update(%Instance{metadata_updated_at: last_scrape}) do
  case last_scrape do
    nil -> true
    _ -> NaiveDateTime.diff(NaiveDateTime.utc_now(), last_scrape) > 86_400
  end
end
152
@doc """
Builds an in-memory `Instance` struct describing this server: the endpoint
host, the endpoint URL with `/favicon.png` appended, and the raw nodeinfo
produced by the nodeinfo controller. Nothing is written to the database.
"""
def local do
  local_host = Pleroma.Web.Endpoint.host()
  favicon_url = Pleroma.Web.Endpoint.url() <> "/favicon.png"
  raw_nodeinfo = Pleroma.Web.Nodeinfo.NodeinfoController.raw_nodeinfo()

  %Instance{host: local_host, favicon: favicon_url, nodeinfo: raw_nodeinfo}
end
160
@doc """
Refreshes the stored favicon and nodeinfo for the instance at `uri`.

Returns `{:discard, :unreachable}` when the host is not considered reachable;
otherwise returns whatever the create/update step yields.
"""
def update_metadata(%URI{host: instance_host} = uri) do
  Logger.info("Checking metadata for #{instance_host}")
  stored = Repo.get_by(Instance, %{host: instance_host})

  case reachable?(instance_host) do
    true -> do_update_metadata(uri, stored)
    false -> {:discard, :unreachable}
  end
end
171
# Writes freshly scraped metadata for `uri` and caches the resulting record.
#
#   * no existing record      -> scrape, insert, cache
#   * record needing refresh  -> scrape, update, cache
#   * record still fresh      -> `{:discard, "Does not require update"}`
#
# A failed insert/update raises a `MatchError`; on success the return value is
# that of the cache `put`.
defp do_update_metadata(%URI{host: host} = uri, existing_record) do
  cond do
    !existing_record ->
      favicon = scrape_favicon(uri)
      nodeinfo = scrape_nodeinfo(uri)

      Logger.info("Creating metadata for #{host}")

      attrs = %{
        host: host,
        favicon: favicon,
        nodeinfo: nodeinfo,
        metadata_updated_at: NaiveDateTime.utc_now()
      }

      {:ok, created} = %Instance{} |> changeset(attrs) |> Repo.insert()

      @cachex.put(:instances_cache, "instances:#{host}", created)

    needs_update(existing_record) ->
      Logger.info("Updating metadata for #{host}")
      favicon = scrape_favicon(uri)
      nodeinfo = scrape_nodeinfo(uri)

      attrs = %{
        host: host,
        favicon: favicon,
        nodeinfo: nodeinfo,
        metadata_updated_at: NaiveDateTime.utc_now()
      }

      {:ok, refreshed} = existing_record |> changeset(attrs) |> Repo.update()

      @cachex.put(:instances_cache, "instances:#{host}", refreshed)

    true ->
      {:discard, "Does not require update"}
  end
end
212
@doc """
Returns the stored favicon for the host of `uri`, or `nil` when no record
exists for that host.
"""
def get_favicon(%URI{host: host}) do
  case Repo.get_by(Instance, %{host: host}) do
    %Instance{favicon: favicon} -> favicon
    _ -> nil
  end
end
222
# Fetches the nodeinfo 2.0 document advertised by the instance at `instance_uri`.
#
# Returns the decoded document as a map, or `nil` when nodeinfo scraping is
# disabled, the host is unreachable, a request fails, the document is 50 000
# characters or longer, or a response does not have the expected shape.
#
# Everything that comes back from the remote host is untrusted, so each step
# checks the shape it relies on: a `links` value that is not a list, link
# entries that are not maps, a non-string `href`, a non-binary body, or a
# document that is not a JSON object all yield `nil`. Previously these cases
# raised here, or returned a value that could not be cast into the `:map`
# column and crashed the caller.
defp scrape_nodeinfo(%URI{} = instance_uri) do
  nodeinfo_rel = "http://nodeinfo.diaspora.software/ns/schema/2.0"

  with true <- Pleroma.Config.get([:instances_nodeinfo, :enabled]),
       {_, true} <- {:reachable, reachable?(instance_uri.host)},
       # NOTE(review): this request goes through `Tesla.get` directly while the
       # follow-up uses `Pleroma.HTTP.get` — confirm whether that is intended.
       {:ok, %Tesla.Env{status: 200, body: body}} when is_binary(body) <-
         Tesla.get(
           "https://#{instance_uri.host}/.well-known/nodeinfo",
           headers: [{"Accept", "application/json"}]
         ),
       {:ok, %{"links" => links}} when is_list(links) <- Jason.decode(body),
       %{"href" => href} when is_binary(href) <-
         Enum.find(links, &(is_map(&1) and &1["rel"] == nodeinfo_rel)),
       {:ok, %Tesla.Env{body: data}} when is_binary(data) <-
         Pleroma.HTTP.get(href, [{"accept", "application/json"}], []),
       {:length, true} <- {:length, String.length(data) < 50_000},
       {:ok, nodeinfo} when is_map(nodeinfo) <- Jason.decode(data) do
    nodeinfo
  else
    {:reachable, false} ->
      Logger.debug(
        "Instance.scrape_nodeinfo(\"#{to_string(instance_uri)}\") ignored unreachable host"
      )

      nil

    {:length, false} ->
      Logger.debug(
        "Instance.scrape_nodeinfo(\"#{to_string(instance_uri)}\") ignored too long body"
      )

      nil

    _ ->
      nil
  end
end
260
# Looks for a favicon on the page served at `instance_uri`.
#
# Returns the first `link[rel=icon]` href resolved against `instance_uri`, as a
# string shorter than 255 characters. Returns `nil` when favicon scraping is
# disabled, the host is unreachable, the request fails, no such link is found,
# or the resolved URL is too long.
defp scrape_favicon(%URI{} = instance_uri) do
  with true <- Pleroma.Config.get([:instances_favicons, :enabled]),
       {:reachable, true} <- {:reachable, reachable?(instance_uri.host)},
       {:ok, %Tesla.Env{body: html}} <-
         Pleroma.HTTP.get(to_string(instance_uri), [{"accept", "text/html"}], []),
       [favicon_rel | _] when is_binary(favicon_rel) <-
         html |> Floki.parse_document!() |> Floki.attribute("link[rel=icon]", "href"),
       favicon = instance_uri |> URI.merge(favicon_rel) |> to_string(),
       true <- String.length(favicon) < 255 do
    favicon
  else
    {:reachable, false} ->
      Logger.debug(
        "Instance.scrape_favicon(\"#{to_string(instance_uri)}\") ignored unreachable host"
      )

      nil

    _ ->
      nil
  end
end
284
@doc """
Enqueues a `"delete_instance"` background job for `host`.

The job deletes all users of that instance and, with them, those users'
activities and notifications.
"""
def delete_users_and_activities(host) when is_binary(host) do
  job_args = %{"host" => host}
  BackgroundWorker.enqueue("delete_instance", job_args)
end
292
@doc """
Runs the `:delete_instance` job: streams the users matched by the nickname
filter `"@<host>"` in batches of 100 and deletes each of them through
`User.perform(:delete, user)`.
"""
def perform(:delete_instance, host) when is_binary(host) do
  %{nickname: "@#{host}"}
  |> User.Query.build()
  |> Repo.chunk_stream(100, :batches)
  |> Enum.each(fn batch ->
    Enum.each(batch, &User.perform(:delete, &1))
  end)
end
304
@doc """
Fetches the instance record whose `host` matches `url_or_host`, or `nil` when
there is none.
"""
def get_by_url(url_or_host) do
  Repo.get_by(Instance, host: host(url_or_host))
end
309
@doc """
Looks up an instance through the `:instances_cache`.

For the local endpoint host this returns `{:ok, local()}` without consulting
the cache. For any other host a database hit is committed to the cache as
`{:ok, instance}`, while a miss is answered with `{:ignore, nil}` so nothing
is stored.
"""
def get_cached_by_url(url_or_host) do
  instance_host = host(url_or_host)

  if instance_host == Pleroma.Web.Endpoint.host() do
    {:ok, local()}
  else
    cache_key = "instances:#{instance_host}"

    @cachex.fetch!(:instances_cache, cache_key, fn _key ->
      case get_by_url(instance_host) do
        %Instance{} = found -> {:commit, {:ok, found}}
        _ -> {:ignore, nil}
      end
    end)
  end
end
325 end