27dbf7661a0fcd75033f09356109017e3b410e51
[akkoma] / lib / pleroma / instances / instance.ex
1 # Pleroma: A lightweight social networking server
2 # Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
3 # SPDX-License-Identifier: AGPL-3.0-only
4
5 defmodule Pleroma.Instances.Instance do
@moduledoc """
Ecto schema for the `instances` table, plus helpers built around it.

The helpers track per-host reachability (`unreachable_since`), store scraped
metadata (favicon and nodeinfo), look instances up (optionally through a
cache) and enqueue deletion of all users belonging to a host.
"""

# Cache module used by `get_cached_by_url/1`. A module attribute is evaluated
# when this module is compiled, so the `[:cachex, :provider]` setting
# (default `Cachex`) is fixed at that point.
@cachex Pleroma.Config.get([:cachex, :provider], Cachex)
9
10 alias Pleroma.Instances
11 alias Pleroma.Instances.Instance
12 alias Pleroma.Repo
13 alias Pleroma.User
14 alias Pleroma.Workers.BackgroundWorker
15
16 use Ecto.Schema
17
18 import Ecto.Query
19 import Ecto.Changeset
20
21 require Logger
22
schema "instances" do
  # Host name the record is keyed by (see the unique constraint in changeset/2).
  field :host, :string
  # Set by set_unreachable/2, cleared (nil) by set_reachable/1.
  field :unreachable_since, :naive_datetime_usec
  # Favicon URL stored by update_metadata/1.
  field :favicon, :string
  # Time of the last metadata write; needs_update/1 compares against it.
  field :metadata_updated_at, :naive_datetime
  # Decoded nodeinfo document stored by update_metadata/1.
  field :nodeinfo, :map, default: %{}

  timestamps()
end
32
@doc """
Forwards to `Pleroma.Instances.host/1` with the given URL or host.
"""
def host(url_or_host), do: Instances.host(url_or_host)
34
@doc """
Builds a changeset for an instance.

Casts `:host`, `:unreachable_since`, `:favicon`, `:nodeinfo` and
`:metadata_updated_at` from `params`, requires `:host` and registers the
unique constraint on `:host`.
"""
def changeset(struct, params \\ %{}) do
  permitted = [:host, :unreachable_since, :favicon, :nodeinfo, :metadata_updated_at]

  casted = cast(struct, params, permitted)
  checked = validate_required(casted, [:host])

  unique_constraint(checked, :host)
end
41
@doc """
Filters a list of URLs/hosts down to the reachable ones.

Returns a map of `entry => unreachable_since` for every binary entry whose
host either has no `unreachable_since` recorded or whose `unreachable_since`
is later than `Instances.reachability_datetime_threshold/0`. Non-binary
entries are left out of the result.
"""
def filter_reachable([]), do: %{}

def filter_reachable(urls_or_hosts) when is_list(urls_or_hosts) do
  # Hosts to look up; entries that resolve to nil or "" are dropped.
  known_hosts =
    urls_or_hosts
    |> Enum.map(fn entry -> entry && host(entry) end)
    |> Enum.reject(fn candidate -> to_string(candidate) == "" end)

  since_by_host =
    Instance
    |> where([i], i.host in ^known_hosts)
    |> select([i], {i.host, i.unreachable_since})
    |> Repo.all()
    |> Map.new()

  threshold = Instances.reachability_datetime_threshold()

  # The single-element generator binds `since` without filtering on its value.
  for entry <- urls_or_hosts,
      is_binary(entry),
      since <- [Map.get(since_by_host, host(entry))],
      !since || NaiveDateTime.compare(since, threshold) == :gt,
      into: %{} do
    {entry, since}
  end
end
73
@doc """
Tells whether the instance behind `url_or_host` is considered reachable.

For a binary argument the host is looked up in the `instances` table; it is
unreachable only when a row exists whose `unreachable_since` is at or before
`Instances.reachability_datetime_threshold/0`. Hosts without a row, or with a
more recent `unreachable_since`, are reachable.

Any non-binary argument (for example the `nil` host of a relative URI) is
treated as reachable.
"""
def reachable?(url_or_host) when is_binary(url_or_host) do
  !Repo.one(
    from(i in Instance,
      where:
        i.host == ^host(url_or_host) and
          i.unreachable_since <= ^Instances.reachability_datetime_threshold(),
      select: true
    )
  )
end

# Catch-all: the previous second clause repeated the `is_binary/1` guard, so it
# could never match and non-binary input raised FunctionClauseError.
def reachable?(_), do: true
86
@doc """
Clears the `unreachable_since` mark of the instance behind `url_or_host`.

Returns `{:ok, instance}` when a record for the host exists and was updated,
`nil` when there is no record for the host, and `{:error, nil}` for a
non-binary argument. A failing update raises a `MatchError`.
"""
def set_reachable(url_or_host) when is_binary(url_or_host) do
  case Repo.get_by(Instance, %{host: host(url_or_host)}) do
    %Instance{} = record ->
      {:ok, _instance} = Repo.update(changeset(record, %{unreachable_since: nil}))

    missing ->
      missing
  end
end

def set_reachable(_), do: {:error, nil}
98
@doc """
Marks the instance behind `url_or_host` as unreachable.

`unreachable_since` is passed through `parse_datetime/1`; when that yields a
falsy value the current UTC time is used instead.

  * no record for the host: a new one is inserted with the timestamp
  * record already marked with an equal or earlier timestamp: returned
    untouched as `{:ok, record}`
  * otherwise the record is updated with the timestamp

Returns the `Repo.insert/1` / `Repo.update/1` result, `{:ok, record}`, or
`{:error, nil}` for a non-binary `url_or_host`.
"""
def set_unreachable(url_or_host, unreachable_since \\ nil)

def set_unreachable(url_or_host, unreachable_since) when is_binary(url_or_host) do
  since = parse_datetime(unreachable_since) || NaiveDateTime.utc_now()
  instance_host = host(url_or_host)

  case Repo.get_by(Instance, %{host: instance_host}) do
    nil ->
      %Instance{}
      |> changeset(%{unreachable_since: since, host: instance_host})
      |> Repo.insert()

    record ->
      # Keep the stored mark when it is not later than the new one.
      if record.unreachable_since &&
           NaiveDateTime.compare(record.unreachable_since, since) != :gt do
        {:ok, record}
      else
        record
        |> changeset(%{unreachable_since: since})
        |> Repo.update()
      end
  end
end

def set_unreachable(_, _), do: {:error, nil}
126
@doc """
Lists `{host, unreachable_since}` tuples for every instance whose
`unreachable_since` is earlier than
`Instances.reachability_datetime_threshold/0`, ordered by `unreachable_since`.
"""
def get_consistently_unreachable do
  threshold = Instances.reachability_datetime_threshold()

  Instance
  |> where([i], ^threshold > i.unreachable_since)
  |> order_by([i], i.unreachable_since)
  |> select([i], {i.host, i.unreachable_since})
  |> Repo.all()
end
137
# Normalises the `unreachable_since` argument of set_unreachable/2.
#
# An ISO 8601 string is parsed into a NaiveDateTime. A string that cannot be
# parsed yields nil, so the caller's `|| NaiveDateTime.utc_now()` fallback
# applies. NaiveDateTime.from_iso8601/1 returns `{:ok, dt} | {:error, reason}`,
# so the tuple has to be unwrapped here instead of being handed back as-is.
#
# Any non-binary value (nil, an existing NaiveDateTime, ...) is returned
# unchanged.
defp parse_datetime(datetime) when is_binary(datetime) do
  case NaiveDateTime.from_iso8601(datetime) do
    {:ok, parsed} -> parsed
    {:error, _reason} -> nil
  end
end

defp parse_datetime(datetime), do: datetime
143
@doc """
Tells whether an instance's metadata should be (re)fetched.

Returns `true` for `nil`, for an instance that has no `metadata_updated_at`,
and for one whose `metadata_updated_at` is more than 86_400 seconds before
the current UTC time.
"""
def needs_update(nil), do: true

def needs_update(%Instance{metadata_updated_at: updated_at}) do
  max_age_seconds = 86_400

  is_nil(updated_at) or
    NaiveDateTime.diff(NaiveDateTime.utc_now(), updated_at) > max_age_seconds
end
152
@doc """
Builds an unsaved `%Instance{}` describing this server, using the endpoint's
host, its URL with `/favicon.png` appended, and the raw nodeinfo from the
nodeinfo controller.
"""
def local do
  own_host = Pleroma.Web.Endpoint.host()
  own_favicon = Pleroma.Web.Endpoint.url() <> "/favicon.png"
  own_nodeinfo = Pleroma.Web.Nodeinfo.NodeinfoController.raw_nodeinfo()

  %Instance{host: own_host, favicon: own_favicon, nodeinfo: own_nodeinfo}
end
160
@doc """
Refreshes the stored metadata for the instance identified by `uri`.

Returns `{:discard, :unreachable}` when the host is not reachable; otherwise
returns whatever `do_update_metadata/2` produces for the URI and the record
currently stored for its host (if any).
"""
def update_metadata(%URI{host: host} = uri) do
  Logger.debug("Checking metadata for #{host}")
  stored = Repo.get_by(Instance, %{host: host})

  cond do
    reachable?(host) -> do_update_metadata(uri, stored)
    true -> {:discard, :unreachable}
  end
end
171
# Writes freshly scraped favicon/nodeinfo for `uri`.
#
#   * no stored record: scrape, then insert a new row
#   * stored record that needs_update/1: scrape, then update it
#   * stored record that is still fresh: {:discard, "Does not require update"}
defp do_update_metadata(%URI{host: host} = uri, existing_record) do
  cond do
    !existing_record ->
      favicon = scrape_favicon(uri)
      nodeinfo = scrape_nodeinfo(uri)

      Logger.info("Creating metadata for #{host}")

      %Instance{}
      |> changeset(metadata_changes(host, favicon, nodeinfo))
      |> Repo.insert()

    needs_update(existing_record) ->
      Logger.info("Updating metadata for #{host}")
      favicon = scrape_favicon(uri)
      nodeinfo = scrape_nodeinfo(uri)

      existing_record
      |> changeset(metadata_changes(host, favicon, nodeinfo))
      |> Repo.update()

    true ->
      {:discard, "Does not require update"}
  end
end

# Changeset params shared by the insert and update paths; stamps the write
# with the current UTC time.
defp metadata_changes(host, favicon, nodeinfo) do
  %{
    host: host,
    favicon: favicon,
    nodeinfo: nodeinfo,
    metadata_updated_at: NaiveDateTime.utc_now()
  }
end
206
@doc """
Returns the stored favicon for the host of `uri`, or `nil` when no instance
record exists for that host.
"""
def get_favicon(%URI{host: host}) do
  case Repo.get_by(Instance, %{host: host}) do
    nil -> nil
    record -> record.favicon
  end
end
216
# Fetches and decodes the nodeinfo 2.0 document of a remote instance.
#
# Steps: check that nodeinfo scraping is enabled and the host is reachable,
# fetch `/.well-known/nodeinfo`, pick the link whose "rel" is the 2.0 schema,
# fetch its "href", and decode the body if it is shorter than 50_000
# characters. Returns the decoded document, or nil when any step fails.
#
# The remote JSON is untrusted, so its shape is checked before use: "links"
# must be a list, only map entries are considered, "href" must be a string and
# the nodeinfo body must be a binary. Without these checks a malformed
# response raises instead of yielding nil.
defp scrape_nodeinfo(%URI{} = instance_uri) do
  with true <- Pleroma.Config.get([:instances_nodeinfo, :enabled]),
       {_, true} <- {:reachable, reachable?(instance_uri.host)},
       {:ok, %Tesla.Env{status: 200, body: body}} <-
         Tesla.get(
           "https://#{instance_uri.host}/.well-known/nodeinfo",
           headers: [{"Accept", "application/json"}]
         ),
       {:ok, %{"links" => links}} when is_list(links) <- Jason.decode(body),
       %{"href" => href} when is_binary(href) <-
         Enum.find(
           links,
           &match?(%{"rel" => "http://nodeinfo.diaspora.software/ns/schema/2.0"}, &1)
         ),
       {:ok, %Tesla.Env{body: data}} when is_binary(data) <-
         Pleroma.HTTP.get(href, [{"accept", "application/json"}], []),
       {:length, true} <- {:length, String.length(data) < 50_000},
       {:ok, nodeinfo} <- Jason.decode(data) do
    nodeinfo
  else
    {:reachable, false} ->
      Logger.debug(
        "Instance.scrape_nodeinfo(\"#{to_string(instance_uri)}\") ignored unreachable host"
      )

      nil

    {:length, false} ->
      Logger.debug(
        "Instance.scrape_nodeinfo(\"#{to_string(instance_uri)}\") ignored too long body"
      )

      nil

    _ ->
      nil
  end
end
254
# Looks up the favicon URL advertised by a remote instance's HTML page.
#
# Requires favicon scraping to be enabled and the host to be reachable. The
# page at `instance_uri` is fetched, the first `link[rel=icon]` href is merged
# onto `instance_uri`, and the resulting URL is returned when it is shorter
# than 255 characters. Every other outcome yields nil.
defp scrape_favicon(%URI{} = instance_uri) do
  with true <- Pleroma.Config.get([:instances_favicons, :enabled]),
       {:reachable, true} <- {:reachable, reachable?(instance_uri.host)},
       {:ok, %Tesla.Env{body: page}} <-
         Pleroma.HTTP.get(to_string(instance_uri), [{"accept", "text/html"}], []),
       [icon_href | _] when is_binary(icon_href) <-
         page |> Floki.parse_document!() |> Floki.attribute("link[rel=icon]", "href"),
       icon_url when is_binary(icon_url) <-
         instance_uri |> URI.merge(icon_href) |> to_string(),
       true <- String.length(icon_url) < 255 do
    icon_url
  else
    {:reachable, false} ->
      Logger.debug(
        "Instance.scrape_favicon(\"#{to_string(instance_uri)}\") ignored unreachable host"
      )

      nil

    _ ->
      nil
  end
end
278
@doc """
Enqueues a `"delete_instance"` background job for `host`.

The job removes every user of that instance, and with them their activities
and notifications.
"""
def delete_users_and_activities(host) when is_binary(host) do
  job_args = %{"host" => host}

  BackgroundWorker.enqueue("delete_instance", job_args)
end
286
@doc """
Runs the `:delete_instance` job: streams the users returned by a
`nickname: "@host"` user query in batches of 100 and calls
`User.perform(:delete, user)` for each of them.
"""
def perform(:delete_instance, host) when is_binary(host) do
  batches =
    %{nickname: "@#{host}"}
    |> User.Query.build()
    |> Repo.chunk_stream(100, :batches)

  # Consuming the stream pulls one batch at a time.
  Enum.each(batches, fn batch ->
    Enum.each(batch, &User.perform(:delete, &1))
  end)
end
298
@doc """
Fetches the instance record whose `host` equals the host of `url_or_host`;
returns `nil` when there is none.
"""
def get_by_url(url_or_host) do
  url_or_host
  |> host()
  |> then(&Repo.get_by(Instance, host: &1))
end
303
@doc """
Cached variant of `get_by_url/1`.

For this server's own host it returns `{:ok, local()}` without touching the
cache. Otherwise the lookup goes through the `:instances_cache` under the key
`"instances:<host>"`: a found record is cached and returned as
`{:ok, instance}`, a missing one is not cached and yields `nil`.
"""
def get_cached_by_url(url_or_host) do
  instance_host = host(url_or_host)

  if instance_host != Pleroma.Web.Endpoint.host() do
    @cachex.fetch!(:instances_cache, "instances:#{instance_host}", fn _key ->
      case get_by_url(instance_host) do
        %Instance{} = instance -> {:commit, {:ok, instance}}
        _ -> {:ignore, nil}
      end
    end)
  else
    {:ok, local()}
  end
end
319 end