5ee408f21ea77e7a4dafbd12f4e5bf8b2b601952
[akkoma] / lib / pleroma / instances / instance.ex
1 # Pleroma: A lightweight social networking server
2 # Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
3 # SPDX-License-Identifier: AGPL-3.0-only
4
5 defmodule Pleroma.Instances.Instance do
6 @moduledoc "Instance."
7
8 @cachex Pleroma.Config.get([:cachex, :provider], Cachex)
9
10 alias Pleroma.Instances
11 alias Pleroma.Instances.Instance
12 alias Pleroma.Repo
13 alias Pleroma.User
14 alias Pleroma.Workers.BackgroundWorker
15
16 use Ecto.Schema
17
18 import Ecto.Query
19 import Ecto.Changeset
20
21 require Logger
22
# Database-backed record describing one instance, stored in the "instances" table.
schema "instances" do
  # Host name of the instance; required and unique (see changeset/2).
  field :host, :string
  # Timestamp since which the instance is considered unreachable; nil when
  # no unreachability is recorded (set_reachable/1 resets it to nil).
  field :unreachable_since, :naive_datetime_usec
  # Favicon URL as stored by the metadata update.
  field :favicon, :string
  # Time of the last metadata scrape; compared against "now" in needs_update/1.
  field :metadata_updated_at, :naive_datetime
  # Decoded nodeinfo document; defaults to an empty map.
  field :nodeinfo, :map, default: %{}

  timestamps()
end
32
@doc """
Forwards to `Pleroma.Instances.host/1` with the given URL or host.
"""
def host(url_or_host), do: Instances.host(url_or_host)
34
@doc """
Builds a changeset for an instance record.

Casts the permitted fields out of `params`, requires `:host` to be present and
registers the unique constraint on `:host`.
"""
def changeset(struct, params \\ %{}) do
  permitted_fields = [:host, :unreachable_since, :favicon, :nodeinfo, :metadata_updated_at]

  casted = cast(struct, params, permitted_fields)
  validated = validate_required(casted, [:host])

  unique_constraint(validated, :host)
end
41
@doc """
Filters a list of URLs or hosts down to the ones considered reachable.

Returns a map of `entry => unreachable_since` for every binary entry whose host
either has no recorded `unreachable_since` or has one that is more recent than
`Instances.reachability_datetime_threshold/0`. An empty list yields an empty map.
"""
def filter_reachable([]), do: %{}

def filter_reachable(urls_or_hosts) when is_list(urls_or_hosts) do
  # Hosts to look up; entries whose resolved host stringifies to "" are dropped.
  lookup_hosts =
    Enum.flat_map(urls_or_hosts, fn entry ->
      resolved = entry && host(entry)

      if to_string(resolved) != "" do
        [resolved]
      else
        []
      end
    end)

  lookup_query =
    from(i in Instance,
      where: i.host in ^lookup_hosts,
      select: {i.host, i.unreachable_since}
    )

  since_by_host =
    lookup_query
    |> Repo.all()
    |> Map.new()

  threshold = Instances.reachability_datetime_threshold()

  # Only binary entries can end up in the result; later duplicates overwrite
  # earlier ones, exactly as building the map from a list of pairs would.
  for entry <- urls_or_hosts, is_binary(entry), reduce: %{} do
    reachable ->
      since = Map.get(since_by_host, host(entry))

      if !since || NaiveDateTime.compare(since, threshold) == :gt do
        Map.put(reachable, entry, since)
      else
        reachable
      end
  end
end
73
@doc """
Tells whether the instance behind `url_or_host` is considered reachable.

For a binary argument the instance is unreachable only when a record for its
host exists with an `unreachable_since` at or before
`Instances.reachability_datetime_threshold/0`; in every other case it is
reachable. Any non-binary argument is treated as reachable.
"""
def reachable?(url_or_host) when is_binary(url_or_host) do
  target_host = host(url_or_host)
  threshold = Instances.reachability_datetime_threshold()

  unreachable_query =
    from(i in Instance,
      where: i.host == ^target_host and i.unreachable_since <= ^threshold,
      select: true
    )

  !Repo.one(unreachable_query)
end

# Fallback clause. It used to repeat the `is_binary/1` guard of the clause above
# and therefore could never match; a catch-all mirrors the fallbacks of
# set_reachable/1 and set_unreachable/2 and avoids a FunctionClauseError for
# values such as a nil URI host.
def reachable?(_), do: true
86
@doc """
Clears the `unreachable_since` marker of the instance behind `url_or_host`.

Returns the `{:ok, instance}` tuple of the update when a record exists, and
`nil` when no record is stored for the host. A failing update raises a
`MatchError`. Non-binary arguments yield `{:error, nil}`.
"""
def set_reachable(url_or_host) when is_binary(url_or_host) do
  case Repo.get_by(Instance, %{host: host(url_or_host)}) do
    %Instance{} = record ->
      {:ok, _instance} =
        record
        |> changeset(%{unreachable_since: nil})
        |> Repo.update()

    # No stored record: hand the lookup result (nil) back unchanged.
    missing ->
      missing
  end
end

def set_reachable(_), do: {:error, nil}
98
@doc """
Records the instance behind `url_or_host` as unreachable.

`unreachable_since` is passed through `parse_datetime/1`; when that yields a
falsy value the current UTC time is used instead.

  * no record for the host: a new record is inserted with the timestamp;
  * a record whose stored `unreachable_since` is not later than the new
    timestamp: returned untouched as `{:ok, record}`;
  * otherwise the record is updated with the new timestamp.

Non-binary `url_or_host` values yield `{:error, nil}`.
"""
def set_unreachable(url_or_host, unreachable_since \\ nil)

def set_unreachable(url_or_host, unreachable_since) when is_binary(url_or_host) do
  since = parse_datetime(unreachable_since) || NaiveDateTime.utc_now()
  instance_host = host(url_or_host)

  case Repo.get_by(Instance, %{host: instance_host}) do
    nil ->
      %Instance{}
      |> changeset(%{unreachable_since: since, host: instance_host})
      |> Repo.insert()

    %Instance{unreachable_since: recorded} = record ->
      # Keep the stored marker whenever it is not later than the new one.
      if recorded && NaiveDateTime.compare(recorded, since) != :gt do
        {:ok, record}
      else
        record
        |> changeset(%{unreachable_since: since})
        |> Repo.update()
      end
  end
end

def set_unreachable(_, _), do: {:error, nil}
126
@doc """
Lists `{host, unreachable_since}` pairs for all instances whose
`unreachable_since` lies before `Instances.reachability_datetime_threshold/0`,
ordered by `unreachable_since`.
"""
def get_consistently_unreachable do
  threshold = Instances.reachability_datetime_threshold()

  Instance
  |> where([i], ^threshold > i.unreachable_since)
  |> order_by([i], i.unreachable_since)
  |> select([i], {i.host, i.unreachable_since})
  |> Repo.all()
end
137
# Normalizes a datetime argument.
#
# A binary is parsed as ISO 8601 and the resulting NaiveDateTime is returned;
# an unparsable binary yields nil so that callers can fall back to a default
# with `||`. Any other value is returned unchanged.
#
# NaiveDateTime.from_iso8601/1 returns `{:ok, datetime}` / `{:error, reason}`,
# so the tuple has to be unwrapped here: handing it back as-is would always be
# truthy and is not a value NaiveDateTime.compare/2 can work with.
defp parse_datetime(datetime) when is_binary(datetime) do
  case NaiveDateTime.from_iso8601(datetime) do
    {:ok, parsed} -> parsed
    {:error, _reason} -> nil
  end
end

defp parse_datetime(datetime), do: datetime
143
@doc """
Tells whether instance metadata should be (re)fetched.

Returns `true` for `nil`, for a record without `metadata_updated_at`, and for a
record whose `metadata_updated_at` is more than 86 400 seconds in the past.
"""
def needs_update(nil), do: true

def needs_update(%Instance{metadata_updated_at: nil}), do: true

def needs_update(%Instance{metadata_updated_at: last_update}) do
  seconds_since_update = NaiveDateTime.diff(NaiveDateTime.utc_now(), last_update)

  seconds_since_update > 86_400
end
152
@doc """
Builds an in-memory `%Instance{}` for the local endpoint: its host, a favicon
URL made of the endpoint URL plus "/favicon.png", and the raw nodeinfo from
`Pleroma.Web.Nodeinfo.NodeinfoController.raw_nodeinfo/0`.
"""
def local do
  local_host = Pleroma.Web.Endpoint.host()
  local_favicon = Pleroma.Web.Endpoint.url() <> "/favicon.png"
  local_nodeinfo = Pleroma.Web.Nodeinfo.NodeinfoController.raw_nodeinfo()

  %Instance{host: local_host, favicon: local_favicon, nodeinfo: local_nodeinfo}
end
160
@doc """
Refreshes the stored metadata for the instance at `uri`.

Looks up the stored record for the URI's host and hands over to
`do_update_metadata/2` when the host is reachable; otherwise returns
`{:discard, :unreachable}`.
"""
def update_metadata(%URI{host: host} = uri) do
  Logger.info("Checking metadata for #{host}")

  stored = Repo.get_by(Instance, %{host: host})

  if reachable?(host), do: do_update_metadata(uri, stored), else: {:discard, :unreachable}
end
171
# Creates or refreshes the metadata record for `uri`.
#
# * no existing record: scrape favicon and nodeinfo, then insert a new record
#   (a failing insert raises a MatchError);
# * existing record that needs_update/1: scrape and update it;
# * otherwise: `{:discard, "Does not require update"}`.
defp do_update_metadata(%URI{host: host} = uri, existing_record) do
  cond do
    !existing_record ->
      scraped_favicon = scrape_favicon(uri)
      scraped_nodeinfo = scrape_nodeinfo(uri)

      Logger.info("Creating metadata for #{host}")

      attrs = %{
        host: host,
        favicon: scraped_favicon,
        nodeinfo: scraped_nodeinfo,
        metadata_updated_at: NaiveDateTime.utc_now()
      }

      {:ok, _instance} = %Instance{} |> changeset(attrs) |> Repo.insert()

    needs_update(existing_record) ->
      Logger.info("Updating metadata for #{host}")

      scraped_favicon = scrape_favicon(uri)
      scraped_nodeinfo = scrape_nodeinfo(uri)

      attrs = %{
        host: host,
        favicon: scraped_favicon,
        nodeinfo: scraped_nodeinfo,
        metadata_updated_at: NaiveDateTime.utc_now()
      }

      existing_record |> changeset(attrs) |> Repo.update()

    true ->
      {:discard, "Does not require update"}
  end
end
207
@doc """
Returns the stored favicon of the instance identified by the URI's host, or
`nil` when no record exists for that host.
"""
def get_favicon(%URI{host: host}) do
  case Repo.get_by(Instance, %{host: host}) do
    nil -> nil
    record -> record.favicon
  end
end
217
# Fetches and decodes the nodeinfo 2.0 document of the instance at `instance_uri`.
#
# Steps: check that nodeinfo scraping is enabled and the host is reachable,
# fetch `/.well-known/nodeinfo`, pick the link whose "rel" is the 2.0 schema,
# fetch its "href", and decode the body if it is shorter than 50 000
# characters. Returns the decoded document, or nil when any step fails.
#
# Both requests go through Pleroma.HTTP.get/3, like the other requests in this
# module. Guards make malformed remote data (non-binary bodies, a non-list
# "links" value, non-map link entries, a non-binary "href") fall through to
# nil instead of raising.
defp scrape_nodeinfo(%URI{} = instance_uri) do
  with true <- Pleroma.Config.get([:instances_nodeinfo, :enabled]),
       {_, true} <- {:reachable, reachable?(instance_uri.host)},
       {:ok, %Tesla.Env{status: 200, body: body}} when is_binary(body) <-
         Pleroma.HTTP.get(
           "https://#{instance_uri.host}/.well-known/nodeinfo",
           [{"accept", "application/json"}],
           []
         ),
       {:ok, %{"links" => links}} when is_list(links) <- Jason.decode(body),
       %{"href" => href} when is_binary(href) <-
         Enum.find(
           links,
           &match?(%{"rel" => "http://nodeinfo.diaspora.software/ns/schema/2.0"}, &1)
         ),
       {:ok, %Tesla.Env{body: data}} when is_binary(data) <-
         Pleroma.HTTP.get(href, [{"accept", "application/json"}], []),
       {:length, true} <- {:length, String.length(data) < 50_000},
       {:ok, nodeinfo} <- Jason.decode(data) do
    nodeinfo
  else
    {:reachable, false} ->
      Logger.debug(
        "Instance.scrape_nodeinfo(\"#{to_string(instance_uri)}\") ignored unreachable host"
      )

      nil

    {:length, false} ->
      Logger.debug(
        "Instance.scrape_nodeinfo(\"#{to_string(instance_uri)}\") ignored too long body"
      )

      nil

    _ ->
      nil
  end
end
255
# Scrapes the favicon URL from the HTML page at `instance_uri`.
#
# Requires favicon scraping to be enabled and the host to be reachable. The
# first `link[rel=icon]` href is resolved against the instance URI and returned
# when the resulting URL is shorter than 255 characters; otherwise nil.
defp scrape_favicon(%URI{} = instance_uri) do
  with true <- Pleroma.Config.get([:instances_favicons, :enabled]),
       {:reachable, true} <- {:reachable, reachable?(instance_uri.host)},
       {:ok, %Tesla.Env{body: html}} <-
         Pleroma.HTTP.get(to_string(instance_uri), [{"accept", "text/html"}], []),
       [first_href | _] when is_binary(first_href) <- favicon_hrefs(html),
       favicon_url = to_string(URI.merge(instance_uri, first_href)),
       true <- String.length(favicon_url) < 255 do
    favicon_url
  else
    {:reachable, false} ->
      Logger.debug(
        "Instance.scrape_favicon(\"#{to_string(instance_uri)}\") ignored unreachable host"
      )

      nil

    _ ->
      nil
  end
end

# Collects the href attributes of all `link[rel=icon]` elements in `html`.
defp favicon_hrefs(html) do
  html
  |> Floki.parse_document!()
  |> Floki.attribute("link[rel=icon]", "href")
end
279
@doc """
Schedules the deletion of every user of the instance `host`.

The work is enqueued as a `"delete_instance"` background job, so the users
(and with them their activities and notifications) are removed asynchronously.
"""
def delete_users_and_activities(host) when is_binary(host) do
  job_params = %{"host" => host}

  BackgroundWorker.enqueue("delete_instance", job_params)
end
287
@doc """
Runs the `:delete_instance` job: streams the users matched by the nickname
filter `"@<host>"` in batches of 100 and calls `User.perform(:delete, user)`
for each of them.
"""
def perform(:delete_instance, host) when is_binary(host) do
  delete_batch = fn batch ->
    Enum.each(batch, &User.perform(:delete, &1))
  end

  %{nickname: "@#{host}"}
  |> User.Query.build()
  |> Repo.chunk_stream(100, :batches)
  |> Stream.each(delete_batch)
  |> Stream.run()
end
299
@doc """
Fetches the stored record whose `host` equals the host resolved from
`url_or_host`, or `nil` when there is none.
"""
def get_by_url(url_or_host) do
  Repo.get_by(Instance, host: host(url_or_host))
end
304
@doc """
Cached variant of `get_by_url/1`.

When the resolved host is the local endpoint's host, `{:ok, local()}` is
returned directly. Otherwise the record is fetched through the
`:instances_cache` cache under the key `"instances:<host>"`: a found record is
committed to the cache as `{:ok, instance}`, a missing one is not cached.
"""
def get_cached_by_url(url_or_host) do
  instance_host = host(url_or_host)
  local_host = Pleroma.Web.Endpoint.host()

  case instance_host do
    ^local_host ->
      {:ok, local()}

    _remote_host ->
      @cachex.fetch!(:instances_cache, "instances:#{instance_host}", fn _ ->
        case get_by_url(instance_host) do
          %Instance{} = instance -> {:commit, {:ok, instance}}
          _ -> {:ignore, nil}
        end
      end)
  end
end
320 end