instance.reachable?: Limit to binary input
[akkoma] / lib / pleroma / instances / instance.ex
1 # Pleroma: A lightweight social networking server
2 # Copyright © 2017-2020 Pleroma Authors <https://pleroma.social/>
3 # SPDX-License-Identifier: AGPL-3.0-only
4
5 defmodule Pleroma.Instances.Instance do
6 @moduledoc "Instance."
7
8 alias Pleroma.Instances
9 alias Pleroma.Instances.Instance
10 alias Pleroma.Repo
11
12 use Ecto.Schema
13
14 import Ecto.Query
15 import Ecto.Changeset
16
17 require Logger
18
# Ecto schema for the "instances" table; rows are looked up by `host` throughout this module.
schema "instances" do
  # Host name of the instance; required and covered by a unique constraint in `changeset/2`.
  field(:host, :string)
  # Timestamp since which the host has been unreachable; `set_reachable/1` resets it to `nil`.
  field(:unreachable_since, :naive_datetime_usec)
  # Favicon URL stored by `get_or_update_favicon/1` (may be `nil` when scraping found nothing).
  field(:favicon, :string)
  # When `favicon` was last refreshed; `get_or_update_favicon/1` reuses the stored value while
  # this is less than 86_400 seconds old.
  field(:favicon_updated_at, :naive_datetime)

  timestamps()
end
27
# Exposes `host/1` on this module by delegating to `Instances.host/1`
# (`Pleroma.Instances`). Used below to turn a URL-or-host argument into the value compared
# against the `host` column — presumably the bare host name; confirm in `Pleroma.Instances`.
defdelegate host(url_or_host), to: Instances
29
@doc """
Builds a changeset for an instance record.

Casts `:host`, `:unreachable_since`, `:favicon` and `:favicon_updated_at` from `params`,
requires `:host` to be present, and registers a unique constraint on `:host`.
"""
def changeset(struct, params \\ %{}) do
  permitted_fields = [:host, :unreachable_since, :favicon, :favicon_updated_at]

  casted = cast(struct, params, permitted_fields)
  with_required = validate_required(casted, [:host])

  unique_constraint(with_required, :host)
end
36
@doc """
Filters `urls_or_hosts` down to the entries considered reachable.

Returns a map whose keys are the binary entries of the input that are reachable and whose
values are the stored `unreachable_since` for the entry's host (`nil` when no row exists or
the row has no `unreachable_since`). An entry is reachable when its host has no
`unreachable_since`, or when that timestamp is later than
`Instances.reachability_datetime_threshold/0`. Non-binary entries never appear in the result.
"""
def filter_reachable([]), do: %{}

def filter_reachable(urls_or_hosts) when is_list(urls_or_hosts) do
  # Falsy entries are passed through untouched; anything that stringifies to "" is discarded
  # before querying.
  candidate_hosts =
    urls_or_hosts
    |> Enum.map(fn entry -> entry && host(entry) end)
    |> Enum.reject(fn candidate -> to_string(candidate) == "" end)

  lookup_query =
    from(i in Instance,
      where: i.host in ^candidate_hosts,
      select: {i.host, i.unreachable_since}
    )

  # One query for all hosts; the {host, unreachable_since} pairs become a lookup map.
  since_by_host = lookup_query |> Repo.all() |> Map.new()

  threshold = Instances.reachability_datetime_threshold()

  urls_or_hosts
  |> Enum.filter(&is_binary/1)
  |> Enum.reduce(%{}, fn entry, reachable ->
    since = Map.get(since_by_host, host(entry))

    if is_nil(since) or NaiveDateTime.compare(since, threshold) == :gt do
      Map.put(reachable, entry, since)
    else
      reachable
    end
  end)
end
68
@doc """
Tells whether the instance identified by `url_or_host` is considered reachable.

Returns `false` only when a row for the host exists whose `unreachable_since` is at or
before `Instances.reachability_datetime_threshold/0`. Hosts with no row, with a `nil`
`unreachable_since`, or marked unreachable more recently than the threshold are reported
as reachable (`true`).

Only binaries are accepted; any other argument raises `FunctionClauseError`.
"""
def reachable?(url_or_host) when is_binary(url_or_host) do
  # The query yields `true` for a host that has been unreachable since the threshold or
  # earlier and `nil` otherwise, so negating the result gives the reachability flag.
  !Repo.one(
    from(i in Instance,
      where:
        i.host == ^host(url_or_host) and
          i.unreachable_since <= ^Instances.reachability_datetime_threshold(),
      select: true
    )
  )
end
81
@doc """
Clears the `unreachable_since` marker of the instance identified by `url_or_host`.

Returns `{:ok, instance}` after a successful update and `nil` when no row exists for the
host. A failed update raises `MatchError`. Non-binary arguments return `{:error, nil}`.
"""
def set_reachable(url_or_host) when is_binary(url_or_host) do
  case Repo.get_by(Instance, %{host: host(url_or_host)}) do
    %Instance{} = record ->
      # Assert on success so a failed update surfaces instead of being swallowed.
      {:ok, _instance} =
        record
        |> changeset(%{unreachable_since: nil})
        |> Repo.update()

    not_found ->
      not_found
  end
end

def set_reachable(_), do: {:error, nil}
93
@doc """
Records that the instance identified by `url_or_host` is unreachable.

`unreachable_since` is passed through `parse_datetime/1`; when that yields a falsy value the
current UTC time is used. Behaviour by case:

  * no row for the host — a new row is inserted with the timestamp;
  * the row already has an `unreachable_since` that is not later than the new timestamp —
    the row is returned unchanged as `{:ok, record}`;
  * otherwise — the row is updated with the new timestamp.

Returns the result of the insert/update, or `{:error, nil}` for a non-binary `url_or_host`.
"""
def set_unreachable(url_or_host, unreachable_since \\ nil)

def set_unreachable(url_or_host, unreachable_since) when is_binary(url_or_host) do
  since = parse_datetime(unreachable_since) || NaiveDateTime.utc_now()
  instance_host = host(url_or_host)

  case Repo.get_by(Instance, %{host: instance_host}) do
    nil ->
      %Instance{}
      |> changeset(%{unreachable_since: since, host: instance_host})
      |> Repo.insert()

    record ->
      recorded = record.unreachable_since

      # Keep the earliest known outage start: only overwrite when nothing is stored yet or
      # the stored timestamp is later than the new one.
      if recorded && NaiveDateTime.compare(recorded, since) != :gt do
        {:ok, record}
      else
        record
        |> changeset(%{unreachable_since: since})
        |> Repo.update()
      end
  end
end

def set_unreachable(_, _), do: {:error, nil}
121
@doc """
Lists instances whose `unreachable_since` is earlier than
`Instances.reachability_datetime_threshold/0`.

Returns `{host, unreachable_since}` tuples ordered by `unreachable_since`.
"""
def get_consistently_unreachable do
  threshold = Instances.reachability_datetime_threshold()

  Instance
  |> where([i], i.unreachable_since < ^threshold)
  |> order_by([i], i.unreachable_since)
  |> select([i], {i.host, i.unreachable_since})
  |> Repo.all()
end
132
# Normalises the `unreachable_since` argument of `set_unreachable/2`.
#
# * An ISO 8601 string is parsed into a `NaiveDateTime`. `NaiveDateTime.from_iso8601/1`
#   returns `{:ok, datetime}` / `{:error, reason}`, so the tuple is unwrapped here; passing
#   it on as-is would hand a tuple to `NaiveDateTime.compare/2` and to the changeset.
# * A string that cannot be parsed yields `nil`, letting the caller's `|| utc_now()`
#   fallback apply.
# * Any non-binary value (e.g. `nil` or an existing `NaiveDateTime`) is returned unchanged.
defp parse_datetime(datetime) when is_binary(datetime) do
  case NaiveDateTime.from_iso8601(datetime) do
    {:ok, parsed} -> parsed
    {:error, _reason} -> nil
  end
end

defp parse_datetime(datetime), do: datetime
138
@doc """
Returns the favicon URL for the instance at `instance_uri`, refreshing the stored value
when needed.

When a row for the URI's host exists and its `favicon_updated_at` is less than 86_400
seconds old, the stored `favicon` is returned. Otherwise the favicon is scraped, written to
the existing row (or a newly inserted one) together with the current time, and returned;
the result of that write is not checked.

Any exception raised along the way is logged as a warning and `nil` is returned.
"""
def get_or_update_favicon(%URI{host: host} = instance_uri) do
  record = Repo.get_by(Instance, %{host: host})
  now = NaiveDateTime.utc_now()

  if favicon_fresh?(record, now) do
    record.favicon
  else
    favicon = scrape_favicon(instance_uri)
    attrs = %{favicon: favicon, favicon_updated_at: now}

    # Best-effort persistence: the outcome of the insert/update is intentionally ignored.
    case record do
      nil ->
        %Instance{}
        |> changeset(Map.put(attrs, :host, host))
        |> Repo.insert()

      _existing ->
        record
        |> changeset(attrs)
        |> Repo.update()
    end

    favicon
  end
rescue
  error ->
    Logger.warn("Instance.get_or_update_favicon(\"#{host}\") error: #{inspect(error)}")
    nil
end

# True when `record` exists, has a `favicon_updated_at`, and that timestamp is less than
# 86_400 seconds before `now`.
defp favicon_fresh?(nil, _now), do: false

defp favicon_fresh?(record, now) do
  updated_at = record.favicon_updated_at

  if updated_at do
    NaiveDateTime.diff(now, updated_at) < 86_400
  else
    false
  end
end
166
# Tries to discover the favicon URL of the instance at `instance_uri`.
#
# Steps, each of which must succeed for a URL to be returned:
#   1. the host must be reported reachable by `reachable?/1`;
#   2. the page at `instance_uri` is fetched with an `accept: text/html` header;
#   3. the first `href` of a `link[rel=icon]` element is taken from the parsed document;
#   4. that href is merged with `instance_uri` and stringified.
#
# Returns the favicon URL as a string, or `nil` when any step does not match. An unreachable
# host is logged at debug level; any raised exception is logged as a warning and turned
# into `nil`.
defp scrape_favicon(%URI{} = instance_uri) do
  with {_, true} <- {:reachable, reachable?(instance_uri.host)},
       {:ok, %Tesla.Env{body: page}} <-
         Pleroma.HTTP.get(to_string(instance_uri), [{"accept", "text/html"}], pool: :media),
       {_, [icon_href | _]} when is_binary(icon_href) <-
         {:parse, Floki.attribute(Floki.parse_document!(page), "link[rel=icon]", "href")},
       {_, icon_url} when is_binary(icon_url) <-
         {:merge, to_string(URI.merge(instance_uri, icon_href))} do
    icon_url
  else
    {:reachable, false} ->
      Logger.debug(
        "Instance.scrape_favicon(\"#{to_string(instance_uri)}\") ignored unreachable host"
      )

      nil

    _no_match ->
      nil
  end
rescue
  error ->
    Logger.warn(
      "Instance.scrape_favicon(\"#{to_string(instance_uri)}\") error: #{inspect(error)}"
    )

    nil
end
198 end