projects
/
akkoma
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
parsers configurable
[akkoma]
/
lib
/
pleroma
/
web
/
rich_media
/
parser.ex
diff --git
a/lib/pleroma/web/rich_media/parser.ex
b/lib/pleroma/web/rich_media/parser.ex
index 874e8c5e68eea6313fdbce0d4c9eec4a2ece8073..0d25233388a67ff977bbee8622bab2393ab944e9 100644
(file)
--- a/
lib/pleroma/web/rich_media/parser.ex
+++ b/
lib/pleroma/web/rich_media/parser.ex
@@
-3,15
+3,20
@@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parser do
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parser do
- @parsers [
- Pleroma.Web.RichMedia.Parsers.OGP,
- Pleroma.Web.RichMedia.Parsers.TwitterCard,
- Pleroma.Web.RichMedia.Parsers.OEmbed
+ @hackney_options [
+ pool: :media,
+ recv_timeout: 2_000,
+ max_body: 2_000_000,
+ with_body: true
]
]
+ defp parsers do
+ Pleroma.Config.get([:rich_media, :parsers])
+ end
+
def parse(nil), do: {:error, "No URL provided"}
def parse(nil), do: {:error, "No URL provided"}
- if
Mix.env(
) == :test do
+ if
Pleroma.Config.get(:env
) == :test do
def parse(url), do: parse_url(url)
else
def parse(url) do
def parse(url), do: parse_url(url)
else
def parse(url) do
@@
-28,9
+33,12
@@
defmodule Pleroma.Web.RichMedia.Parser do
defp parse_url(url) do
try do
defp parse_url(url) do
try do
- {:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url, [],
pool: :media
)
+ {:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url, [],
adapter: @hackney_options
)
- html |> maybe_parse() |> get_parsed_data()
+ html
+ |> maybe_parse()
+ |> clean_parsed_data()
+ |> check_parsed_data()
rescue
e ->
{:error, "Parsing error: #{inspect(e)}"}
rescue
e ->
{:error, "Parsing error: #{inspect(e)}"}
@@
-38,7
+46,7
@@
defmodule Pleroma.Web.RichMedia.Parser do
end
defp maybe_parse(html) do
end
defp maybe_parse(html) do
- Enum.reduce_while(
@parsers
, %{}, fn parser, acc ->
+ Enum.reduce_while(
parsers()
, %{}, fn parser, acc ->
case parser.parse(html, acc) do
{:ok, data} -> {:halt, data}
{:error, _msg} -> {:cont, acc}
case parser.parse(html, acc) do
{:ok, data} -> {:halt, data}
{:error, _msg} -> {:cont, acc}
@@
-46,11
+54,23
@@
defmodule Pleroma.Web.RichMedia.Parser do
end)
end
end)
end
- defp
get
_parsed_data(%{title: title} = data) when is_binary(title) and byte_size(title) > 0 do
+ defp
check
_parsed_data(%{title: title} = data) when is_binary(title) and byte_size(title) > 0 do
{:ok, data}
end
{:ok, data}
end
- defp
get
_parsed_data(data) do
+ defp
check
_parsed_data(data) do
{:error, "Found metadata was invalid or incomplete: #{inspect(data)}"}
end
{:error, "Found metadata was invalid or incomplete: #{inspect(data)}"}
end
+
+ defp clean_parsed_data(data) do
+ data
+ |> Enum.reject(fn {key, val} ->
+ with {:ok, _} <- Jason.encode(%{key => val}) do
+ false
+ else
+ _ -> true
+ end
+ end)
+ |> Map.new()
+ end
end
end