Attempt to use <title> from HTML as a fallback
[akkoma] / lib / pleroma / web / rich_media / parsers / meta_tags_parser.ex
defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do
  @moduledoc """
  Collects prefixed `<meta>` tags from an HTML document into a map.

  A tag is selected when its key attribute starts with the given prefix
  followed by a colon. The document's `<title>` is used as a fallback when
  none of the collected tags provided a `:title`.
  """

  @doc """
  Merges the matching `<meta>` tags of `html` into `data`.

  ## Parameters

    * `html` - the document handed to `Floki.find/2`.
    * `data` - map that the extracted entries are merged into.
    * `prefix` - namespace the key attribute must start with (without the
      trailing colon).
    * `error_message` - term returned inside the error tuple when no tag
      matches.
    * `key_name` - name of the attribute that holds the prefixed key.
    * `value_name` - name of the attribute that holds the value; defaults to
      `"content"`.

  ## Returns

    * `{:ok, map}` - `data` merged with one entry per matching tag. The entry
      key is the key attribute with the prefix removed, converted to an atom.
      When the map has no `:title` key and the document has a non-blank
      `<title>`, that title is stored under `:title`.
    * `{:error, error_message}` - when no tag matches.
  """
  @spec parse(term(), map(), term(), term(), String.t(), String.t()) ::
          {:ok, map()} | {:error, term()}
  def parse(html, data, prefix, error_message, key_name, value_name \\ "content") do
    case get_elements(html, key_name, prefix) do
      [_ | _] = elements ->
        meta_data =
          elements
          |> Enum.reduce(data, fn el, acc ->
            Map.merge(acc, normalize_attributes(el, prefix, key_name, value_name))
          end)
          |> put_title_fallback(html)

        {:ok, meta_data}

      _no_elements ->
        {:error, error_message}
    end
  end

  # Finds every <meta> element whose key attribute starts with "prefix:".
  defp get_elements(html, key_name, prefix) do
    Floki.find(html, "meta[#{key_name}^='#{prefix}:']")
  end

  # Turns a single <meta> node into a one-entry map of stripped key => value.
  defp normalize_attributes({_tag, attributes, _children}, prefix, key_name, value_name) do
    attrs = Map.new(attributes)

    # Only the key carries the namespace prefix. The value is returned exactly
    # as written in the document, so content that happens to start with
    # "prefix:" is no longer truncated.
    key =
      attrs
      |> Map.fetch!(key_name)
      |> String.trim_leading("#{prefix}:")

    # NOTE(review): String.to_atom/1 creates a new atom for every distinct
    # key and atoms are never garbage collected. If `html` comes from
    # untrusted pages this can exhaust the atom table. Kept because callers
    # presumably read the result through atom keys - confirm before switching
    # to string keys.
    %{String.to_atom(key) => attrs[value_name]}
  end

  # Stores the page <title> under :title unless a title is already present
  # or the document has no usable title.
  defp put_title_fallback(%{title: _} = meta, _html), do: meta

  defp put_title_fallback(meta, html) do
    case get_page_title(html) do
      # An empty title carries no information, so the key is left unset.
      "" -> meta
      title -> Map.put(meta, :title, title)
    end
  end

  # Returns the trimmed text of the first <title> element, or "" when there
  # is none. Only the first match is used so that any further <title>
  # elements in the document are not concatenated into the page title.
  defp get_page_title(html) do
    case Floki.find(html, "title") do
      [first | _] -> [first] |> Floki.text() |> String.trim()
      _ -> ""
    end
  end
end