Add an HTML pre-parsing step (Floki.parse) to RichMedia.Parser
author Maksim Pechnikov <parallel588@gmail.com>
Sun, 15 Sep 2019 11:53:58 +0000 (14:53 +0300)
committer Maksim Pechnikov <parallel588@gmail.com>
Sun, 15 Sep 2019 11:53:58 +0000 (14:53 +0300)
lib/pleroma/web/rich_media/parser.ex
mix.exs
mix.lock

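diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex
index f5f9e358c23bce1996dad12a0f3e480a516cfe8d..c06b0a0f2668338415d592fde838bff42e49e056 100644 (file)
--- a/lib/pleroma/web/rich_media/parser.ex
+++ b/lib/pleroma/web/rich_media/parser.ex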
@@ -81,6 +81,7 @@ defmodule Pleroma.Web.RichMedia.Parser do
       {:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url, [], adapter: @hackney_options)
 
       html
+      |> parse_html
       |> maybe_parse()
       |> Map.put(:url, url)
       |> clean_parsed_data()
@@ -91,6 +92,8 @@ defmodule Pleroma.Web.RichMedia.Parser do
     end
   end
 
+  defp parse_html(html), do: Floki.parse(html)
+
   defp maybe_parse(html) do
     Enum.reduce_while(parsers(), %{}, fn parser, acc ->
       case parser.parse(html, acc) do
@@ -100,7 +103,8 @@ defmodule Pleroma.Web.RichMedia.Parser do
     end)
   end
 
-  defp check_parsed_data(%{title: title} = data) when is_binary(title) and byte_size(title) > 0 do
+  defp check_parsed_data(%{title: title} = data)
+       when is_binary(title) and byte_size(title) > 0 do
     {:ok, data}
   end
 
diff --git a/mix.exs b/mix.exs
index dfa53035886ac410227665d82a1af1fa38be4048..6f952fa1205d182c0153724478f8e60d8dff6ac5 100644 (file)
--- a/mix.exs
+++ b/mix.exs
@@ -131,7 +131,7 @@ defmodule Pleroma.Mixfile do
       {:phoenix_swoosh, "~> 0.2"},
       {:gen_smtp, "~> 0.13"},
       {:websocket_client, git: "https://github.com/jeremyong/websocket_client.git", only: :test},
-      {:floki, "~> 0.20.0"},
+      {:floki, "~> 0.23.0"},
       {:ex_syslogger, github: "slashmili/ex_syslogger", tag: "1.4.0"},
       {:timex, "~> 3.5"},
       {:ueberauth, "~> 0.4"},
diff --git a/mix.lock b/mix.lock
index 5762dae4f34c3e8d3cd435b26355b0f506b18f81..2bce00dea4090d80cd275ce48e33b9e9f223ee57 100644 (file)
--- a/mix.lock
+++ b/mix.lock
@@ -34,7 +34,7 @@
   "ex_rated": {:hex, :ex_rated, "1.3.3", "30ecbdabe91f7eaa9d37fa4e81c85ba420f371babeb9d1910adbcd79ec798d27", [:mix], [{:ex2ms, "~> 1.5", [hex: :ex2ms, repo: "hexpm", optional: false]}], "hexpm"},
   "ex_syslogger": {:git, "https://github.com/slashmili/ex_syslogger.git", "f3963399047af17e038897c69e20d552e6899e1d", [tag: "1.4.0"]},
   "excoveralls": {:hex, :excoveralls, "0.11.1", "dd677fbdd49114fdbdbf445540ec735808250d56b011077798316505064edb2c", [:mix], [{:hackney, "~> 1.0", [hex: :hackney, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm"},
-  "floki": {:hex, :floki, "0.20.4", "be42ac911fece24b4c72f3b5846774b6e61b83fe685c2fc9d62093277fb3bc86", [:mix], [{:html_entities, "~> 0.4.0", [hex: :html_entities, repo: "hexpm", optional: false]}, {:mochiweb, "~> 2.15", [hex: :mochiweb, repo: "hexpm", optional: false]}], "hexpm"},
+  "floki": {:hex, :floki, "0.23.0", "956ab6dba828c96e732454809fb0bd8d43ce0979b75f34de6322e73d4c917829", [:mix], [{:html_entities, "~> 0.4.0", [hex: :html_entities, repo: "hexpm", optional: false]}], "hexpm"},
   "gen_smtp": {:hex, :gen_smtp, "0.14.0", "39846a03522456077c6429b4badfd1d55e5e7d0fdfb65e935b7c5e38549d9202", [:rebar3], [], "hexpm"},
   "gettext": {:hex, :gettext, "0.17.0", "abe21542c831887a2b16f4c94556db9c421ab301aee417b7c4fbde7fbdbe01ec", [:mix], [], "hexpm"},
   "hackney": {:hex, :hackney, "1.15.1", "9f8f471c844b8ce395f7b6d8398139e26ddca9ebc171a8b91342ee15a19963f4", [:rebar3], [{:certifi, "2.5.1", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.4", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm"},