Fix atom leak in Rich Media Parser

author Egor Kislitsyn <egor@kislitsyn.com>
Tue, 9 Jun 2020 17:49:24 +0000 (21:49 +0400)
committer rinpatch <rinpatch@sdf.org>
Sat, 13 Jun 2020 09:08:46 +0000 (12:08 +0300)
diff --git a/lib/pleroma/web/mastodon_api/views/status_view.ex b/lib/pleroma/web/mastodon_api/views/status_view.ex

index 8e37150931a9c4c3a0ab7c1ff510e226c5091e49..2c49bedb36760f23698836d988eb952845b9c982 100644 (file)
--- a/lib/pleroma/web/mastodon_api/views/status_view.ex
+++ b/lib/pleroma/web/mastodon_api/views/status_view.ex
@@ -377,8 +377,8 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
      page_url_data = URI.parse(page_url)
  
      page_url_data =
      page_url_data = URI.parse(page_url)
  
      page_url_data =
-      if rich_media[:url] != nil do
-        URI.merge(page_url_data, URI.parse(rich_media[:url]))
+      if is_binary(rich_media["url"]) do
+        URI.merge(page_url_data, URI.parse(rich_media["url"]))
        else
          page_url_data
        end
        else
          page_url_data
        end
@@ -386,11 +386,9 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
      page_url = page_url_data |> to_string
  
      image_url =
      page_url = page_url_data |> to_string
  
      image_url =
-      if rich_media[:image] != nil do
-        URI.merge(page_url_data, URI.parse(rich_media[:image]))
+      if is_binary(rich_media["image"]) do
+        URI.merge(page_url_data, URI.parse(rich_media["image"]))
          |> to_string
          |> to_string
-      else
-        nil
        end
  
      %{
        end
  
      %{
@@ -399,8 +397,8 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
        provider_url: page_url_data.scheme <> "://" <> page_url_data.host,
        url: page_url,
        image: image_url |> MediaProxy.url(),
        provider_url: page_url_data.scheme <> "://" <> page_url_data.host,
        url: page_url,
        image: image_url |> MediaProxy.url(),
-      title: rich_media[:title] || "",
-      description: rich_media[:description] || "",
+      title: rich_media["title"] || "",
+      description: rich_media["description"] || "",
        pleroma: %{
          opengraph: rich_media
        }
        pleroma: %{
          opengraph: rich_media
        }
diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex

index 9d3d7f978b10b48c9afc3960a222fb85bcfb4061..1729141e996bba5fc9de39846da505390e311f0a 100644 (file)
--- a/lib/pleroma/web/rich_media/helpers.ex
+++ b/lib/pleroma/web/rich_media/helpers.ex
@@ -9,7 +9,7 @@ defmodule Pleroma.Web.RichMedia.Helpers do
    alias Pleroma.Object
    alias Pleroma.Web.RichMedia.Parser
  
    alias Pleroma.Object
    alias Pleroma.Web.RichMedia.Parser
  
-  @spec validate_page_url(any()) :: :ok | :error
+  @spec validate_page_url(URI.t() | binary()) :: :ok | :error
    defp validate_page_url(page_url) when is_binary(page_url) do
      validate_tld = Application.get_env(:auto_linker, :opts)[:validate_tld]
  
    defp validate_page_url(page_url) when is_binary(page_url) do
      validate_tld = Application.get_env(:auto_linker, :opts)[:validate_tld]
  
@@ -18,8 +18,8 @@ defmodule Pleroma.Web.RichMedia.Helpers do
      |> parse_uri(page_url)
    end
  
      |> parse_uri(page_url)
    end
  
-  defp validate_page_url(%URI{host: host, scheme: scheme, authority: authority})
-       when scheme == "https" and not is_nil(authority) do
+  defp validate_page_url(%URI{host: host, scheme: "https", authority: authority})
+       when is_binary(authority) do
      cond do
        host in Config.get([:rich_media, :ignore_hosts], []) ->
          :error
      cond do
        host in Config.get([:rich_media, :ignore_hosts], []) ->
          :error
diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex

index 40980def8198d134496ed429a3efd494352c4c95..d9b5068b117bf81a6b5d0a6b7573d4ed2fb21300 100644 (file)
--- a/lib/pleroma/web/rich_media/parser.ex
+++ b/lib/pleroma/web/rich_media/parser.ex
@@ -91,7 +91,7 @@ defmodule Pleroma.Web.RichMedia.Parser do
        html
        |> parse_html()
        |> maybe_parse()
        html
        |> parse_html()
        |> maybe_parse()
-      |> Map.put(:url, url)
+      |> Map.put("url", url)
        |> clean_parsed_data()
        |> check_parsed_data()
      rescue
        |> clean_parsed_data()
        |> check_parsed_data()
      rescue
@@ -111,8 +111,8 @@ defmodule Pleroma.Web.RichMedia.Parser do
      end)
    end
  
      end)
    end
  
-  defp check_parsed_data(%{title: title} = data)
-       when is_binary(title) and byte_size(title) > 0 do
+  defp check_parsed_data(%{"title" => title} = data)
+       when is_binary(title) and title != "" do
      {:ok, data}
    end
  
      {:ok, data}
    end
  
@@ -123,11 +123,7 @@ defmodule Pleroma.Web.RichMedia.Parser do
    defp clean_parsed_data(data) do
      data
      |> Enum.reject(fn {key, val} ->
    defp clean_parsed_data(data) do
      data
      |> Enum.reject(fn {key, val} ->
-      with {:ok, _} <- Jason.encode(%{key => val}) do
-        false
-      else
-        _ -> true
-      end
+      not match?({:ok, _}, Jason.encode(%{key => val}))
      end)
      |> Map.new()
    end
      end)
      |> Map.new()
    end
diff --git a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex

index ae0f36702ef36492458e5ed1a22183077a4da7c2..2762b5902970afd06bc7e2bc0fb7a74e3f1ee47b 100644 (file)
--- a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
+++ b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
@@ -29,19 +29,19 @@ defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do
      {_tag, attributes, _children} = html_node
  
      data =
      {_tag, attributes, _children} = html_node
  
      data =
-      Enum.into(attributes, %{}, fn {name, value} ->
+      Map.new(attributes, fn {name, value} ->
          {name, String.trim_leading(value, "#{prefix}:")}
        end)
  
          {name, String.trim_leading(value, "#{prefix}:")}
        end)
  
-    %{String.to_atom(data[key_name]) => data[value_name]}
+    %{data[key_name] => data[value_name]}
    end
  
    end
  
-  defp maybe_put_title(%{title: _} = meta, _), do: meta
+  defp maybe_put_title(%{"title" => _} = meta, _), do: meta
  
    defp maybe_put_title(meta, html) when meta != %{} do
      case get_page_title(html) do
        "" -> meta
  
    defp maybe_put_title(meta, html) when meta != %{} do
      case get_page_title(html) do
        "" -> meta
-      title -> Map.put_new(meta, :title, title)
+      title -> Map.put_new(meta, "title", title)
      end
    end
  
      end
    end
  
diff --git a/lib/pleroma/web/rich_media/parsers/oembed_parser.ex b/lib/pleroma/web/rich_media/parsers/oembed_parser.ex

index 8f32bf91b3123c16ca683b2902ba5d8c6523d3df..db8ccf15d146fac213e1e2c6a74f2fa440866b2a 100644 (file)
--- a/lib/pleroma/web/rich_media/parsers/oembed_parser.ex
+++ b/lib/pleroma/web/rich_media/parsers/oembed_parser.ex
@@ -5,7 +5,7 @@
  defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
    def parse(html, _data) do
      with elements = [_ | _] <- get_discovery_data(html),
  defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
    def parse(html, _data) do
      with elements = [_ | _] <- get_discovery_data(html),
-         {:ok, oembed_url} <- get_oembed_url(elements),
+         oembed_url when is_binary(oembed_url) <- get_oembed_url(elements),
           {:ok, oembed_data} <- get_oembed_data(oembed_url) do
        {:ok, oembed_data}
      else
           {:ok, oembed_data} <- get_oembed_data(oembed_url) do
        {:ok, oembed_data}
      else
@@ -17,19 +17,13 @@ defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
      html |> Floki.find("link[type='application/json+oembed']")
    end
  
      html |> Floki.find("link[type='application/json+oembed']")
    end
  
-  defp get_oembed_url(nodes) do
-    {"link", attributes, _children} = nodes |> hd()
-
-    {:ok, Enum.into(attributes, %{})["href"]}
+  defp get_oembed_url([{"link", attributes, _children} | _]) do
+    Enum.find_value(attributes, fn {k, v} -> if k == "href", do: v end)
    end
  
    defp get_oembed_data(url) do
    end
  
    defp get_oembed_data(url) do
-    {:ok, %Tesla.Env{body: json}} = Pleroma.HTTP.get(url, [], adapter: [pool: :media])
-
-    {:ok, data} = Jason.decode(json)
-
-    data = data |> Map.new(fn {k, v} -> {String.to_atom(k), v} end)
-
-    {:ok, data}
+    with {:ok, %Tesla.Env{body: json}} <- Pleroma.HTTP.get(url, [], adapter: [pool: :media]) do
+      Jason.decode(json)
+    end
    end
  end
    end
  end
diff --git a/test/web/rich_media/parser_test.exs b/test/web/rich_media/parser_test.exs

index e54a13bc804a43d25c38f2e92db52c8e5fd353a0..420a612c63e91d33129962b27370b9d1991f3e24 100644 (file)
--- a/test/web/rich_media/parser_test.exs
+++ b/test/web/rich_media/parser_test.exs
@@ -60,19 +60,19 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
    test "doesn't just add a title" do
      assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/non-ogp") ==
               {:error,
    test "doesn't just add a title" do
      assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/non-ogp") ==
               {:error,
-              "Found metadata was invalid or incomplete: %{url: \"http://example.com/non-ogp\"}"}
+              "Found metadata was invalid or incomplete: %{\"url\" => \"http://example.com/non-ogp\"}"}
    end
  
    test "parses ogp" do
      assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/ogp") ==
               {:ok,
                %{
    end
  
    test "parses ogp" do
      assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/ogp") ==
               {:ok,
                %{
-                image: "http://ia.media-imdb.com/images/rock.jpg",
-                title: "The Rock",
-                description:
+                "image" => "http://ia.media-imdb.com/images/rock.jpg",
+                "title" => "The Rock",
+                "description" =>
                    "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
                    "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
-                type: "video.movie",
-                url: "http://example.com/ogp"
+                "type" => "video.movie",
+                "url" => "http://example.com/ogp"
                }}
    end
  
                }}
    end
  
@@ -80,12 +80,12 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
      assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/ogp-missing-title") ==
               {:ok,
                %{
      assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/ogp-missing-title") ==
               {:ok,
                %{
-                image: "http://ia.media-imdb.com/images/rock.jpg",
-                title: "The Rock (1996)",
-                description:
+                "image" => "http://ia.media-imdb.com/images/rock.jpg",
+                "title" => "The Rock (1996)",
+                "description" =>
                    "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
                    "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
-                type: "video.movie",
-                url: "http://example.com/ogp-missing-title"
+                "type" => "video.movie",
+                "url" => "http://example.com/ogp-missing-title"
                }}
    end
  
                }}
    end
  
@@ -93,12 +93,12 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
      assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/twitter-card") ==
               {:ok,
                %{
      assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/twitter-card") ==
               {:ok,
                %{
-                card: "summary",
-                site: "@flickr",
-                image: "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg",
-                title: "Small Island Developing States Photo Submission",
-                description: "View the album on Flickr.",
-                url: "http://example.com/twitter-card"
+                "card" => "summary",
+                "site" => "@flickr",
+                "image" => "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg",
+                "title" => "Small Island Developing States Photo Submission",
+                "description" => "View the album on Flickr.",
+                "url" => "http://example.com/twitter-card"
                }}
    end
  
                }}
    end
  
@@ -106,27 +106,28 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
      assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/oembed") ==
               {:ok,
                %{
      assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/oembed") ==
               {:ok,
                %{
-                author_name: "‮‭‬bees‬",
-                author_url: "https://www.flickr.com/photos/bees/",
-                cache_age: 3600,
-                flickr_type: "photo",
-                height: "768",
-                html:
+                "author_name" => "‮‭‬bees‬",
+                "author_url" => "https://www.flickr.com/photos/bees/",
+                "cache_age" => 3600,
+                "flickr_type" => "photo",
+                "height" => "768",
+                "html" =>
                    "<a data-flickr-embed=\"true\" href=\"https://www.flickr.com/photos/bees/2362225867/\" title=\"Bacon Lollys by ‮‭‬bees‬, on Flickr\"><img src=\"https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_b.jpg\" width=\"1024\" height=\"768\" alt=\"Bacon Lollys\"></a><script async src=\"https://embedr.flickr.com/assets/client-code.js\" charset=\"utf-8\"></script>",
                    "<a data-flickr-embed=\"true\" href=\"https://www.flickr.com/photos/bees/2362225867/\" title=\"Bacon Lollys by ‮‭‬bees‬, on Flickr\"><img src=\"https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_b.jpg\" width=\"1024\" height=\"768\" alt=\"Bacon Lollys\"></a><script async src=\"https://embedr.flickr.com/assets/client-code.js\" charset=\"utf-8\"></script>",
-                license: "All Rights Reserved",
-                license_id: 0,
-                provider_name: "Flickr",
-                provider_url: "https://www.flickr.com/",
-                thumbnail_height: 150,
-                thumbnail_url: "https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_q.jpg",
-                thumbnail_width: 150,
-                title: "Bacon Lollys",
-                type: "photo",
-                url: "http://example.com/oembed",
-                version: "1.0",
-                web_page: "https://www.flickr.com/photos/bees/2362225867/",
-                web_page_short_url: "https://flic.kr/p/4AK2sc",
-                width: "1024"
+                "license" => "All Rights Reserved",
+                "license_id" => 0,
+                "provider_name" => "Flickr",
+                "provider_url" => "https://www.flickr.com/",
+                "thumbnail_height" => 150,
+                "thumbnail_url" =>
+                  "https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_q.jpg",
+                "thumbnail_width" => 150,
+                "title" => "Bacon Lollys",
+                "type" => "photo",
+                "url" => "http://example.com/oembed",
+                "version" => "1.0",
+                "web_page" => "https://www.flickr.com/photos/bees/2362225867/",
+                "web_page_short_url" => "https://flic.kr/p/4AK2sc",
+                "width" => "1024"
                }}
    end
  
                }}
    end
  
diff --git a/test/web/rich_media/parsers/twitter_card_test.exs b/test/web/rich_media/parsers/twitter_card_test.exs

index 87c767c15ce8b279f9e78a7cdc075c5f0825cdad..847623535b4713bec2e8b8e4ef1bc7547090d05c 100644 (file)
--- a/test/web/rich_media/parsers/twitter_card_test.exs
+++ b/test/web/rich_media/parsers/twitter_card_test.exs
@@ -19,11 +19,11 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
      assert TwitterCard.parse(html, %{}) ==
               {:ok,
                %{
      assert TwitterCard.parse(html, %{}) ==
               {:ok,
                %{
-                "app:id:googleplay": "com.nytimes.android",
-                "app:name:googleplay": "NYTimes",
-                "app:url:googleplay": "nytimes://reader/id/100000006583622",
-                site: nil,
-                title:
+                "app:id:googleplay" => "com.nytimes.android",
+                "app:name:googleplay" => "NYTimes",
+                "app:url:googleplay" => "nytimes://reader/id/100000006583622",
+                "site" => nil,
+                "title" =>
                    "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times"
                }}
    end
                    "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times"
                }}
    end
@@ -36,15 +36,15 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
      assert TwitterCard.parse(html, %{}) ==
               {:ok,
                %{
      assert TwitterCard.parse(html, %{}) ==
               {:ok,
                %{
-                card: "summary_large_image",
-                description:
+                "card" => "summary_large_image",
+                "description" =>
                    "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
                    "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
-                image:
+                "image" =>
                    "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
                    "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
-                "image:alt": "",
-                title:
+                "image:alt" => "",
+                "title" =>
                    "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
                    "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
-                url:
+                "url" =>
                    "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
                }}
    end
                    "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
                }}
    end
@@ -57,19 +57,19 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
      assert TwitterCard.parse(html, %{}) ==
               {:ok,
                %{
      assert TwitterCard.parse(html, %{}) ==
               {:ok,
                %{
-                "app:id:googleplay": "com.nytimes.android",
-                "app:name:googleplay": "NYTimes",
-                "app:url:googleplay": "nytimes://reader/id/100000006583622",
-                card: "summary_large_image",
-                description:
+                "app:id:googleplay" => "com.nytimes.android",
+                "app:name:googleplay" => "NYTimes",
+                "app:url:googleplay" => "nytimes://reader/id/100000006583622",
+                "card" => "summary_large_image",
+                "description" =>
                    "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
                    "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
-                image:
+                "image" =>
                    "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
                    "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
-                "image:alt": "",
-                site: nil,
-                title:
+                "image:alt" => "",
+                "site" => nil,
+                "title" =>
                    "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
                    "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
-                url:
+                "url" =>
                    "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
                }}
    end
                    "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
                }}
    end
@@ -86,11 +86,11 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
      assert TwitterCard.parse(html, %{}) ==
               {:ok,
                %{
      assert TwitterCard.parse(html, %{}) ==
               {:ok,
                %{
-                site: "@atlasobscura",
-                title:
+                "site" => "@atlasobscura",
+                "title" =>
                    "The Missing Grave of Margaret Corbin, Revolutionary War Veteran - Atlas Obscura",
                    "The Missing Grave of Margaret Corbin, Revolutionary War Veteran - Atlas Obscura",
-                card: "summary_large_image",
-                image: image_path
+                "card" => "summary_large_image",
+                "image" => image_path
                }}
    end
  
                }}
    end
  
@@ -102,12 +102,12 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
      assert TwitterCard.parse(html, %{}) ==
               {:ok,
                %{
      assert TwitterCard.parse(html, %{}) ==
               {:ok,
                %{
-                site: nil,
-                title:
+                "site" => nil,
+                "title" =>
                    "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
                    "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
-                "app:id:googleplay": "com.nytimes.android",
-                "app:name:googleplay": "NYTimes",
-                "app:url:googleplay": "nytimes://reader/id/100000006583622"
+                "app:id:googleplay" => "com.nytimes.android",
+                "app:name:googleplay" => "NYTimes",
+                "app:url:googleplay" => "nytimes://reader/id/100000006583622"
                }}
    end
  end
                }}
    end
  end
author	Egor Kislitsyn <egor@kislitsyn.com>
	Tue, 9 Jun 2020 17:49:24 +0000 (21:49 +0400)
committer	rinpatch <rinpatch@sdf.org>
	Sat, 13 Jun 2020 09:08:46 +0000 (12:08 +0300)
lib/pleroma/web/mastodon_api/views/status_view.ex		patch \| blob \| history
lib/pleroma/web/rich_media/helpers.ex		patch \| blob \| history
lib/pleroma/web/rich_media/parser.ex		patch \| blob \| history
lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex		patch \| blob \| history
lib/pleroma/web/rich_media/parsers/oembed_parser.ex		patch \| blob \| history
test/web/rich_media/parser_test.exs		patch \| blob \| history
test/web/rich_media/parsers/twitter_card_test.exs		patch \| blob \| history