Fix atom leak in Rich Media Parser
authorEgor Kislitsyn <egor@kislitsyn.com>
Tue, 9 Jun 2020 17:49:24 +0000 (21:49 +0400)
committerrinpatch <rinpatch@sdf.org>
Sat, 13 Jun 2020 09:08:46 +0000 (12:08 +0300)
lib/pleroma/web/mastodon_api/views/status_view.ex
lib/pleroma/web/rich_media/helpers.ex
lib/pleroma/web/rich_media/parser.ex
lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
lib/pleroma/web/rich_media/parsers/oembed_parser.ex
test/web/rich_media/parser_test.exs
test/web/rich_media/parsers/twitter_card_test.exs

index 8e37150931a9c4c3a0ab7c1ff510e226c5091e49..2c49bedb36760f23698836d988eb952845b9c982 100644 (file)
@@ -377,8 +377,8 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
     page_url_data = URI.parse(page_url)
 
     page_url_data =
     page_url_data = URI.parse(page_url)
 
     page_url_data =
-      if rich_media[:url] != nil do
-        URI.merge(page_url_data, URI.parse(rich_media[:url]))
+      if is_binary(rich_media["url"]) do
+        URI.merge(page_url_data, URI.parse(rich_media["url"]))
       else
         page_url_data
       end
       else
         page_url_data
       end
@@ -386,11 +386,9 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
     page_url = page_url_data |> to_string
 
     image_url =
     page_url = page_url_data |> to_string
 
     image_url =
-      if rich_media[:image] != nil do
-        URI.merge(page_url_data, URI.parse(rich_media[:image]))
+      if is_binary(rich_media["image"]) do
+        URI.merge(page_url_data, URI.parse(rich_media["image"]))
         |> to_string
         |> to_string
-      else
-        nil
       end
 
     %{
       end
 
     %{
@@ -399,8 +397,8 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
       provider_url: page_url_data.scheme <> "://" <> page_url_data.host,
       url: page_url,
       image: image_url |> MediaProxy.url(),
       provider_url: page_url_data.scheme <> "://" <> page_url_data.host,
       url: page_url,
       image: image_url |> MediaProxy.url(),
-      title: rich_media[:title] || "",
-      description: rich_media[:description] || "",
+      title: rich_media["title"] || "",
+      description: rich_media["description"] || "",
       pleroma: %{
         opengraph: rich_media
       }
       pleroma: %{
         opengraph: rich_media
       }
index 9d3d7f978b10b48c9afc3960a222fb85bcfb4061..1729141e996bba5fc9de39846da505390e311f0a 100644 (file)
@@ -9,7 +9,7 @@ defmodule Pleroma.Web.RichMedia.Helpers do
   alias Pleroma.Object
   alias Pleroma.Web.RichMedia.Parser
 
   alias Pleroma.Object
   alias Pleroma.Web.RichMedia.Parser
 
-  @spec validate_page_url(any()) :: :ok | :error
+  @spec validate_page_url(URI.t() | binary()) :: :ok | :error
   defp validate_page_url(page_url) when is_binary(page_url) do
     validate_tld = Application.get_env(:auto_linker, :opts)[:validate_tld]
 
   defp validate_page_url(page_url) when is_binary(page_url) do
     validate_tld = Application.get_env(:auto_linker, :opts)[:validate_tld]
 
@@ -18,8 +18,8 @@ defmodule Pleroma.Web.RichMedia.Helpers do
     |> parse_uri(page_url)
   end
 
     |> parse_uri(page_url)
   end
 
-  defp validate_page_url(%URI{host: host, scheme: scheme, authority: authority})
-       when scheme == "https" and not is_nil(authority) do
+  defp validate_page_url(%URI{host: host, scheme: "https", authority: authority})
+       when is_binary(authority) do
     cond do
       host in Config.get([:rich_media, :ignore_hosts], []) ->
         :error
     cond do
       host in Config.get([:rich_media, :ignore_hosts], []) ->
         :error
index 40980def8198d134496ed429a3efd494352c4c95..d9b5068b117bf81a6b5d0a6b7573d4ed2fb21300 100644 (file)
@@ -91,7 +91,7 @@ defmodule Pleroma.Web.RichMedia.Parser do
       html
       |> parse_html()
       |> maybe_parse()
       html
       |> parse_html()
       |> maybe_parse()
-      |> Map.put(:url, url)
+      |> Map.put("url", url)
       |> clean_parsed_data()
       |> check_parsed_data()
     rescue
       |> clean_parsed_data()
       |> check_parsed_data()
     rescue
@@ -111,8 +111,8 @@ defmodule Pleroma.Web.RichMedia.Parser do
     end)
   end
 
     end)
   end
 
-  defp check_parsed_data(%{title: title} = data)
-       when is_binary(title) and byte_size(title) > 0 do
+  defp check_parsed_data(%{"title" => title} = data)
+       when is_binary(title) and title != "" do
     {:ok, data}
   end
 
     {:ok, data}
   end
 
@@ -123,11 +123,7 @@ defmodule Pleroma.Web.RichMedia.Parser do
   defp clean_parsed_data(data) do
     data
     |> Enum.reject(fn {key, val} ->
   defp clean_parsed_data(data) do
     data
     |> Enum.reject(fn {key, val} ->
-      with {:ok, _} <- Jason.encode(%{key => val}) do
-        false
-      else
-        _ -> true
-      end
+      not match?({:ok, _}, Jason.encode(%{key => val}))
     end)
     |> Map.new()
   end
     end)
     |> Map.new()
   end
index ae0f36702ef36492458e5ed1a22183077a4da7c2..2762b5902970afd06bc7e2bc0fb7a74e3f1ee47b 100644 (file)
@@ -29,19 +29,19 @@ defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do
     {_tag, attributes, _children} = html_node
 
     data =
     {_tag, attributes, _children} = html_node
 
     data =
-      Enum.into(attributes, %{}, fn {name, value} ->
+      Map.new(attributes, fn {name, value} ->
         {name, String.trim_leading(value, "#{prefix}:")}
       end)
 
         {name, String.trim_leading(value, "#{prefix}:")}
       end)
 
-    %{String.to_atom(data[key_name]) => data[value_name]}
+    %{data[key_name] => data[value_name]}
   end
 
   end
 
-  defp maybe_put_title(%{title: _} = meta, _), do: meta
+  defp maybe_put_title(%{"title" => _} = meta, _), do: meta
 
   defp maybe_put_title(meta, html) when meta != %{} do
     case get_page_title(html) do
       "" -> meta
 
   defp maybe_put_title(meta, html) when meta != %{} do
     case get_page_title(html) do
       "" -> meta
-      title -> Map.put_new(meta, :title, title)
+      title -> Map.put_new(meta, "title", title)
     end
   end
 
     end
   end
 
index 8f32bf91b3123c16ca683b2902ba5d8c6523d3df..db8ccf15d146fac213e1e2c6a74f2fa440866b2a 100644 (file)
@@ -5,7 +5,7 @@
 defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
   def parse(html, _data) do
     with elements = [_ | _] <- get_discovery_data(html),
 defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
   def parse(html, _data) do
     with elements = [_ | _] <- get_discovery_data(html),
-         {:ok, oembed_url} <- get_oembed_url(elements),
+         oembed_url when is_binary(oembed_url) <- get_oembed_url(elements),
          {:ok, oembed_data} <- get_oembed_data(oembed_url) do
       {:ok, oembed_data}
     else
          {:ok, oembed_data} <- get_oembed_data(oembed_url) do
       {:ok, oembed_data}
     else
@@ -17,19 +17,13 @@ defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
     html |> Floki.find("link[type='application/json+oembed']")
   end
 
     html |> Floki.find("link[type='application/json+oembed']")
   end
 
-  defp get_oembed_url(nodes) do
-    {"link", attributes, _children} = nodes |> hd()
-
-    {:ok, Enum.into(attributes, %{})["href"]}
+  defp get_oembed_url([{"link", attributes, _children} | _]) do
+    Enum.find_value(attributes, fn {k, v} -> if k == "href", do: v end)
   end
 
   defp get_oembed_data(url) do
   end
 
   defp get_oembed_data(url) do
-    {:ok, %Tesla.Env{body: json}} = Pleroma.HTTP.get(url, [], adapter: [pool: :media])
-
-    {:ok, data} = Jason.decode(json)
-
-    data = data |> Map.new(fn {k, v} -> {String.to_atom(k), v} end)
-
-    {:ok, data}
+    with {:ok, %Tesla.Env{body: json}} <- Pleroma.HTTP.get(url, [], adapter: [pool: :media]) do
+      Jason.decode(json)
+    end
   end
 end
   end
 end
index e54a13bc804a43d25c38f2e92db52c8e5fd353a0..420a612c63e91d33129962b27370b9d1991f3e24 100644 (file)
@@ -60,19 +60,19 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
   test "doesn't just add a title" do
     assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/non-ogp") ==
              {:error,
   test "doesn't just add a title" do
     assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/non-ogp") ==
              {:error,
-              "Found metadata was invalid or incomplete: %{url: \"http://example.com/non-ogp\"}"}
+              "Found metadata was invalid or incomplete: %{\"url\" => \"http://example.com/non-ogp\"}"}
   end
 
   test "parses ogp" do
     assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/ogp") ==
              {:ok,
               %{
   end
 
   test "parses ogp" do
     assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/ogp") ==
              {:ok,
               %{
-                image: "http://ia.media-imdb.com/images/rock.jpg",
-                title: "The Rock",
-                description:
+                "image" => "http://ia.media-imdb.com/images/rock.jpg",
+                "title" => "The Rock",
+                "description" =>
                   "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
                   "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
-                type: "video.movie",
-                url: "http://example.com/ogp"
+                "type" => "video.movie",
+                "url" => "http://example.com/ogp"
               }}
   end
 
               }}
   end
 
@@ -80,12 +80,12 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
     assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/ogp-missing-title") ==
              {:ok,
               %{
     assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/ogp-missing-title") ==
              {:ok,
               %{
-                image: "http://ia.media-imdb.com/images/rock.jpg",
-                title: "The Rock (1996)",
-                description:
+                "image" => "http://ia.media-imdb.com/images/rock.jpg",
+                "title" => "The Rock (1996)",
+                "description" =>
                   "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
                   "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.",
-                type: "video.movie",
-                url: "http://example.com/ogp-missing-title"
+                "type" => "video.movie",
+                "url" => "http://example.com/ogp-missing-title"
               }}
   end
 
               }}
   end
 
@@ -93,12 +93,12 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
     assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/twitter-card") ==
              {:ok,
               %{
     assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/twitter-card") ==
              {:ok,
               %{
-                card: "summary",
-                site: "@flickr",
-                image: "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg",
-                title: "Small Island Developing States Photo Submission",
-                description: "View the album on Flickr.",
-                url: "http://example.com/twitter-card"
+                "card" => "summary",
+                "site" => "@flickr",
+                "image" => "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg",
+                "title" => "Small Island Developing States Photo Submission",
+                "description" => "View the album on Flickr.",
+                "url" => "http://example.com/twitter-card"
               }}
   end
 
               }}
   end
 
@@ -106,27 +106,28 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
     assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/oembed") ==
              {:ok,
               %{
     assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/oembed") ==
              {:ok,
               %{
-                author_name: "‮‭‬bees‬",
-                author_url: "https://www.flickr.com/photos/bees/",
-                cache_age: 3600,
-                flickr_type: "photo",
-                height: "768",
-                html:
+                "author_name" => "‮‭‬bees‬",
+                "author_url" => "https://www.flickr.com/photos/bees/",
+                "cache_age" => 3600,
+                "flickr_type" => "photo",
+                "height" => "768",
+                "html" =>
                   "<a data-flickr-embed=\"true\" href=\"https://www.flickr.com/photos/bees/2362225867/\" title=\"Bacon Lollys by ‮‭‬bees‬, on Flickr\"><img src=\"https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_b.jpg\" width=\"1024\" height=\"768\" alt=\"Bacon Lollys\"></a><script async src=\"https://embedr.flickr.com/assets/client-code.js\" charset=\"utf-8\"></script>",
                   "<a data-flickr-embed=\"true\" href=\"https://www.flickr.com/photos/bees/2362225867/\" title=\"Bacon Lollys by ‮‭‬bees‬, on Flickr\"><img src=\"https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_b.jpg\" width=\"1024\" height=\"768\" alt=\"Bacon Lollys\"></a><script async src=\"https://embedr.flickr.com/assets/client-code.js\" charset=\"utf-8\"></script>",
-                license: "All Rights Reserved",
-                license_id: 0,
-                provider_name: "Flickr",
-                provider_url: "https://www.flickr.com/",
-                thumbnail_height: 150,
-                thumbnail_url: "https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_q.jpg",
-                thumbnail_width: 150,
-                title: "Bacon Lollys",
-                type: "photo",
-                url: "http://example.com/oembed",
-                version: "1.0",
-                web_page: "https://www.flickr.com/photos/bees/2362225867/",
-                web_page_short_url: "https://flic.kr/p/4AK2sc",
-                width: "1024"
+                "license" => "All Rights Reserved",
+                "license_id" => 0,
+                "provider_name" => "Flickr",
+                "provider_url" => "https://www.flickr.com/",
+                "thumbnail_height" => 150,
+                "thumbnail_url" =>
+                  "https://farm4.staticflickr.com/3040/2362225867_4a87ab8baf_q.jpg",
+                "thumbnail_width" => 150,
+                "title" => "Bacon Lollys",
+                "type" => "photo",
+                "url" => "http://example.com/oembed",
+                "version" => "1.0",
+                "web_page" => "https://www.flickr.com/photos/bees/2362225867/",
+                "web_page_short_url" => "https://flic.kr/p/4AK2sc",
+                "width" => "1024"
               }}
   end
 
               }}
   end
 
index 87c767c15ce8b279f9e78a7cdc075c5f0825cdad..847623535b4713bec2e8b8e4ef1bc7547090d05c 100644 (file)
@@ -19,11 +19,11 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
     assert TwitterCard.parse(html, %{}) ==
              {:ok,
               %{
     assert TwitterCard.parse(html, %{}) ==
              {:ok,
               %{
-                "app:id:googleplay": "com.nytimes.android",
-                "app:name:googleplay": "NYTimes",
-                "app:url:googleplay": "nytimes://reader/id/100000006583622",
-                site: nil,
-                title:
+                "app:id:googleplay" => "com.nytimes.android",
+                "app:name:googleplay" => "NYTimes",
+                "app:url:googleplay" => "nytimes://reader/id/100000006583622",
+                "site" => nil,
+                "title" =>
                   "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times"
               }}
   end
                   "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times"
               }}
   end
@@ -36,15 +36,15 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
     assert TwitterCard.parse(html, %{}) ==
              {:ok,
               %{
     assert TwitterCard.parse(html, %{}) ==
              {:ok,
               %{
-                card: "summary_large_image",
-                description:
+                "card" => "summary_large_image",
+                "description" =>
                   "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
                   "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
-                image:
+                "image" =>
                   "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
                   "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
-                "image:alt": "",
-                title:
+                "image:alt" => "",
+                "title" =>
                   "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
                   "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
-                url:
+                "url" =>
                   "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
               }}
   end
                   "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
               }}
   end
@@ -57,19 +57,19 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
     assert TwitterCard.parse(html, %{}) ==
              {:ok,
               %{
     assert TwitterCard.parse(html, %{}) ==
              {:ok,
               %{
-                "app:id:googleplay": "com.nytimes.android",
-                "app:name:googleplay": "NYTimes",
-                "app:url:googleplay": "nytimes://reader/id/100000006583622",
-                card: "summary_large_image",
-                description:
+                "app:id:googleplay" => "com.nytimes.android",
+                "app:name:googleplay" => "NYTimes",
+                "app:url:googleplay" => "nytimes://reader/id/100000006583622",
+                "card" => "summary_large_image",
+                "description" =>
                   "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
                   "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
-                image:
+                "image" =>
                   "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
                   "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
-                "image:alt": "",
-                site: nil,
-                title:
+                "image:alt" => "",
+                "site" => nil,
+                "title" =>
                   "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
                   "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
-                url:
+                "url" =>
                   "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
               }}
   end
                   "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
               }}
   end
@@ -86,11 +86,11 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
     assert TwitterCard.parse(html, %{}) ==
              {:ok,
               %{
     assert TwitterCard.parse(html, %{}) ==
              {:ok,
               %{
-                site: "@atlasobscura",
-                title:
+                "site" => "@atlasobscura",
+                "title" =>
                   "The Missing Grave of Margaret Corbin, Revolutionary War Veteran - Atlas Obscura",
                   "The Missing Grave of Margaret Corbin, Revolutionary War Veteran - Atlas Obscura",
-                card: "summary_large_image",
-                image: image_path
+                "card" => "summary_large_image",
+                "image" => image_path
               }}
   end
 
               }}
   end
 
@@ -102,12 +102,12 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
     assert TwitterCard.parse(html, %{}) ==
              {:ok,
               %{
     assert TwitterCard.parse(html, %{}) ==
              {:ok,
               %{
-                site: nil,
-                title:
+                "site" => nil,
+                "title" =>
                   "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
                   "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
-                "app:id:googleplay": "com.nytimes.android",
-                "app:name:googleplay": "NYTimes",
-                "app:url:googleplay": "nytimes://reader/id/100000006583622"
+                "app:id:googleplay" => "com.nytimes.android",
+                "app:name:googleplay" => "NYTimes",
+                "app:url:googleplay" => "nytimes://reader/id/100000006583622"
               }}
   end
 end
               }}
   end
 end