Merge OGP parser with TwitterCard
author Egor Kislitsyn <egor@kislitsyn.com>
Thu, 11 Jun 2020 13:57:31 +0000 (17:57 +0400)
committer Egor Kislitsyn <egor@kislitsyn.com>
Thu, 11 Jun 2020 13:57:31 +0000 (17:57 +0400)
CHANGELOG.md
config/config.exs
config/description.exs
lib/pleroma/web/rich_media/parser.ex
lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
lib/pleroma/web/rich_media/parsers/oembed_parser.ex
lib/pleroma/web/rich_media/parsers/ogp.ex
lib/pleroma/web/rich_media/parsers/twitter_card.ex
test/web/rich_media/parsers/twitter_card_test.exs

index 1cf2210f513bc9e90273d62926a5ab6b7e54c6b7..575eb67b41f5ea32516b901890a56a2de460d2f4 100644 (file)
@@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 ## [unreleased]
 
 ### Changed
 ## [unreleased]
 
 ### Changed
+- OGP rich media parser merged with TwitterCard
 <details>
   <summary>API Changes</summary>
 - **Breaking:** Emoji API: changed methods and renamed routes.
 <details>
   <summary>API Changes</summary>
 - **Breaking:** Emoji API: changed methods and renamed routes.
index 9508ae07718a24ddfb906506b150154edaa5527b..cafa4082069737b56d3e7b59783a503590b892ad 100644 (file)
@@ -385,7 +385,6 @@ config :pleroma, :rich_media,
   ignore_tld: ["local", "localdomain", "lan"],
   parsers: [
     Pleroma.Web.RichMedia.Parsers.TwitterCard,
   ignore_tld: ["local", "localdomain", "lan"],
   parsers: [
     Pleroma.Web.RichMedia.Parsers.TwitterCard,
-    Pleroma.Web.RichMedia.Parsers.OGP,
     Pleroma.Web.RichMedia.Parsers.OEmbed
   ],
   ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl]
     Pleroma.Web.RichMedia.Parsers.OEmbed
   ],
   ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl]
index 807c945e00ca990b3f44bdecf0e77b6f2f40f154..b993959d76d4fa154ad6e4858aa78c9d082511f6 100644 (file)
@@ -2091,9 +2091,7 @@ config :pleroma, :config_description, [
         description:
           "List of Rich Media parsers. Module names are shortened (removed leading `Pleroma.Web.RichMedia.Parsers.` part), but on adding custom module you need to use full name.",
         suggestions: [
         description:
           "List of Rich Media parsers. Module names are shortened (removed leading `Pleroma.Web.RichMedia.Parsers.` part), but on adding custom module you need to use full name.",
         suggestions: [
-          Pleroma.Web.RichMedia.Parsers.MetaTagsParser,
           Pleroma.Web.RichMedia.Parsers.OEmbed,
           Pleroma.Web.RichMedia.Parsers.OEmbed,
-          Pleroma.Web.RichMedia.Parsers.OGP,
           Pleroma.Web.RichMedia.Parsers.TwitterCard
         ]
       },
           Pleroma.Web.RichMedia.Parsers.TwitterCard
         ]
       },
index 40980def8198d134496ed429a3efd494352c4c95..78e9048f3981580d8aa3e3c65f194d87016e1103 100644 (file)
@@ -105,8 +105,8 @@ defmodule Pleroma.Web.RichMedia.Parser do
   defp maybe_parse(html) do
     Enum.reduce_while(parsers(), %{}, fn parser, acc ->
       case parser.parse(html, acc) do
   defp maybe_parse(html) do
     Enum.reduce_while(parsers(), %{}, fn parser, acc ->
       case parser.parse(html, acc) do
-        {:ok, data} -> {:halt, data}
-        {:error, _msg} -> {:cont, acc}
+        data when data != %{} -> {:halt, data}
+        _ -> {:cont, acc}
       end
     end)
   end
       end
     end)
   end
index ae0f36702ef36492458e5ed1a22183077a4da7c2..c09b96eaefb102b29429550cd9622d7cd77963a4 100644 (file)
@@ -3,22 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0-only
 
 defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do
 # SPDX-License-Identifier: AGPL-3.0-only
 
 defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do
-  def parse(html, data, prefix, error_message, key_name, value_name \\ "content") do
-    meta_data =
-      html
-      |> get_elements(key_name, prefix)
-      |> Enum.reduce(data, fn el, acc ->
-        attributes = normalize_attributes(el, prefix, key_name, value_name)
-
-        Map.merge(acc, attributes)
-      end)
-      |> maybe_put_title(html)
-
-    if Enum.empty?(meta_data) do
-      {:error, error_message}
-    else
-      {:ok, meta_data}
-    end
+  def parse(data, html, prefix, key_name, value_name \\ "content") do
+    html
+    |> get_elements(key_name, prefix)
+    |> Enum.reduce(data, fn el, acc ->
+      attributes = normalize_attributes(el, prefix, key_name, value_name)
+
+      Map.merge(acc, attributes)
+    end)
+    |> maybe_put_title(html)
   end
 
   defp get_elements(html, key_name, prefix) do
   end
 
   defp get_elements(html, key_name, prefix) do
index 8f32bf91b3123c16ca683b2902ba5d8c6523d3df..5d87a90e95dbde92a46e5fe93207dd21319e31a3 100644 (file)
@@ -7,9 +7,9 @@ defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
     with elements = [_ | _] <- get_discovery_data(html),
          {:ok, oembed_url} <- get_oembed_url(elements),
          {:ok, oembed_data} <- get_oembed_data(oembed_url) do
     with elements = [_ | _] <- get_discovery_data(html),
          {:ok, oembed_url} <- get_oembed_url(elements),
          {:ok, oembed_data} <- get_oembed_data(oembed_url) do
-      {:ok, oembed_data}
+      oembed_data
     else
     else
-      _e -> {:error, "No OEmbed data found"}
+      _e -> %{}
     end
   end
 
     end
   end
 
index 3e90125882e2f2d58799584bd6be4b726f4b311b..5eebe42f7da7da22b8e7b999d0bd7bbbb49b8ba7 100644 (file)
@@ -5,10 +5,9 @@
 defmodule Pleroma.Web.RichMedia.Parsers.OGP do
   def parse(html, data) do
     Pleroma.Web.RichMedia.Parsers.MetaTagsParser.parse(
 defmodule Pleroma.Web.RichMedia.Parsers.OGP do
   def parse(html, data) do
     Pleroma.Web.RichMedia.Parsers.MetaTagsParser.parse(
-      html,
       data,
       data,
+      html,
       "og",
       "og",
-      "No OGP metadata found",
       "property"
     )
   end
       "property"
     )
   end
index 09d4b526e4b55341b4699634b8984c789b888370..4a04865d2925e5f12b294b58c6f37975eee97901 100644 (file)
@@ -5,18 +5,11 @@
 defmodule Pleroma.Web.RichMedia.Parsers.TwitterCard do
   alias Pleroma.Web.RichMedia.Parsers.MetaTagsParser
 
 defmodule Pleroma.Web.RichMedia.Parsers.TwitterCard do
   alias Pleroma.Web.RichMedia.Parsers.MetaTagsParser
 
-  @spec parse(String.t(), map()) :: {:ok, map()} | {:error, String.t()}
+  @spec parse(list(), map()) :: map()
   def parse(html, data) do
     data
   def parse(html, data) do
     data
-    |> parse_name_attrs(html)
-    |> parse_property_attrs(html)
-  end
-
-  defp parse_name_attrs(data, html) do
-    MetaTagsParser.parse(html, data, "twitter", %{}, "name")
-  end
-
-  defp parse_property_attrs({_, data}, html) do
-    MetaTagsParser.parse(html, data, "twitter", "No twitter card metadata found", "property")
+    |> MetaTagsParser.parse(html, "og", "property")
+    |> MetaTagsParser.parse(html, "twitter", "name")
+    |> MetaTagsParser.parse(html, "twitter", "property")
   end
 end
   end
 end
index 87c767c15ce8b279f9e78a7cdc075c5f0825cdad..3ccf26651060a2b2d33c14a625ce0d87e4ffa739 100644 (file)
@@ -7,8 +7,7 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
   alias Pleroma.Web.RichMedia.Parsers.TwitterCard
 
   test "returns error when html not contains twitter card" do
   alias Pleroma.Web.RichMedia.Parsers.TwitterCard
 
   test "returns error when html not contains twitter card" do
-    assert TwitterCard.parse([{"html", [], [{"head", [], []}, {"body", [], []}]}], %{}) ==
-             {:error, "No twitter card metadata found"}
+    assert TwitterCard.parse([{"html", [], [{"head", [], []}, {"body", [], []}]}], %{}) == %{}
   end
 
   test "parses twitter card with only name attributes" do
   end
 
   test "parses twitter card with only name attributes" do
@@ -17,15 +16,21 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
       |> Floki.parse_document!()
 
     assert TwitterCard.parse(html, %{}) ==
       |> Floki.parse_document!()
 
     assert TwitterCard.parse(html, %{}) ==
-             {:ok,
-              %{
-                "app:id:googleplay": "com.nytimes.android",
-                "app:name:googleplay": "NYTimes",
-                "app:url:googleplay": "nytimes://reader/id/100000006583622",
-                site: nil,
-                title:
-                  "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times"
-              }}
+             %{
+               "app:id:googleplay": "com.nytimes.android",
+               "app:name:googleplay": "NYTimes",
+               "app:url:googleplay": "nytimes://reader/id/100000006583622",
+               site: nil,
+               description:
+                 "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+               image:
+                 "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
+               type: "article",
+               url:
+                 "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+               title:
+                 "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database."
+             }
   end
 
   test "parses twitter card with only property attributes" do
   end
 
   test "parses twitter card with only property attributes" do
@@ -34,19 +39,19 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
       |> Floki.parse_document!()
 
     assert TwitterCard.parse(html, %{}) ==
       |> Floki.parse_document!()
 
     assert TwitterCard.parse(html, %{}) ==
-             {:ok,
-              %{
-                card: "summary_large_image",
-                description:
-                  "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
-                image:
-                  "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
-                "image:alt": "",
-                title:
-                  "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
-                url:
-                  "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
-              }}
+             %{
+               card: "summary_large_image",
+               description:
+                 "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+               image:
+                 "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
+               "image:alt": "",
+               title:
+                 "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+               url:
+                 "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+               type: "article"
+             }
   end
 
   test "parses twitter card with name & property attributes" do
   end
 
   test "parses twitter card with name & property attributes" do
@@ -55,23 +60,23 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
       |> Floki.parse_document!()
 
     assert TwitterCard.parse(html, %{}) ==
       |> Floki.parse_document!()
 
     assert TwitterCard.parse(html, %{}) ==
-             {:ok,
-              %{
-                "app:id:googleplay": "com.nytimes.android",
-                "app:name:googleplay": "NYTimes",
-                "app:url:googleplay": "nytimes://reader/id/100000006583622",
-                card: "summary_large_image",
-                description:
-                  "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
-                image:
-                  "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
-                "image:alt": "",
-                site: nil,
-                title:
-                  "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
-                url:
-                  "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
-              }}
+             %{
+               "app:id:googleplay": "com.nytimes.android",
+               "app:name:googleplay": "NYTimes",
+               "app:url:googleplay": "nytimes://reader/id/100000006583622",
+               card: "summary_large_image",
+               description:
+                 "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+               image:
+                 "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
+               "image:alt": "",
+               site: nil,
+               title:
+                 "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+               url:
+                 "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+               type: "article"
+             }
   end
 
   test "respect only first title tag on the page" do
   end
 
   test "respect only first title tag on the page" do
@@ -84,14 +89,17 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
       File.read!("test/fixtures/margaret-corbin-grave-west-point.html") |> Floki.parse_document!()
 
     assert TwitterCard.parse(html, %{}) ==
       File.read!("test/fixtures/margaret-corbin-grave-west-point.html") |> Floki.parse_document!()
 
     assert TwitterCard.parse(html, %{}) ==
-             {:ok,
-              %{
-                site: "@atlasobscura",
-                title:
-                  "The Missing Grave of Margaret Corbin, Revolutionary War Veteran - Atlas Obscura",
-                card: "summary_large_image",
-                image: image_path
-              }}
+             %{
+               site: "@atlasobscura",
+               title: "The Missing Grave of Margaret Corbin, Revolutionary War Veteran",
+               card: "summary_large_image",
+               image: image_path,
+               description:
+                 "She's the only woman veteran honored with a monument at West Point. But where was she buried?",
+               site_name: "Atlas Obscura",
+               type: "article",
+               url: "http://www.atlasobscura.com/articles/margaret-corbin-grave-west-point"
+             }
   end
 
   test "takes first founded title in html head if there is html markup error" do
   end
 
   test "takes first founded title in html head if there is html markup error" do
@@ -100,14 +108,20 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
       |> Floki.parse_document!()
 
     assert TwitterCard.parse(html, %{}) ==
       |> Floki.parse_document!()
 
     assert TwitterCard.parse(html, %{}) ==
-             {:ok,
-              %{
-                site: nil,
-                title:
-                  "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
-                "app:id:googleplay": "com.nytimes.android",
-                "app:name:googleplay": "NYTimes",
-                "app:url:googleplay": "nytimes://reader/id/100000006583622"
-              }}
+             %{
+               site: nil,
+               title:
+                 "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+               "app:id:googleplay": "com.nytimes.android",
+               "app:name:googleplay": "NYTimes",
+               "app:url:googleplay": "nytimes://reader/id/100000006583622",
+               description:
+                 "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+               image:
+                 "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
+               type: "article",
+               url:
+                 "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
+             }
   end
 end
   end
 end