Merge OGP parser with TwitterCard
author Egor Kislitsyn <egor@kislitsyn.com>
Thu, 11 Jun 2020 13:57:31 +0000 (17:57 +0400)
committer Egor Kislitsyn <egor@kislitsyn.com>
Thu, 11 Jun 2020 13:57:31 +0000 (17:57 +0400)
CHANGELOG.md
config/config.exs
config/description.exs
lib/pleroma/web/rich_media/parser.ex
lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
lib/pleroma/web/rich_media/parsers/oembed_parser.ex
lib/pleroma/web/rich_media/parsers/ogp.ex
lib/pleroma/web/rich_media/parsers/twitter_card.ex
test/web/rich_media/parsers/twitter_card_test.exs

index 1cf2210f513bc9e90273d62926a5ab6b7e54c6b7..575eb67b41f5ea32516b901890a56a2de460d2f4 100644 (file)
@@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 ## [unreleased]
 
 ### Changed
+- OGP rich media parser merged with TwitterCard
 <details>
   <summary>API Changes</summary>
 - **Breaking:** Emoji API: changed methods and renamed routes.
index 9508ae07718a24ddfb906506b150154edaa5527b..cafa4082069737b56d3e7b59783a503590b892ad 100644 (file)
@@ -385,7 +385,6 @@ config :pleroma, :rich_media,
   ignore_tld: ["local", "localdomain", "lan"],
   parsers: [
     Pleroma.Web.RichMedia.Parsers.TwitterCard,
-    Pleroma.Web.RichMedia.Parsers.OGP,
     Pleroma.Web.RichMedia.Parsers.OEmbed
   ],
   ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl]
index 807c945e00ca990b3f44bdecf0e77b6f2f40f154..b993959d76d4fa154ad6e4858aa78c9d082511f6 100644 (file)
@@ -2091,9 +2091,7 @@ config :pleroma, :config_description, [
         description:
           "List of Rich Media parsers. Module names are shortened (removed leading `Pleroma.Web.RichMedia.Parsers.` part), but on adding custom module you need to use full name.",
         suggestions: [
-          Pleroma.Web.RichMedia.Parsers.MetaTagsParser,
           Pleroma.Web.RichMedia.Parsers.OEmbed,
-          Pleroma.Web.RichMedia.Parsers.OGP,
           Pleroma.Web.RichMedia.Parsers.TwitterCard
         ]
       },
index 40980def8198d134496ed429a3efd494352c4c95..78e9048f3981580d8aa3e3c65f194d87016e1103 100644 (file)
@@ -105,8 +105,8 @@ defmodule Pleroma.Web.RichMedia.Parser do
   defp maybe_parse(html) do
     Enum.reduce_while(parsers(), %{}, fn parser, acc ->
       case parser.parse(html, acc) do
-        {:ok, data} -> {:halt, data}
-        {:error, _msg} -> {:cont, acc}
+        data when data != %{} -> {:halt, data}
+        _ -> {:cont, acc}
       end
     end)
   end
index ae0f36702ef36492458e5ed1a22183077a4da7c2..c09b96eaefb102b29429550cd9622d7cd77963a4 100644 (file)
@@ -3,22 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0-only
 
 defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do
-  def parse(html, data, prefix, error_message, key_name, value_name \\ "content") do
-    meta_data =
-      html
-      |> get_elements(key_name, prefix)
-      |> Enum.reduce(data, fn el, acc ->
-        attributes = normalize_attributes(el, prefix, key_name, value_name)
-
-        Map.merge(acc, attributes)
-      end)
-      |> maybe_put_title(html)
-
-    if Enum.empty?(meta_data) do
-      {:error, error_message}
-    else
-      {:ok, meta_data}
-    end
+  def parse(data, html, prefix, key_name, value_name \\ "content") do
+    html
+    |> get_elements(key_name, prefix)
+    |> Enum.reduce(data, fn el, acc ->
+      attributes = normalize_attributes(el, prefix, key_name, value_name)
+
+      Map.merge(acc, attributes)
+    end)
+    |> maybe_put_title(html)
   end
 
   defp get_elements(html, key_name, prefix) do
index 8f32bf91b3123c16ca683b2902ba5d8c6523d3df..5d87a90e95dbde92a46e5fe93207dd21319e31a3 100644 (file)
@@ -7,9 +7,9 @@ defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
     with elements = [_ | _] <- get_discovery_data(html),
          {:ok, oembed_url} <- get_oembed_url(elements),
          {:ok, oembed_data} <- get_oembed_data(oembed_url) do
-      {:ok, oembed_data}
+      oembed_data
     else
-      _e -> {:error, "No OEmbed data found"}
+      _e -> %{}
     end
   end
 
index 3e90125882e2f2d58799584bd6be4b726f4b311b..5eebe42f7da7da22b8e7b999d0bd7bbbb49b8ba7 100644 (file)
@@ -5,10 +5,9 @@
 defmodule Pleroma.Web.RichMedia.Parsers.OGP do
   def parse(html, data) do
     Pleroma.Web.RichMedia.Parsers.MetaTagsParser.parse(
-      html,
       data,
+      html,
       "og",
-      "No OGP metadata found",
       "property"
     )
   end
index 09d4b526e4b55341b4699634b8984c789b888370..4a04865d2925e5f12b294b58c6f37975eee97901 100644 (file)
@@ -5,18 +5,11 @@
 defmodule Pleroma.Web.RichMedia.Parsers.TwitterCard do
   alias Pleroma.Web.RichMedia.Parsers.MetaTagsParser
 
-  @spec parse(String.t(), map()) :: {:ok, map()} | {:error, String.t()}
+  @spec parse(list(), map()) :: map()
   def parse(html, data) do
     data
-    |> parse_name_attrs(html)
-    |> parse_property_attrs(html)
-  end
-
-  defp parse_name_attrs(data, html) do
-    MetaTagsParser.parse(html, data, "twitter", %{}, "name")
-  end
-
-  defp parse_property_attrs({_, data}, html) do
-    MetaTagsParser.parse(html, data, "twitter", "No twitter card metadata found", "property")
+    |> MetaTagsParser.parse(html, "og", "property")
+    |> MetaTagsParser.parse(html, "twitter", "name")
+    |> MetaTagsParser.parse(html, "twitter", "property")
   end
 end
index 87c767c15ce8b279f9e78a7cdc075c5f0825cdad..3ccf26651060a2b2d33c14a625ce0d87e4ffa739 100644 (file)
@@ -7,8 +7,7 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
   alias Pleroma.Web.RichMedia.Parsers.TwitterCard
 
   test "returns error when html not contains twitter card" do
-    assert TwitterCard.parse([{"html", [], [{"head", [], []}, {"body", [], []}]}], %{}) ==
-             {:error, "No twitter card metadata found"}
+    assert TwitterCard.parse([{"html", [], [{"head", [], []}, {"body", [], []}]}], %{}) == %{}
   end
 
   test "parses twitter card with only name attributes" do
@@ -17,15 +16,21 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
       |> Floki.parse_document!()
 
     assert TwitterCard.parse(html, %{}) ==
-             {:ok,
-              %{
-                "app:id:googleplay": "com.nytimes.android",
-                "app:name:googleplay": "NYTimes",
-                "app:url:googleplay": "nytimes://reader/id/100000006583622",
-                site: nil,
-                title:
-                  "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times"
-              }}
+             %{
+               "app:id:googleplay": "com.nytimes.android",
+               "app:name:googleplay": "NYTimes",
+               "app:url:googleplay": "nytimes://reader/id/100000006583622",
+               site: nil,
+               description:
+                 "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+               image:
+                 "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
+               type: "article",
+               url:
+                 "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+               title:
+                 "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database."
+             }
   end
 
   test "parses twitter card with only property attributes" do
@@ -34,19 +39,19 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
       |> Floki.parse_document!()
 
     assert TwitterCard.parse(html, %{}) ==
-             {:ok,
-              %{
-                card: "summary_large_image",
-                description:
-                  "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
-                image:
-                  "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
-                "image:alt": "",
-                title:
-                  "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
-                url:
-                  "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
-              }}
+             %{
+               card: "summary_large_image",
+               description:
+                 "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+               image:
+                 "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
+               "image:alt": "",
+               title:
+                 "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+               url:
+                 "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+               type: "article"
+             }
   end
 
   test "parses twitter card with name & property attributes" do
@@ -55,23 +60,23 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
       |> Floki.parse_document!()
 
     assert TwitterCard.parse(html, %{}) ==
-             {:ok,
-              %{
-                "app:id:googleplay": "com.nytimes.android",
-                "app:name:googleplay": "NYTimes",
-                "app:url:googleplay": "nytimes://reader/id/100000006583622",
-                card: "summary_large_image",
-                description:
-                  "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
-                image:
-                  "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
-                "image:alt": "",
-                site: nil,
-                title:
-                  "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
-                url:
-                  "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
-              }}
+             %{
+               "app:id:googleplay": "com.nytimes.android",
+               "app:name:googleplay": "NYTimes",
+               "app:url:googleplay": "nytimes://reader/id/100000006583622",
+               card: "summary_large_image",
+               description:
+                 "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+               image:
+                 "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
+               "image:alt": "",
+               site: nil,
+               title:
+                 "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+               url:
+                 "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+               type: "article"
+             }
   end
 
   test "respect only first title tag on the page" do
@@ -84,14 +89,17 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
       File.read!("test/fixtures/margaret-corbin-grave-west-point.html") |> Floki.parse_document!()
 
     assert TwitterCard.parse(html, %{}) ==
-             {:ok,
-              %{
-                site: "@atlasobscura",
-                title:
-                  "The Missing Grave of Margaret Corbin, Revolutionary War Veteran - Atlas Obscura",
-                card: "summary_large_image",
-                image: image_path
-              }}
+             %{
+               site: "@atlasobscura",
+               title: "The Missing Grave of Margaret Corbin, Revolutionary War Veteran",
+               card: "summary_large_image",
+               image: image_path,
+               description:
+                 "She's the only woman veteran honored with a monument at West Point. But where was she buried?",
+               site_name: "Atlas Obscura",
+               type: "article",
+               url: "http://www.atlasobscura.com/articles/margaret-corbin-grave-west-point"
+             }
   end
 
   test "takes first founded title in html head if there is html markup error" do
@@ -100,14 +108,20 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
       |> Floki.parse_document!()
 
     assert TwitterCard.parse(html, %{}) ==
-             {:ok,
-              %{
-                site: nil,
-                title:
-                  "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
-                "app:id:googleplay": "com.nytimes.android",
-                "app:name:googleplay": "NYTimes",
-                "app:url:googleplay": "nytimes://reader/id/100000006583622"
-              }}
+             %{
+               site: nil,
+               title:
+                 "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+               "app:id:googleplay": "com.nytimes.android",
+               "app:name:googleplay": "NYTimes",
+               "app:url:googleplay": "nytimes://reader/id/100000006583622",
+               description:
+                 "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+               image:
+                 "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
+               type: "article",
+               url:
+                 "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
+             }
   end
 end