When fixing this problem I incorrectly assumed that a.hashtag is
the proper way to detect hashtags, but it is just something Pleroma and
Mastodon add. Per microformats, a hashtag should be detected by the presence of rel=tag.
This MR adds a check for rel=tag, but I still left a.hashtag in place just in case.
Cachex.fetch!(:scrubber_cache, key, fn _key ->
result =
content
- |> Floki.filter_out("a.mention,a.hashtag")
+ |> Floki.filter_out("a.mention,a.hashtag,a[rel~=\"tag\"]")
|> Floki.attribute("a", "href")
|> Enum.at(0)
assert url == "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140"
end
+
+ test "skips microformats hashtags" do
+ user = insert(:user)
+
+ {:ok, activity} =
+ CommonAPI.post(user, %{
+ "status" =>
+ "<a href=\"https://pleroma.gov/tags/cofe\" rel=\"tag\">#cofe</a> https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140",
+ "content_type" => "text/html"
+ })
+
+ object = Object.normalize(activity)
+ {:ok, url} = HTML.extract_first_external_url(object, object.data["content"])
+
+ assert url == "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140"
+ end
end
end