summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Alex Gleason <alex@alexgleason.me> 2021-05-04 14:15:41 -0500
committer Alex Gleason <alex@alexgleason.me> 2021-05-04 14:15:41 -0500
commit 8c4599c1dd9430a29c610413e2a5a623b46e4446 (patch)
tree dc720ee6457c5584c7473cfd8541599084abd21a
parent 63af654688e5f5bbd54e001a62e3fbc20f117dea (diff)
Fix TwitterCard tests
-rw-r--r-- lib/pleroma/web/rich_media/parser/meta_tags.ex 2
-rw-r--r-- lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex 41
-rw-r--r-- lib/pleroma/web/rich_media/parsers/twitter_card.ex 18
-rw-r--r-- test/pleroma/web/rich_media/parsers/twitter_card_test.exs 204
4 files changed, 97 insertions, 168 deletions
diff --git a/lib/pleroma/web/rich_media/parser/meta_tags.ex b/lib/pleroma/web/rich_media/parser/meta_tags.ex
index 888ac3fc4..e5c6b448d 100644
--- a/lib/pleroma/web/rich_media/parser/meta_tags.ex
+++ b/lib/pleroma/web/rich_media/parser/meta_tags.ex
@@ -3,7 +3,7 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parser.MetaTags do
- @moduledoc """
+ @doc """
Parses a `Floki.html_tree/0` and returns a map of raw `<meta>` tag values.
"""
@spec parse(html_tree :: Floki.html_tree()) :: map()
diff --git a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
deleted file mode 100644
index 5375037b3..000000000
--- a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
+++ /dev/null
@@ -1,41 +0,0 @@
-# Pleroma: A lightweight social networking server
-# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
-# SPDX-License-Identifier: AGPL-3.0-only
-
-defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do
- def parse(html, prefix, key_name, value_name \\ "content") do
- html
- |> get_elements(key_name, prefix)
- |> Enum.reduce(%{}, fn el, acc ->
- attributes = normalize_attributes(el, key_name, value_name)
- Map.merge(acc, attributes)
- end)
- end
-
- defp get_elements(html, key_names, prefix) when is_list(key_names) do
- Enum.reduce(key_names, [], fn key_name, acc ->
- acc ++ Floki.find(html, "meta[#{key_name}^='#{prefix}:']")
- end)
- end
-
- defp get_elements(html, key_name, prefix) do
- get_elements(html, [key_name], prefix)
- end
-
- defp normalize_attributes(html_node, key_names, value_name) when is_list(key_names) do
- {_tag, attributes, _children} = html_node
- data = Map.new(attributes)
-
- Enum.reduce(key_names, %{}, fn key_name, acc ->
- if data[key_name], do: Map.put(acc, data[key_name], data[value_name]), else: acc
- end)
- end
-
- defp normalize_attributes(html_node, key_name, value_name) do
- normalize_attributes(html_node, [key_name], value_name)
- end
-
- def get_page_title(html) do
- Floki.find(html, "html head title") |> List.first() |> Floki.text()
- end
-end
diff --git a/lib/pleroma/web/rich_media/parsers/twitter_card.ex b/lib/pleroma/web/rich_media/parsers/twitter_card.ex
index c10b9370e..a892d16ea 100644
--- a/lib/pleroma/web/rich_media/parsers/twitter_card.ex
+++ b/lib/pleroma/web/rich_media/parsers/twitter_card.ex
@@ -3,13 +3,21 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parsers.TwitterCard do
- alias Pleroma.Web.RichMedia.Parsers.MetaTagsParser
+ alias Pleroma.Web.RichMedia.Parser.MetaTags
- @spec parse(list(), map()) :: map()
+ @spec parse(Floki.html_tree(), map()) :: map()
def parse(html, data) do
data
- |> Map.put(:title, MetaTagsParser.get_page_title(html))
- |> Map.put(:opengraph, MetaTagsParser.parse(html, "og", "property"))
- |> Map.put(:twitter, MetaTagsParser.parse(html, "twitter", ["name", "property"]))
+ |> Map.put(:title, get_page_title(html))
+ |> Map.put(:meta, MetaTags.parse(html))
+ end
+
+ def get_page_title(html) do
+ with [node | _] <- Floki.find(html, "html head title"),
+ title when is_binary(title) and title != "" <- Floki.text(node) do
+ title
+ else
+ _ -> nil
+ end
end
end
diff --git a/test/pleroma/web/rich_media/parsers/twitter_card_test.exs b/test/pleroma/web/rich_media/parsers/twitter_card_test.exs
index 909ce636d..1d2aa558e 100644
--- a/test/pleroma/web/rich_media/parsers/twitter_card_test.exs
+++ b/test/pleroma/web/rich_media/parsers/twitter_card_test.exs
@@ -6,8 +6,10 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
use ExUnit.Case, async: true
alias Pleroma.Web.RichMedia.Parsers.TwitterCard
- test "returns error when html not contains twitter card" do
- assert TwitterCard.parse([{"html", [], [{"head", [], []}, {"body", [], []}]}], %{}) == %{}
+ test "fails gracefully with barebones HTML" do
+ html = [{"html", [], [{"head", [], []}, {"body", [], []}]}]
+ expected = %{meta: %{}, title: nil}
+ assert TwitterCard.parse(html, %{}) == expected
end
test "parses twitter card with only name attributes" do
@@ -15,29 +17,24 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
File.read!("test/fixtures/nypd-facial-recognition-children-teenagers3.html")
|> Floki.parse_document!()
- expected = %{
- title:
- "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
- twitter: %{
- "twitter:app:id:googleplay" => "com.nytimes.android",
- "twitter:app:name:googleplay" => "NYTimes",
- "twitter:app:url:googleplay" => "nytimes://reader/id/100000006583622",
- "twitter:site" => nil
- },
- opengraph: %{
- "og:description" =>
- "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
- "og:image" =>
- "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
- "og:title" =>
- "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
- "og:type" => "article",
- "og:url" =>
- "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
- }
- }
-
- assert TwitterCard.parse(html, %{}) == expected
+ assert %{
+ title:
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
+ meta: %{
+ "twitter:app:id:googleplay" => "com.nytimes.android",
+ "twitter:app:name:googleplay" => "NYTimes",
+ "twitter:app:url:googleplay" => "nytimes://reader/id/100000006583622",
+ "og:description" =>
+ "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+ "og:image" =>
+ "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
+ "og:title" =>
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+ "og:type" => "article",
+ "og:url" =>
+ "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
+ }
+ } = TwitterCard.parse(html, %{})
end
test "parses twitter card with only property attributes" do
@@ -45,35 +42,31 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
File.read!("test/fixtures/nypd-facial-recognition-children-teenagers2.html")
|> Floki.parse_document!()
- expected = %{
- title:
- "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
- twitter: %{
- "twitter:card" => "summary_large_image",
- "twitter:description" =>
- "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
- "twitter:image" =>
- "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
- "twitter:image:alt" => "",
- "twitter:title" =>
- "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
- "twitter:url" =>
- "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
- },
- opengraph: %{
- "og:description" =>
- "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
- "og:image" =>
- "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
- "og:title" =>
- "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
- "og:url" =>
- "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
- "og:type" => "article"
- }
- }
-
- assert TwitterCard.parse(html, %{}) == expected
+ assert %{
+ title:
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
+ meta: %{
+ "twitter:card" => "summary_large_image",
+ "twitter:description" =>
+ "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+ "twitter:image" =>
+ "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
+ "twitter:image:alt" => "",
+ "twitter:title" =>
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+ "twitter:url" =>
+ "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+ "og:description" =>
+ "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+ "og:image" =>
+ "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
+ "og:title" =>
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+ "og:url" =>
+ "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+ "og:type" => "article"
+ }
+ } = TwitterCard.parse(html, %{})
end
test "parses twitter card with name & property attributes" do
@@ -81,84 +74,53 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
File.read!("test/fixtures/nypd-facial-recognition-children-teenagers.html")
|> Floki.parse_document!()
- expected = %{
- title:
- "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
- twitter: %{
- "twitter:app:id:googleplay" => "com.nytimes.android",
- "twitter:app:name:googleplay" => "NYTimes",
- "twitter:app:url:googleplay" => "nytimes://reader/id/100000006583622",
- "twitter:card" => "summary_large_image",
- "twitter:description" =>
- "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
- "twitter:image" =>
- "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
- "twitter:image:alt" => "",
- "twitter:site" => nil,
- "twitter:title" =>
- "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
- "twitter:url" =>
- "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
- },
- opengraph: %{
- "og:description" =>
- "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
- "og:image" =>
- "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
- "og:title" =>
- "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
- "og:url" =>
- "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
- "og:type" => "article"
- }
- }
-
- assert TwitterCard.parse(html, %{}) == expected
+ assert %{
+ title:
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
+ meta: %{
+ "twitter:app:id:googleplay" => "com.nytimes.android",
+ "twitter:app:name:googleplay" => "NYTimes",
+ "twitter:app:url:googleplay" => "nytimes://reader/id/100000006583622",
+ "twitter:card" => "summary_large_image",
+ "twitter:description" =>
+ "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+ "twitter:image" =>
+ "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
+ "twitter:image:alt" => "",
+ "twitter:title" =>
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+ "twitter:url" =>
+ "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+ "og:description" =>
+ "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+ "og:image" =>
+ "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
+ "og:title" =>
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+ "og:url" =>
+ "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+ "og:type" => "article"
+ }
+ } = TwitterCard.parse(html, %{})
end
test "respect only first title tag on the page" do
- image_path =
- "https://assets.atlasobscura.com/media/W1siZiIsInVwbG9hZHMvYXNzZXRzLzkwYzgyMzI4LThlMDUtNGRiNS05MDg3LTUzMGUxZTM5N2RmMmVkOTM5ZDM4MGM4OTIx" <>
- "YTQ5MF9EQVIgZXhodW1hdGlvbiBvZiBNYXJnYXJldCBDb3JiaW4gZ3JhdmUgMTkyNi5qcGciXSxbInAiLCJjb252ZXJ0IiwiIl0sWyJwIiwiY29udmVydCIsIi1xdWFsaXR5IDgxIC1hdXRvLW9" <>
- "yaWVudCJdLFsicCIsInRodW1iIiwiNjAweD4iXV0/DAR%20exhumation%20of%20Margaret%20Corbin%20grave%201926.jpg"
-
html =
File.read!("test/fixtures/margaret-corbin-grave-west-point.html") |> Floki.parse_document!()
- assert TwitterCard.parse(html, %{}) ==
- %{
- "site" => "@atlasobscura",
- "title" => "The Missing Grave of Margaret Corbin, Revolutionary War Veteran",
- "card" => "summary_large_image",
- "image" => image_path,
- "description" =>
- "She's the only woman veteran honored with a monument at West Point. But where was she buried?",
- "site_name" => "Atlas Obscura",
- "type" => "article",
- "url" => "http://www.atlasobscura.com/articles/margaret-corbin-grave-west-point"
- }
+ expected = "The Missing Grave of Margaret Corbin, Revolutionary War Veteran - Atlas Obscura"
+
+ assert %{title: ^expected} = TwitterCard.parse(html, %{})
end
- test "takes first founded title in html head if there is html markup error" do
+ test "takes first title found in html head if there is an html markup error" do
html =
File.read!("test/fixtures/nypd-facial-recognition-children-teenagers4.html")
|> Floki.parse_document!()
- assert TwitterCard.parse(html, %{}) ==
- %{
- "site" => nil,
- "title" =>
- "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
- "app:id:googleplay" => "com.nytimes.android",
- "app:name:googleplay" => "NYTimes",
- "app:url:googleplay" => "nytimes://reader/id/100000006583622",
- "description" =>
- "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
- "image" =>
- "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
- "type" => "article",
- "url" =>
- "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
- }
+ expected =
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times"
+
+ assert %{title: ^expected} = TwitterCard.parse(html, %{})
end
end