summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alex Gleason <alex@alexgleason.me>, 2021-05-04 13:31:02 -0500
committer: Alex Gleason <alex@alexgleason.me>, 2021-05-04 13:32:31 -0500
commit: 63af654688e5f5bbd54e001a62e3fbc20f117dea (patch)
tree: b9e1816db891cdf0f625cbb65f2efc01d5525351
parent: b045dc6058e689eb0936fd963edaff8ba6287533 (diff)
Update TwitterCard tests and add Parser.MetaTags
-rw-r--r-- lib/pleroma/web/rich_media/parser.ex | 7
-rw-r--r-- lib/pleroma/web/rich_media/parser/meta_tags.ex | 30
-rw-r--r-- lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex | 41
-rw-r--r-- lib/pleroma/web/rich_media/parsers/o_embed.ex | 6
-rw-r--r-- lib/pleroma/web/rich_media/parsers/twitter_card.ex | 7
-rw-r--r-- test/pleroma/web/rich_media/parser/meta_tags_test.exs | 81
-rw-r--r-- test/pleroma/web/rich_media/parsers/twitter_card_test.exs | 133
7 files changed, 222 insertions, 83 deletions
diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex
index dee0f61ac..d628513be 100644
--- a/lib/pleroma/web/rich_media/parser.ex
+++ b/lib/pleroma/web/rich_media/parser.ex
@@ -164,11 +164,8 @@ defmodule Pleroma.Web.RichMedia.Parser do
end
defp maybe_parse(html) do
- Enum.reduce_while(parsers(), %{}, fn parser, acc ->
- case parser.parse(html, acc) do
- data when data != %{} -> {:halt, data}
- _ -> {:cont, acc}
- end
+ Enum.reduce(parsers(), %{}, fn parser, acc ->
+ parser.parse(html, acc)
end)
end
diff --git a/lib/pleroma/web/rich_media/parser/meta_tags.ex b/lib/pleroma/web/rich_media/parser/meta_tags.ex
new file mode 100644
index 000000000..888ac3fc4
--- /dev/null
+++ b/lib/pleroma/web/rich_media/parser/meta_tags.ex
@@ -0,0 +1,30 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Web.RichMedia.Parser.MetaTags do
+ @moduledoc """
+ Parses a `Floki.html_tree/0` and returns a map of raw `<meta>` tag values.
+ """
+ @spec parse(html_tree :: Floki.html_tree()) :: map()
+ def parse(html_tree) do
+ html_tree
+ |> Floki.find("meta")
+ |> Enum.reduce(%{}, fn html_node, acc ->
+ case parse_node(html_node) do
+ {:ok, {name, content}} -> Map.put(acc, name, content)
+ _ -> acc
+ end
+ end)
+ end
+
+ defp parse_node({_tag, attrs, _children}) when is_list(attrs) do
+ case Map.new(attrs) do
+ %{"name" => name, "content" => content} -> {:ok, {name, content}}
+ %{"property" => name, "content" => content} -> {:ok, {name, content}}
+ _ -> {:error, :invalid_meta_tag}
+ end
+ end
+
+ defp parse_node(_), do: {:error, :invalid_meta_tag}
+end
diff --git a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
index 31c3d1e33..5375037b3 100644
--- a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
+++ b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
@@ -3,44 +3,39 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do
- def parse(data, html, prefix, key_name, value_name \\ "content") do
+ def parse(html, prefix, key_name, value_name \\ "content") do
html
|> get_elements(key_name, prefix)
- |> Enum.reduce(data, fn el, acc ->
- attributes = normalize_attributes(el, prefix, key_name, value_name)
-
+ |> Enum.reduce(%{}, fn el, acc ->
+ attributes = normalize_attributes(el, key_name, value_name)
Map.merge(acc, attributes)
end)
- |> maybe_put_title(html)
+ end
+
+ defp get_elements(html, key_names, prefix) when is_list(key_names) do
+ Enum.reduce(key_names, [], fn key_name, acc ->
+ acc ++ Floki.find(html, "meta[#{key_name}^='#{prefix}:']")
+ end)
end
defp get_elements(html, key_name, prefix) do
- html |> Floki.find("meta[#{key_name}^='#{prefix}:']")
+ get_elements(html, [key_name], prefix)
end
- defp normalize_attributes(html_node, prefix, key_name, value_name) do
+ defp normalize_attributes(html_node, key_names, value_name) when is_list(key_names) do
{_tag, attributes, _children} = html_node
+ data = Map.new(attributes)
- data =
- Map.new(attributes, fn {name, value} ->
- {name, String.trim_leading(value, "#{prefix}:")}
- end)
-
- %{data[key_name] => data[value_name]}
+ Enum.reduce(key_names, %{}, fn key_name, acc ->
+ if data[key_name], do: Map.put(acc, data[key_name], data[value_name]), else: acc
+ end)
end
- defp maybe_put_title(%{"title" => _} = meta, _), do: meta
-
- defp maybe_put_title(meta, html) when meta != %{} do
- case get_page_title(html) do
- "" -> meta
- title -> Map.put_new(meta, "title", title)
- end
+ defp normalize_attributes(html_node, key_name, value_name) do
+ normalize_attributes(html_node, [key_name], value_name)
end
- defp maybe_put_title(meta, _), do: meta
-
- defp get_page_title(html) do
+ def get_page_title(html) do
Floki.find(html, "html head title") |> List.first() |> Floki.text()
end
end
diff --git a/lib/pleroma/web/rich_media/parsers/o_embed.ex b/lib/pleroma/web/rich_media/parsers/o_embed.ex
index 8dc378c3c..4f7cd1260 100644
--- a/lib/pleroma/web/rich_media/parsers/o_embed.ex
+++ b/lib/pleroma/web/rich_media/parsers/o_embed.ex
@@ -3,13 +3,13 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
- def parse(html, _data) do
+ def parse(html, data) do
with elements = [_ | _] <- get_discovery_data(html),
oembed_url when is_binary(oembed_url) <- get_oembed_url(elements),
{:ok, oembed_data} <- get_oembed_data(oembed_url) do
- oembed_data
+ Map.put(data, :oembed, oembed_data)
else
- _e -> %{}
+ _e -> data
end
end
diff --git a/lib/pleroma/web/rich_media/parsers/twitter_card.ex b/lib/pleroma/web/rich_media/parsers/twitter_card.ex
index 31546819e..c10b9370e 100644
--- a/lib/pleroma/web/rich_media/parsers/twitter_card.ex
+++ b/lib/pleroma/web/rich_media/parsers/twitter_card.ex
@@ -8,9 +8,8 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCard do
@spec parse(list(), map()) :: map()
def parse(html, data) do
data
- |> MetaTagsParser.parse(html, "og", "property")
- |> MetaTagsParser.parse(html, "twitter", "name")
- |> MetaTagsParser.parse(html, "twitter", "property")
- |> Map.put("type", "link")
+ |> Map.put(:title, MetaTagsParser.get_page_title(html))
+ |> Map.put(:opengraph, MetaTagsParser.parse(html, "og", "property"))
+ |> Map.put(:twitter, MetaTagsParser.parse(html, "twitter", ["name", "property"]))
end
end
diff --git a/test/pleroma/web/rich_media/parser/meta_tags_test.exs b/test/pleroma/web/rich_media/parser/meta_tags_test.exs
new file mode 100644
index 000000000..128c83a95
--- /dev/null
+++ b/test/pleroma/web/rich_media/parser/meta_tags_test.exs
@@ -0,0 +1,81 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Web.RichMedia.Parser.MetaTagsTest do
+ use ExUnit.Case, async: true
+ alias Pleroma.Web.RichMedia.Parser.MetaTags
+
+ test "returns a map of <meta> values" do
+ html =
+ File.read!("test/fixtures/nypd-facial-recognition-children-teenagers.html")
+ |> Floki.parse_document!()
+
+ expected = %{
+ "CG" => "nyregion",
+ "CN" => "experience-tech-and-society",
+ "CT" => "spotlight",
+ "PST" => "News",
+ "PT" => "article",
+ "SCG" => "",
+ "al:android:app_name" => "NYTimes",
+ "al:android:package" => "com.nytimes.android",
+ "al:android:url" => "nytimes://reader/id/100000006583622",
+ "al:ipad:app_name" => "NYTimes",
+ "al:ipad:app_store_id" => "357066198",
+ "al:ipad:url" =>
+ "nytimes://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+ "al:iphone:app_name" => "NYTimes",
+ "al:iphone:app_store_id" => "284862083",
+ "al:iphone:url" =>
+ "nytimes://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+ "article:modified" => "2019-08-02T09:30:23.000Z",
+ "article:published" => "2019-08-01T17:15:31.000Z",
+ "article:section" => "New York",
+ "article:tag" => "New York City",
+ "articleid" => "100000006583622",
+ "byl" => "By Joseph Goldstein and Ali Watkins",
+ "description" =>
+ "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+ "fb:app_id" => "9869919170",
+ "image" =>
+ "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
+ "msapplication-starturl" => "https://www.nytimes.com",
+ "news_keywords" =>
+ "NYPD,Juvenile delinquency,Facial Recognition,Privacy,Government Surveillance,Police,Civil Rights,NYC",
+ "nyt_uri" => "nyt://article/9da58246-2495-505f-9abd-b5fda8e67b56",
+ "og:description" =>
+ "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+ "og:image" =>
+ "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
+ "og:title" =>
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+ "og:type" => "article",
+ "og:url" =>
+ "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+ "pdate" => "20190801",
+ "pubp_event_id" => "pubp://event/47a657bafa8a476bb36832f90ee5ac6e",
+ "robots" => "noarchive",
+ "thumbnail" =>
+ "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-thumbStandard.jpg",
+ "twitter:app:id:googleplay" => "com.nytimes.android",
+ "twitter:app:name:googleplay" => "NYTimes",
+ "twitter:app:url:googleplay" => "nytimes://reader/id/100000006583622",
+ "twitter:card" => "summary_large_image",
+ "twitter:description" =>
+ "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+ "twitter:image" =>
+ "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
+ "twitter:image:alt" => "",
+ "twitter:title" =>
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+ "twitter:url" =>
+ "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+ "url" =>
+ "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+ "viewport" => "width=device-width, initial-scale=1, maximum-scale=1"
+ }
+
+ assert MetaTags.parse(html) == expected
+ end
+end
diff --git a/test/pleroma/web/rich_media/parsers/twitter_card_test.exs b/test/pleroma/web/rich_media/parsers/twitter_card_test.exs
index 2aacd29a3..909ce636d 100644
--- a/test/pleroma/web/rich_media/parsers/twitter_card_test.exs
+++ b/test/pleroma/web/rich_media/parsers/twitter_card_test.exs
@@ -15,22 +15,29 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
File.read!("test/fixtures/nypd-facial-recognition-children-teenagers3.html")
|> Floki.parse_document!()
- assert TwitterCard.parse(html, %{}) ==
- %{
- "app:id:googleplay" => "com.nytimes.android",
- "app:name:googleplay" => "NYTimes",
- "app:url:googleplay" => "nytimes://reader/id/100000006583622",
- "site" => nil,
- "description" =>
- "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
- "image" =>
- "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
- "type" => "article",
- "url" =>
- "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
- "title" =>
- "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database."
- }
+ expected = %{
+ title:
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
+ twitter: %{
+ "twitter:app:id:googleplay" => "com.nytimes.android",
+ "twitter:app:name:googleplay" => "NYTimes",
+ "twitter:app:url:googleplay" => "nytimes://reader/id/100000006583622",
+ "twitter:site" => nil
+ },
+ opengraph: %{
+ "og:description" =>
+ "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+ "og:image" =>
+ "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
+ "og:title" =>
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+ "og:type" => "article",
+ "og:url" =>
+ "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
+ }
+ }
+
+ assert TwitterCard.parse(html, %{}) == expected
end
test "parses twitter card with only property attributes" do
@@ -38,20 +45,35 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
File.read!("test/fixtures/nypd-facial-recognition-children-teenagers2.html")
|> Floki.parse_document!()
- assert TwitterCard.parse(html, %{}) ==
- %{
- "card" => "summary_large_image",
- "description" =>
- "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
- "image" =>
- "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
- "image:alt" => "",
- "title" =>
- "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
- "url" =>
- "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
- "type" => "article"
- }
+ expected = %{
+ title:
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
+ twitter: %{
+ "twitter:card" => "summary_large_image",
+ "twitter:description" =>
+ "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+ "twitter:image" =>
+ "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
+ "twitter:image:alt" => "",
+ "twitter:title" =>
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+ "twitter:url" =>
+ "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
+ },
+ opengraph: %{
+ "og:description" =>
+ "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+ "og:image" =>
+ "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
+ "og:title" =>
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+ "og:url" =>
+ "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+ "og:type" => "article"
+ }
+ }
+
+ assert TwitterCard.parse(html, %{}) == expected
end
test "parses twitter card with name & property attributes" do
@@ -59,24 +81,39 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
File.read!("test/fixtures/nypd-facial-recognition-children-teenagers.html")
|> Floki.parse_document!()
- assert TwitterCard.parse(html, %{}) ==
- %{
- "app:id:googleplay" => "com.nytimes.android",
- "app:name:googleplay" => "NYTimes",
- "app:url:googleplay" => "nytimes://reader/id/100000006583622",
- "card" => "summary_large_image",
- "description" =>
- "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
- "image" =>
- "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
- "image:alt" => "",
- "site" => nil,
- "title" =>
- "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
- "url" =>
- "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
- "type" => "article"
- }
+ expected = %{
+ title:
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
+ twitter: %{
+ "twitter:app:id:googleplay" => "com.nytimes.android",
+ "twitter:app:name:googleplay" => "NYTimes",
+ "twitter:app:url:googleplay" => "nytimes://reader/id/100000006583622",
+ "twitter:card" => "summary_large_image",
+ "twitter:description" =>
+ "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+ "twitter:image" =>
+ "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-videoSixteenByNineJumbo1600.jpg",
+ "twitter:image:alt" => "",
+ "twitter:site" => nil,
+ "twitter:title" =>
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+ "twitter:url" =>
+ "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
+ },
+ opengraph: %{
+ "og:description" =>
+ "With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
+ "og:image" =>
+ "https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
+ "og:title" =>
+ "She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
+ "og:url" =>
+ "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
+ "og:type" => "article"
+ }
+ }
+
+ assert TwitterCard.parse(html, %{}) == expected
end
test "respect only first title tag on the page" do