Update usage of Floki.find/2 to ensure we always pass it data parsed by Floki.parse_document!/1 first

author: Mark Felder <feld@FreeBSD.org> 2020-07-31 17:59:29 -0500
committer: Mark Felder <feld@FreeBSD.org> 2020-07-31 17:59:29 -0500
commit: 5710ff391f516e81cc47539634aae49c8e256147 (patch)
tree: 1e749ac7c3144d2211f27abda6289e03f0db1c45
parent: 8ca993c789fb3d41450419fc49bfa22542368ba8 (diff)
4 files changed, 8 insertions, 20 deletions
diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex
index c8a767935..ef2f78e3f 100644
--- a/lib/pleroma/web/rich_media/parser.ex
+++ b/lib/pleroma/web/rich_media/parser.ex
@@ -92,7 +92,6 @@ defmodule Pleroma.Web.RichMedia.Parser do
         Pleroma.HTTP.get(url, [{"user-agent", rich_media_agent}], adapter: opts)
 
       html
-      |> parse_html()
       |> maybe_parse()
       |> Map.put("url", url)
       |> clean_parsed_data()
@@ -103,8 +102,6 @@ defmodule Pleroma.Web.RichMedia.Parser do
     end
   end
 
-  defp parse_html(html), do: Floki.parse_document!(html)
-
   defp maybe_parse(html) do
     Enum.reduce_while(parsers(), %{}, fn parser, acc ->
       case parser.parse(html, acc) do
diff --git a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
index 3d577e254..589d81f01 100644
--- a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
+++ b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
@@ -15,7 +15,7 @@ defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do
   end
 
   defp get_elements(html, key_name, prefix) do
-    html |> Floki.find("meta[#{key_name}^='#{prefix}:']")
+    Floki.parse_document!(html) |> Floki.find("meta[#{key_name}^='#{prefix}:']")
   end
 
   defp normalize_attributes(html_node, prefix, key_name, value_name) do
@@ -41,6 +41,6 @@ defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do
   defp maybe_put_title(meta, _), do: meta
 
   defp get_page_title(html) do
-    Floki.find(html, "html head title") |> List.first() |> Floki.text()
+    Floki.parse_document!(html) |> Floki.find("html head title") |> List.first() |> Floki.text()
   end
 end
diff --git a/lib/pleroma/web/rich_media/parsers/oembed_parser.ex b/lib/pleroma/web/rich_media/parsers/oembed_parser.ex
index 6bdeac89c..2f1428529 100644
--- a/lib/pleroma/web/rich_media/parsers/oembed_parser.ex
+++ b/lib/pleroma/web/rich_media/parsers/oembed_parser.ex
@@ -14,7 +14,7 @@ defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
   end
 
   defp get_discovery_data(html) do
-    html |> Floki.find("link[type='application/json+oembed']")
+    Floki.parse_document!(html) |> Floki.find("link[type='application/json+oembed']")
   end
 
   defp get_oembed_url([{"link", attributes, _children} | _]) do
diff --git a/test/web/rich_media/parsers/twitter_card_test.exs b/test/web/rich_media/parsers/twitter_card_test.exs
index 219f005a2..edce5078e 100644
--- a/test/web/rich_media/parsers/twitter_card_test.exs
+++ b/test/web/rich_media/parsers/twitter_card_test.exs
@@ -11,9 +11,7 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
   end
 
   test "parses twitter card with only name attributes" do
-    html =
-      File.read!("test/fixtures/nypd-facial-recognition-children-teenagers3.html")
-      |> Floki.parse_document!()
+    html = File.read!("test/fixtures/nypd-facial-recognition-children-teenagers3.html")
 
     assert TwitterCard.parse(html, %{}) ==
              %{
@@ -34,9 +32,7 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
   end
 
   test "parses twitter card with only property attributes" do
-    html =
-      File.read!("test/fixtures/nypd-facial-recognition-children-teenagers2.html")
-      |> Floki.parse_document!()
+    html = File.read!("test/fixtures/nypd-facial-recognition-children-teenagers2.html")
 
     assert TwitterCard.parse(html, %{}) ==
              %{
@@ -55,9 +51,7 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
   end
 
   test "parses twitter card with name & property attributes" do
-    html =
-      File.read!("test/fixtures/nypd-facial-recognition-children-teenagers.html")
-      |> Floki.parse_document!()
+    html = File.read!("test/fixtures/nypd-facial-recognition-children-teenagers.html")
 
     assert TwitterCard.parse(html, %{}) ==
              %{
@@ -85,8 +79,7 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
         "YTQ5MF9EQVIgZXhodW1hdGlvbiBvZiBNYXJnYXJldCBDb3JiaW4gZ3JhdmUgMTkyNi5qcGciXSxbInAiLCJjb252ZXJ0IiwiIl0sWyJwIiwiY29udmVydCIsIi1xdWFsaXR5IDgxIC1hdXRvLW9" <>
         "yaWVudCJdLFsicCIsInRodW1iIiwiNjAweD4iXV0/DAR%20exhumation%20of%20Margaret%20Corbin%20grave%201926.jpg"
 
-    html =
-      File.read!("test/fixtures/margaret-corbin-grave-west-point.html") |> Floki.parse_document!()
+    html = File.read!("test/fixtures/margaret-corbin-grave-west-point.html")
 
     assert TwitterCard.parse(html, %{}) ==
              %{
@@ -103,9 +96,7 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
   end
 
   test "takes first founded title in html head if there is html markup error" do
-    html =
-      File.read!("test/fixtures/nypd-facial-recognition-children-teenagers4.html")
-      |> Floki.parse_document!()
+    html = File.read!("test/fixtures/nypd-facial-recognition-children-teenagers4.html")
 
     assert TwitterCard.parse(html, %{}) ==
              %{
author	Mark Felder <feld@FreeBSD.org>	2020-07-31 17:59:29 -0500
committer	Mark Felder <feld@FreeBSD.org>	2020-07-31 17:59:29 -0500
commit	5710ff391f516e81cc47539634aae49c8e256147 (patch)
tree	1e749ac7c3144d2211f27abda6289e03f0db1c45
parent	8ca993c789fb3d41450419fc49bfa22542368ba8 (diff)