summaryrefslogtreecommitdiff
path: root/lib/pleroma/web/rich_media
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pleroma/web/rich_media')
-rw-r--r--lib/pleroma/web/rich_media/helpers.ex40
-rw-r--r--lib/pleroma/web/rich_media/parser.ex13
-rw-r--r--lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex25
-rw-r--r--lib/pleroma/web/rich_media/parsers/oembed_parser.ex6
-rw-r--r--lib/pleroma/web/rich_media/parsers/ogp.ex11
-rw-r--r--lib/pleroma/web/rich_media/parsers/twitter_card.ex15
6 files changed, 57 insertions, 53 deletions
diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex
index 1729141e9..6210f2c5a 100644
--- a/lib/pleroma/web/rich_media/helpers.ex
+++ b/lib/pleroma/web/rich_media/helpers.ex
@@ -9,12 +9,17 @@ defmodule Pleroma.Web.RichMedia.Helpers do
alias Pleroma.Object
alias Pleroma.Web.RichMedia.Parser
+ @rich_media_options [
+ pool: :media,
+ max_body: 2_000_000
+ ]
+
@spec validate_page_url(URI.t() | binary()) :: :ok | :error
defp validate_page_url(page_url) when is_binary(page_url) do
- validate_tld = Application.get_env(:auto_linker, :opts)[:validate_tld]
+ validate_tld = Pleroma.Config.get([Pleroma.Formatter, :validate_tld])
page_url
- |> AutoLinker.Parser.url?(scheme: true, validate_tld: validate_tld)
+ |> Linkify.Parser.url?(validate_tld: validate_tld)
|> parse_uri(page_url)
end
@@ -49,11 +54,11 @@ defmodule Pleroma.Web.RichMedia.Helpers do
|> hd
end
- def fetch_data_for_activity(%Activity{data: %{"type" => "Create"}} = activity) do
+ def fetch_data_for_object(object) do
with true <- Config.get([:rich_media, :enabled]),
- %Object{} = object <- Object.normalize(activity),
false <- object.data["sensitive"] || false,
- {:ok, page_url} <- HTML.extract_first_external_url(object, object.data["content"]),
+ {:ok, page_url} <-
+ HTML.extract_first_external_url(object, object.data["content"]),
:ok <- validate_page_url(page_url),
{:ok, rich_media} <- Parser.parse(page_url) do
%{page_url: page_url, rich_media: rich_media}
@@ -62,10 +67,35 @@ defmodule Pleroma.Web.RichMedia.Helpers do
end
end
+ def fetch_data_for_activity(%Activity{data: %{"type" => "Create"}} = activity) do
+ with true <- Config.get([:rich_media, :enabled]),
+ %Object{} = object <- Object.normalize(activity) do
+ fetch_data_for_object(object)
+ else
+ _ -> %{}
+ end
+ end
+
def fetch_data_for_activity(_), do: %{}
def perform(:fetch, %Activity{} = activity) do
fetch_data_for_activity(activity)
:ok
end
+
+ def rich_media_get(url) do
+ headers = [{"user-agent", Pleroma.Application.user_agent() <> "; Bot"}]
+
+ options =
+ if Application.get_env(:tesla, :adapter) == Tesla.Adapter.Hackney do
+ Keyword.merge(@rich_media_options,
+ recv_timeout: 2_000,
+ with_body: true
+ )
+ else
+ @rich_media_options
+ end
+
+ Pleroma.HTTP.get(url, headers, options)
+ end
end
diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex
index 7b45ecb9c..ca592833f 100644
--- a/lib/pleroma/web/rich_media/parser.ex
+++ b/lib/pleroma/web/rich_media/parser.ex
@@ -3,13 +3,6 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parser do
- @hackney_options [
- pool: :media,
- recv_timeout: 2_000,
- max_body: 2_000_000,
- with_body: true
- ]
-
defp parsers do
Pleroma.Config.get([:rich_media, :parsers])
end
@@ -78,7 +71,7 @@ defmodule Pleroma.Web.RichMedia.Parser do
defp parse_url(url) do
try do
- {:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url, [], adapter: @hackney_options)
+ {:ok, %Tesla.Env{body: html}} = Pleroma.Web.RichMedia.Helpers.rich_media_get(url)
html
|> parse_html()
@@ -97,8 +90,8 @@ defmodule Pleroma.Web.RichMedia.Parser do
defp maybe_parse(html) do
Enum.reduce_while(parsers(), %{}, fn parser, acc ->
case parser.parse(html, acc) do
- {:ok, data} -> {:halt, data}
- {:error, _msg} -> {:cont, acc}
+ data when data != %{} -> {:halt, data}
+ _ -> {:cont, acc}
end
end)
end
diff --git a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
index 2762b5902..3d577e254 100644
--- a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
+++ b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
@@ -3,22 +3,15 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do
- def parse(html, data, prefix, error_message, key_name, value_name \\ "content") do
- meta_data =
- html
- |> get_elements(key_name, prefix)
- |> Enum.reduce(data, fn el, acc ->
- attributes = normalize_attributes(el, prefix, key_name, value_name)
-
- Map.merge(acc, attributes)
- end)
- |> maybe_put_title(html)
-
- if Enum.empty?(meta_data) do
- {:error, error_message}
- else
- {:ok, meta_data}
- end
+ def parse(data, html, prefix, key_name, value_name \\ "content") do
+ html
+ |> get_elements(key_name, prefix)
+ |> Enum.reduce(data, fn el, acc ->
+ attributes = normalize_attributes(el, prefix, key_name, value_name)
+
+ Map.merge(acc, attributes)
+ end)
+ |> maybe_put_title(html)
end
defp get_elements(html, key_name, prefix) do
diff --git a/lib/pleroma/web/rich_media/parsers/oembed_parser.ex b/lib/pleroma/web/rich_media/parsers/oembed_parser.ex
index db8ccf15d..1fe6729c3 100644
--- a/lib/pleroma/web/rich_media/parsers/oembed_parser.ex
+++ b/lib/pleroma/web/rich_media/parsers/oembed_parser.ex
@@ -7,9 +7,9 @@ defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
with elements = [_ | _] <- get_discovery_data(html),
oembed_url when is_binary(oembed_url) <- get_oembed_url(elements),
{:ok, oembed_data} <- get_oembed_data(oembed_url) do
- {:ok, oembed_data}
+ oembed_data
else
- _e -> {:error, "No OEmbed data found"}
+ _e -> %{}
end
end
@@ -22,7 +22,7 @@ defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
end
defp get_oembed_data(url) do
- with {:ok, %Tesla.Env{body: json}} <- Pleroma.HTTP.get(url, [], adapter: [pool: :media]) do
+ with {:ok, %Tesla.Env{body: json}} <- Pleroma.Web.RichMedia.Helpers.rich_media_get(url) do
Jason.decode(json)
end
end
diff --git a/lib/pleroma/web/rich_media/parsers/ogp.ex b/lib/pleroma/web/rich_media/parsers/ogp.ex
index 3e9012588..b3b3b059c 100644
--- a/lib/pleroma/web/rich_media/parsers/ogp.ex
+++ b/lib/pleroma/web/rich_media/parsers/ogp.ex
@@ -3,13 +3,8 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parsers.OGP do
- def parse(html, data) do
- Pleroma.Web.RichMedia.Parsers.MetaTagsParser.parse(
- html,
- data,
- "og",
- "No OGP metadata found",
- "property"
- )
+ @deprecated "OGP parser is deprecated. Use TwitterCard instead."
+ def parse(_html, _data) do
+ %{}
end
end
diff --git a/lib/pleroma/web/rich_media/parsers/twitter_card.ex b/lib/pleroma/web/rich_media/parsers/twitter_card.ex
index 09d4b526e..4a04865d2 100644
--- a/lib/pleroma/web/rich_media/parsers/twitter_card.ex
+++ b/lib/pleroma/web/rich_media/parsers/twitter_card.ex
@@ -5,18 +5,11 @@
defmodule Pleroma.Web.RichMedia.Parsers.TwitterCard do
alias Pleroma.Web.RichMedia.Parsers.MetaTagsParser
- @spec parse(String.t(), map()) :: {:ok, map()} | {:error, String.t()}
+ @spec parse(list(), map()) :: map()
def parse(html, data) do
data
- |> parse_name_attrs(html)
- |> parse_property_attrs(html)
- end
-
- defp parse_name_attrs(data, html) do
- MetaTagsParser.parse(html, data, "twitter", %{}, "name")
- end
-
- defp parse_property_attrs({_, data}, html) do
- MetaTagsParser.parse(html, data, "twitter", "No twitter card metadata found", "property")
+ |> MetaTagsParser.parse(html, "og", "property")
+ |> MetaTagsParser.parse(html, "twitter", "name")
+ |> MetaTagsParser.parse(html, "twitter", "property")
end
end