summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alex Gleason <alex@alexgleason.me> 2021-05-04 15:42:24 -0500
committer: Alex Gleason <alex@alexgleason.me> 2021-05-04 15:42:24 -0500
commit: ebeb9c6bc97764dca46f28d2ad1e7b65ee2fbcd3 (patch)
tree: 85b7522ad93fe82fe925d426e05aa043577a343b
parent: 8c4599c1dd9430a29c610413e2a5a623b46e4446 (diff)
Refactor rich media parser to store %Embed{} instead of %Card{}
-rw-r--r-- lib/pleroma/web/mastodon_api/views/status_view.ex | 9
-rw-r--r-- lib/pleroma/web/rich_media/helpers.ex | 21
-rw-r--r-- lib/pleroma/web/rich_media/parser.ex | 43
-rw-r--r-- lib/pleroma/web/rich_media/parser/card.ex | 107
-rw-r--r-- lib/pleroma/web/rich_media/parser/embed.ex | 10
5 files changed, 109 insertions, 81 deletions
diff --git a/lib/pleroma/web/mastodon_api/views/status_view.ex b/lib/pleroma/web/mastodon_api/views/status_view.ex
index 5eb09f0a1..ecf81b65d 100644
--- a/lib/pleroma/web/mastodon_api/views/status_view.ex
+++ b/lib/pleroma/web/mastodon_api/views/status_view.ex
@@ -20,6 +20,8 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
alias Pleroma.Web.MastodonAPI.StatusView
alias Pleroma.Web.MediaProxy
alias Pleroma.Web.PleromaAPI.EmojiReactionController
+ alias Pleroma.Web.RichMedia.Parser.Card
+ alias Pleroma.Web.RichMedia.Parser.Embed
import Pleroma.Web.ActivityPub.Visibility, only: [get_visibility: 1, visible_for_user?: 2]
@@ -367,10 +369,13 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
nil
end
- def render("card.json", %{rich_media: rich_media, page_url: _page_url}) do
- rich_media
+ def render("card.json", %Embed{url: _, meta: _} = embed) do
+ embed
+ |> Card.parse()
+ |> Card.to_map()
end
+ def render("card.json", %Card{} = card), do: Card.to_map(card)
def render("card.json", _), do: nil
def render("attachment.json", %{attachment: attachment}) do
diff --git a/lib/pleroma/web/rich_media/helpers.ex b/lib/pleroma/web/rich_media/helpers.ex
index 249730aea..7021c70ff 100644
--- a/lib/pleroma/web/rich_media/helpers.ex
+++ b/lib/pleroma/web/rich_media/helpers.ex
@@ -8,7 +8,7 @@ defmodule Pleroma.Web.RichMedia.Helpers do
alias Pleroma.HTML
alias Pleroma.Object
alias Pleroma.Web.RichMedia.Parser
- alias Pleroma.Web.RichMedia.Parser.Card
+ alias Pleroma.Web.RichMedia.Parser.Embed
@options [
pool: :media,
@@ -58,26 +58,15 @@ defmodule Pleroma.Web.RichMedia.Helpers do
|> hd
end
- defp strip_card(%Card{} = card) do
- card
- |> Map.from_struct()
- |> Map.new(fn {k, v} -> {Atom.to_string(k), v} end)
- end
-
- defp strip_card(%{} = card) do
- Map.new(card, fn {k, v} -> {Atom.to_string(k), v} end)
- end
-
def fetch_data_for_object(object) do
with true <- Config.get([:rich_media, :enabled]),
{:ok, page_url} <-
HTML.extract_first_external_url_from_object(object),
:ok <- validate_page_url(page_url),
- {:ok, rich_media} <- Parser.parse(page_url),
- rich_media <- strip_card(rich_media) do
- %{page_url: page_url, rich_media: rich_media}
+ {:ok, %Embed{} = embed} <- Parser.parse(page_url) do
+ embed
else
- _ -> %{}
+ _ -> nil
end
end
@@ -86,7 +75,7 @@ defmodule Pleroma.Web.RichMedia.Helpers do
%Object{} = object <- Object.normalize(activity, fetch: false) do
fetch_data_for_object(object)
else
- _ -> %{}
+ _ -> nil
end
end
diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex
index d628513be..0eaf3274e 100644
--- a/lib/pleroma/web/rich_media/parser.ex
+++ b/lib/pleroma/web/rich_media/parser.ex
@@ -5,6 +5,7 @@
defmodule Pleroma.Web.RichMedia.Parser do
require Logger
alias Pleroma.Web.RichMedia.Parser.Card
+ alias Pleroma.Web.RichMedia.Parser.Embed
@cachex Pleroma.Config.get([:cachex, :provider], Cachex)
@@ -133,7 +134,7 @@ defmodule Pleroma.Web.RichMedia.Parser do
def parse_url(url) do
case maybe_fetch_oembed(url) do
- {:ok, %Card{} = card} -> {:ok, card}
+ {:ok, %Embed{} = embed} -> {:ok, embed}
_ -> fetch_document(url)
end
end
@@ -143,8 +144,9 @@ defmodule Pleroma.Web.RichMedia.Parser do
{:ok, %Tesla.Env{body: json}} <-
Pleroma.Web.RichMedia.Helpers.oembed_get(oembed_url),
{:ok, data} <- Jason.decode(json),
- %Card{} = card <- Card.from_oembed(data, url) do
- {:ok, card}
+ embed <- %Embed{url: url, oembed: data},
+ {:ok, %Card{}} <- Card.validate(embed) do
+ {:ok, embed}
else
{:error, error} -> {:error, error}
error -> {:error, error}
@@ -153,36 +155,19 @@ defmodule Pleroma.Web.RichMedia.Parser do
defp fetch_document(url) do
with {:ok, %Tesla.Env{body: html}} <- Pleroma.Web.RichMedia.Helpers.rich_media_get(url),
- {:ok, html} <- Floki.parse_document(html) do
- html
- |> maybe_parse()
- |> Map.put("url", url)
- |> clean_parsed_data()
- |> Card.from_discovery(url)
- |> check_card()
+ {:ok, html} <- Floki.parse_document(html),
+ %Embed{} = embed <- parse_embed(html, url),
+ {:ok, %Card{}} <- Card.validate(embed) do
+ {:ok, embed}
+ else
+ {:error, error} -> {:error, error}
+ error -> {:error, error}
end
end
- defp maybe_parse(html) do
- Enum.reduce(parsers(), %{}, fn parser, acc ->
+ defp parse_embed(html, url) do
+ Enum.reduce(parsers(), %Embed{url: url}, fn parser, acc ->
parser.parse(html, acc)
end)
end
-
- defp check_card(%Card{title: title} = card)
- when is_binary(title) and title != "" do
- {:ok, card}
- end
-
- defp check_card(card) do
- {:error, {:invalid_metadata, card}}
- end
-
- defp clean_parsed_data(data) do
- data
- |> Enum.reject(fn {key, val} ->
- not match?({:ok, _}, Jason.encode(%{key => val}))
- end)
- |> Map.new()
- end
end
diff --git a/lib/pleroma/web/rich_media/parser/card.ex b/lib/pleroma/web/rich_media/parser/card.ex
index da0acc789..399b41195 100644
--- a/lib/pleroma/web/rich_media/parser/card.ex
+++ b/lib/pleroma/web/rich_media/parser/card.ex
@@ -3,6 +3,9 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parser.Card do
+ alias Pleroma.Web.RichMedia.Parser.Card
+ alias Pleroma.Web.RichMedia.Parser.Embed
+
@types ["link", "photo", "video", "rich"]
# https://docs.joinmastodon.org/entities/card/
@@ -21,12 +24,13 @@ defmodule Pleroma.Web.RichMedia.Parser.Card do
embed_url: "",
blurhash: nil
- def from_oembed(%{"type" => type, "title" => title} = oembed, url) when type in @types do
- %__MODULE__{
+ def parse(%{url: url, oembed: %{"type" => type, "title" => title} = oembed} = embed)
+ when type in @types do
+ %Card{
url: url,
title: title,
- description: "",
- type: type,
+ description: get_description(embed),
+ type: oembed["type"],
author_name: oembed["author_name"],
author_url: oembed["author_url"],
provider_name: oembed["provider_name"],
@@ -39,39 +43,74 @@ defmodule Pleroma.Web.RichMedia.Parser.Card do
}
end
- def from_oembed(_oembed, _url), do: nil
-
- def from_discovery(%{"type" => "link"} = rich_media, page_url) do
- page_url_data = URI.parse(page_url)
-
- page_url_data =
- if is_binary(rich_media["url"]) do
- URI.merge(page_url_data, URI.parse(rich_media["url"]))
- else
- page_url_data
- end
-
- page_url = page_url_data |> to_string
-
- image_url =
- if is_binary(rich_media["image"]) do
- URI.merge(page_url_data, URI.parse(rich_media["image"]))
- |> to_string
- end
-
- %__MODULE__{
- type: "link",
- provider_name: page_url_data.host,
- provider_url: page_url_data.scheme <> "://" <> page_url_data.host,
- url: page_url,
- image: image_url |> proxy(),
- title: rich_media["title"] || "",
- description: rich_media["description"] || ""
- }
+ def parse(%{url: url} = embed) do
+ title = get_title(embed)
+
+ if is_binary(title) do
+ %Card{
+ url: url,
+ title: title,
+ description: get_description(embed),
+ type: "link",
+ image: get_image(embed) |> proxy()
+ }
+ else
+ nil
+ end
+ end
+
+ def parse(_), do: nil
+
+ defp get_title(embed) do
+ case embed do
+ %{meta: %{"twitter:title" => title}} when is_binary(title) and title != "" -> title
+ %{meta: %{"og:title" => title}} when is_binary(title) and title != "" -> title
+ %{title: title} when is_binary(title) and title != "" -> title
+ _ -> ""
+ end
+ end
+
+ defp get_description(%{meta: meta}) do
+ case meta do
+ %{"twitter:description" => desc} when is_binary(desc) and desc != "" -> desc
+ %{"og:description" => desc} when is_binary(desc) and desc != "" -> desc
+ %{"description" => desc} when is_binary(desc) and desc != "" -> desc
+ _ -> ""
+ end
+ end
+
+ defp get_image(%{meta: meta}) do
+ case meta do
+ %{"twitter:image" => image} when is_binary(image) and image != "" -> image
+ %{"og:image" => image} when is_binary(image) and image != "" -> image
+ _ -> ""
+ end
+ end
+
+ def to_map(%Card{} = card) do
+ card
+ |> Map.from_struct()
+ |> stringify_keys()
end
- def from_discovery(rich_media, url), do: from_oembed(rich_media, url)
+ def to_map(%{} = card), do: stringify_keys(card)
+
+ defp stringify_keys(%{} = map), do: Map.new(map, fn {k, v} -> {Atom.to_string(k), v} end)
defp proxy(url) when is_binary(url), do: Pleroma.Web.MediaProxy.url(url)
defp proxy(_), do: nil
+
+ def validate(%Card{type: type, title: title} = card)
+ when type in @types and is_binary(title) and title != "" do
+ {:ok, card}
+ end
+
+ def validate(%Embed{} = embed) do
+ case Card.parse(embed) do
+ %Card{} = card -> validate(card)
+ card -> {:error, {:invalid_metadata, card}}
+ end
+ end
+
+ def validate(card), do: {:error, {:invalid_metadata, card}}
end
diff --git a/lib/pleroma/web/rich_media/parser/embed.ex b/lib/pleroma/web/rich_media/parser/embed.ex
new file mode 100644
index 000000000..509e557e6
--- /dev/null
+++ b/lib/pleroma/web/rich_media/parser/embed.ex
@@ -0,0 +1,10 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Web.RichMedia.Parser.Embed do
+ @moduledoc """
+ Represents embedded content, including scraped markup and OEmbed.
+ """
+ defstruct url: nil, meta: nil, oembed: nil
+end