summaryrefslogtreecommitdiff
path: root/lib/pleroma/web/rich_media/helpers.ex
blob: b7852c6e3fec025ec84bbe42511b4085ed11a6f7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# Pleroma: A lightweight social networking server
# Copyright © 2017-2020 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only

defmodule Pleroma.Web.RichMedia.Helpers do
  alias Pleroma.Activity
  alias Pleroma.Config
  alias Pleroma.HTML
  alias Pleroma.Object
  alias Pleroma.Web.RichMedia.Parser

  # Base adapter options for the HTTP requests issued by `rich_media_get/1`.
  # `max_body` is also read at compile time into `@max_body`, the ceiling that
  # `check_content_length/1` applies to an advertised Content-Length header.
  # NOTE(review): `max_body` is presumably a byte count and `:media` presumably
  # names a connection pool configured elsewhere — confirm against the HTTP setup.
  @rich_media_options [
    pool: :media,
    max_body: 2_000_000
  ]

  # Decides whether `page_url` may be fetched for rich media.
  #
  # A binary is first checked with `Linkify.Parser.url?/2` (honouring the
  # `[Pleroma.Formatter, :validate_tld]` setting) and, if accepted, parsed into
  # a `URI` and validated again. A parsed `URI` is accepted only when its scheme
  # is "https", it has a binary host and authority, and neither the host nor the
  # value `get_tld/1` derives from it appears in `[:rich_media, :ignore_hosts]`
  # or `[:rich_media, :ignore_tld]` respectively.
  #
  # Returns `:ok` when the URL is acceptable and `:error` otherwise.
  @spec validate_page_url(URI.t() | binary()) :: :ok | :error
  defp validate_page_url(page_url) when is_binary(page_url) do
    validate_tld = Config.get([Pleroma.Formatter, :validate_tld])

    page_url
    |> Linkify.Parser.url?(validate_tld: validate_tld)
    |> parse_uri(page_url)
  end

  defp validate_page_url(%URI{host: host, scheme: "https", authority: authority})
       when is_binary(host) and is_binary(authority) do
    # Host names are case-insensitive and may carry a trailing root dot, while
    # `URI.parse/1` keeps the host exactly as written. Both sides of the
    # comparison are normalised so that "https://LOCALHOST/" or
    # "https://example.local./" cannot slip past the configured ignore lists.
    host = normalize_host(host)

    cond do
      host in ignore_list(:ignore_hosts) ->
        :error

      get_tld(host) in ignore_list(:ignore_tld) ->
        :error

      true ->
        :ok
    end
  end

  # Anything else (non-https scheme, missing host/authority, other terms).
  defp validate_page_url(_), do: :error

  # Reads one of the `:rich_media` ignore lists with every entry normalised.
  defp ignore_list(key) do
    [:rich_media, key]
    |> Config.get([])
    |> Enum.map(&normalize_host/1)
  end

  # Lower-cases a host name and drops trailing root dots; non-binaries pass through.
  defp normalize_host(host) when is_binary(host) do
    host
    |> String.downcase()
    |> String.trim_trailing(".")
  end

  defp normalize_host(other), do: other

  # Continues validation based on the outcome of the URL syntax check: only
  # when that check returned `true` is `url` parsed with `URI.parse/1` and
  # handed back to `validate_page_url/1`; any other outcome yields `:error`.
  defp parse_uri(true, url), do: validate_page_url(URI.parse(url))

  defp parse_uri(_not_a_url, _url), do: :error

  # Returns the text after the last "." in `host`: the whole string when it
  # contains no dot, and "" when it ends with one.
  defp get_tld(host) do
    labels = String.split(host, ".")
    List.last(labels)
  end

  @doc """
  Fetches rich media data for the first external URL found in `object`.

  Returns `%{page_url: url, rich_media: data}` when rich media is enabled in the
  configuration, the object's `"sensitive"` field is `nil` or `false`, a first
  external URL can be extracted, that URL passes validation, and
  `Parser.parse/1` succeeds. In every other case an empty map is returned.
  """
  def fetch_data_for_object(object) do
    with true <- Config.get([:rich_media, :enabled]),
         true <- object.data["sensitive"] in [nil, false],
         {:ok, url} <- HTML.extract_first_external_url_from_object(object),
         :ok <- validate_page_url(url),
         {:ok, parsed} <- Parser.parse(url) do
      %{page_url: url, rich_media: parsed}
    else
      # Any step that does not match simply means "no rich media".
      _no_rich_media -> %{}
    end
  end

  @doc """
  Fetches rich media data for the object behind a `"Create"` activity.

  When rich media is enabled and `Object.normalize/1` yields an `Object`, the
  result of `fetch_data_for_object/1` is returned. Any other input, a disabled
  configuration, or a failed normalisation yields an empty map.
  """
  def fetch_data_for_activity(%Activity{data: %{"type" => "Create"}} = activity) do
    if Config.get([:rich_media, :enabled]) == true do
      case Object.normalize(activity) do
        %Object{} = object -> fetch_data_for_object(object)
        _not_an_object -> %{}
      end
    else
      %{}
    end
  end

  def fetch_data_for_activity(_other), do: %{}

  @doc """
  Runs `fetch_data_for_activity/1` for `activity` and discards its result.

  Always returns `:ok`.
  """
  def perform(:fetch, %Activity{} = activity) do
    _rich_media = fetch_data_for_activity(activity)
    :ok
  end

  @doc """
  Requests `url` with a bot user agent, guarded by a preliminary HEAD request.

  The HEAD request is sent first. If it fails with `{:error, _}`, that error is
  returned. If it answers with status 200, its headers must pass the
  content-type and content-length checks, otherwise the failing check's result
  is returned. Any other HEAD outcome is not treated as a rejection. When
  nothing was rejected, the result of `Pleroma.HTTP.get/3` is returned.
  """
  def rich_media_get(url) do
    request_headers = [{"user-agent", Pleroma.Application.user_agent() <> "; Bot"}]

    # The extra options are only added when Tesla is configured with Hackney.
    adapter_opts =
      case Application.get_env(:tesla, :adapter) do
        Tesla.Adapter.Hackney ->
          Keyword.merge(@rich_media_options, recv_timeout: 2_000, with_body: true)

        _other_adapter ->
          @rich_media_options
      end

    precheck =
      case Pleroma.HTTP.head(url, request_headers, adapter: adapter_opts) do
        # If the HEAD request didn't reach the server for whatever reason,
        # we assume the GET that comes right after won't either
        {:error, _} = failure ->
          failure

        {:ok, %Tesla.Env{status: 200, headers: response_headers}} ->
          case check_content_type(response_headers) do
            :ok -> check_content_length(response_headers)
            rejected -> rejected
          end

        _other_response ->
          :ok
      end

    case precheck do
      :ok -> Pleroma.HTTP.get(url, request_headers, adapter: adapter_opts)
      refusal -> refusal
    end
  end

  # Accepts the response when it has no "content-type" header or when that
  # header parses as text/html; otherwise returns
  # `{:error, {:content_type, raw_value}}` with the header value as received.
  defp check_content_type(headers) do
    case List.keyfind(headers, "content-type", 0) do
      {_name, raw_type} ->
        if match?({:ok, "text", "html", _params}, Plug.Conn.Utils.media_type(raw_type)) do
          :ok
        else
          {:error, {:content_type, raw_type}}
        end

      _absent ->
        :ok
    end
  end

  @max_body @rich_media_options[:max_body]
  # Rejects the response with `{:error, :body_too_large}` only when the
  # "content-length" header is a clean integer string greater than `@max_body`.
  # A missing header, or a value that does not parse completely as an integer,
  # is let through with `:ok`.
  defp check_content_length(headers) do
    with {_name, raw_length} <- List.keyfind(headers, "content-length", 0),
         {declared, ""} when declared > @max_body <- Integer.parse(raw_length) do
      {:error, :body_too_large}
    else
      _within_limit_or_unknown -> :ok
    end
  end
end