summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorrinpatch <rin@patch.cx>2021-03-15 09:35:46 +0000
committerrinpatch <rin@patch.cx>2021-03-15 09:35:46 +0000
commit8194622a72d863e71428bf7dba9a2a962b163d4e (patch)
treecd10436a46df60542201f8c60e68aa936f771ad9
parent19fbe5b860789fb6f4958de71f9ca3ed655ee00d (diff)
parentcb734566093f406fc3db12de2408fc166486f417 (diff)
Merge branch 'feature/object-hashtags-rework' into 'develop'
Hashtags extraction from objects. Background migration infrastructure. Closes #1840 and #2455 See merge request pleroma/pleroma!3213
-rw-r--r--CHANGELOG.md1
-rw-r--r--config/config.exs4
-rw-r--r--config/description.exs36
-rw-r--r--docs/configuration/cheatsheet.md7
-rw-r--r--lib/mix/tasks/pleroma/database.ex31
-rw-r--r--lib/pleroma/activity.ex2
-rw-r--r--lib/pleroma/activity/ir/topics.ex10
-rw-r--r--lib/pleroma/application.ex10
-rw-r--r--lib/pleroma/config.ex4
-rw-r--r--lib/pleroma/data_migration.ex45
-rw-r--r--lib/pleroma/delivery.ex1
-rw-r--r--lib/pleroma/ecto_enums.ex8
-rw-r--r--lib/pleroma/hashtag.ex106
-rw-r--r--lib/pleroma/migrators/hashtags_table_migrator.ex208
-rw-r--r--lib/pleroma/migrators/support/base_migrator.ex210
-rw-r--r--lib/pleroma/migrators/support/base_migrator_state.ex117
-rw-r--r--lib/pleroma/object.ex84
-rw-r--r--lib/pleroma/pagination.ex3
-rw-r--r--lib/pleroma/repo.ex8
-rw-r--r--lib/pleroma/web/activity_pub/activity_pub.ex255
-rw-r--r--lib/pleroma/web/activity_pub/mrf/simple_policy.ex8
-rw-r--r--lib/pleroma/web/activity_pub/transmogrifier.ex29
-rw-r--r--lib/pleroma/web/feed/feed_view.ex1
-rw-r--r--lib/pleroma/web/mastodon_api/controllers/timeline_controller.ex43
-rw-r--r--lib/pleroma/web/mastodon_api/views/status_view.ex6
-rw-r--r--lib/pleroma/web/templates/feed/feed/_activity.atom.eex2
-rw-r--r--lib/pleroma/web/templates/feed/feed/_activity.rss.eex2
-rw-r--r--lib/pleroma/web/templates/feed/feed/_tag_activity.atom.eex2
-rw-r--r--mix.exs1
-rw-r--r--mix.lock1
-rw-r--r--priv/repo/migrations/20190711042021_create_safe_jsonb_set.exs2
-rw-r--r--priv/repo/migrations/20201221202251_create_hashtags.exs13
-rw-r--r--priv/repo/migrations/20201221202252_remove_data_from_hashtags.exs15
-rw-r--r--priv/repo/migrations/20201221203824_create_hashtags_objects.exs13
-rw-r--r--priv/repo/migrations/20210105195018_create_data_migrations.exs17
-rw-r--r--priv/repo/migrations/20210106183301_data_migration_create_populate_hashtags_table.exs16
-rw-r--r--priv/repo/migrations/20210111172254_create_data_migration_failed_ids.exs14
-rw-r--r--priv/repo/migrations/20210222183840_remove_hashtags_objects_duplicate_index.exs11
-rw-r--r--priv/repo/migrations/20210222184616_change_hashtags_name_to_text.exs15
-rw-r--r--test/pleroma/activity/ir/topics_test.exs15
-rw-r--r--test/pleroma/hashtag_test.exs17
-rw-r--r--test/pleroma/object_test.exs27
-rw-r--r--test/pleroma/web/activity_pub/activity_pub_test.exs72
-rw-r--r--test/pleroma/web/activity_pub/transmogrifier/note_handling_test.exs6
-rw-r--r--test/pleroma/web/common_api_test.exs2
-rw-r--r--test/pleroma/web/mastodon_api/views/status_view_test.exs4
46 files changed, 1344 insertions, 160 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 50484aaef..87eb7b48f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -50,6 +50,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- Pleroma API: Reroute `/api/pleroma/*` to `/api/v1/pleroma/*`
</details>
+- Improved hashtag timeline performance (requires a background migration).
### Added
diff --git a/config/config.exs b/config/config.exs
index 66aee3264..23ada389b 100644
--- a/config/config.exs
+++ b/config/config.exs
@@ -654,6 +654,10 @@ config :pleroma, :oauth2,
config :pleroma, :database, rum_enabled: false
+config :pleroma, :features, improved_hashtag_timeline: :auto
+
+config :pleroma, :populate_hashtags_table, fault_rate_allowance: 0.01
+
config :pleroma, :env, Mix.env()
config :http_signatures,
diff --git a/config/description.exs b/config/description.exs
index d9b15e684..41e5e4056 100644
--- a/config/description.exs
+++ b/config/description.exs
@@ -461,6 +461,42 @@ config :pleroma, :config_description, [
},
%{
group: :pleroma,
+ key: :features,
+ type: :group,
+ description: "Customizable features",
+ children: [
+ %{
+ key: :improved_hashtag_timeline,
+ type: {:dropdown, :atom},
+ description:
+ "Setting to force toggle / force disable improved hashtags timeline. `:enabled` forces hashtags to be fetched from `hashtags` table for hashtags timeline. `:disabled` forces object-embedded hashtags to be used (slower). Keep it `:auto` for automatic behaviour (it is auto-set to `:enabled` [unless overridden] when HashtagsTableMigrator completes).",
+ suggestions: [:auto, :enabled, :disabled]
+ }
+ ]
+ },
+ %{
+ group: :pleroma,
+ key: :populate_hashtags_table,
+ type: :group,
+ description: "`populate_hashtags_table` background migration settings",
+ children: [
+ %{
+ key: :fault_rate_allowance,
+ type: :float,
+ description:
+ "Max accepted rate of objects that failed in the migration. Any value from 0.0 which tolerates no errors to 1.0 which will enable the feature even if hashtags transfer failed for all records.",
+ suggestions: [0.01]
+ },
+ %{
+ key: :sleep_interval_ms,
+ type: :integer,
+ description:
+ "Sleep interval between each chunk of processed records in order to decrease the load on the system (defaults to 0 and should be keep default on most instances)."
+ }
+ ]
+ },
+ %{
+ group: :pleroma,
key: :instance,
type: :group,
description: "Instance-related settings",
diff --git a/docs/configuration/cheatsheet.md b/docs/configuration/cheatsheet.md
index 028c5e91d..d085f0f44 100644
--- a/docs/configuration/cheatsheet.md
+++ b/docs/configuration/cheatsheet.md
@@ -65,6 +65,13 @@ To add configuration to your config file, you can copy it from the base config.
* `show_reactions`: Let favourites and emoji reactions be viewed through the API (default: `true`).
* `password_reset_token_validity`: The time after which reset tokens aren't accepted anymore, in seconds (default: one day).
+## :database
+* `improved_hashtag_timeline`: Setting to force toggle / force disable improved hashtags timeline. `:enabled` forces hashtags to be fetched from `hashtags` table for hashtags timeline. `:disabled` forces object-embedded hashtags to be used (slower). Keep it `:auto` for automatic behaviour (it is auto-set to `:enabled` [unless overridden] when HashtagsTableMigrator completes).
+
+## Background migrations
+* `populate_hashtags_table/sleep_interval_ms`: Sleep interval between each chunk of processed records in order to decrease the load on the system (defaults to 0 and should be keep default on most instances).
+* `populate_hashtags_table/fault_rate_allowance`: Max rate of failed objects to actually processed objects in order to enable the feature (any value from 0.0 which tolerates no errors to 1.0 which will enable the feature even if hashtags transfer failed for all records).
+
## Welcome
* `direct_message`: - welcome message sent as a direct message.
* `enabled`: Enables the send a direct message to a newly registered user. Defaults to `false`.
diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex
index 2403ed581..e7f4b67a4 100644
--- a/lib/mix/tasks/pleroma/database.ex
+++ b/lib/mix/tasks/pleroma/database.ex
@@ -8,10 +8,13 @@ defmodule Mix.Tasks.Pleroma.Database do
alias Pleroma.Object
alias Pleroma.Repo
alias Pleroma.User
+
require Logger
require Pleroma.Constants
+
import Ecto.Query
import Mix.Pleroma
+
use Mix.Task
@shortdoc "A collection of database related tasks"
@@ -214,4 +217,32 @@ defmodule Mix.Tasks.Pleroma.Database do
shell_info('Done.')
end
end
+
+ # Rolls back a specific migration (leaving subsequent migrations applied).
+ # WARNING: imposes a risk of unrecoverable data loss — proceed at your own responsibility.
+ # Based on https://stackoverflow.com/a/53825840
+ def run(["rollback", version]) do
+ prompt = "SEVERE WARNING: this operation may result in unrecoverable data loss. Continue?"
+
+ if shell_prompt(prompt, "n") in ~w(Yn Y y) do
+ {_, result, _} =
+ Ecto.Migrator.with_repo(Pleroma.Repo, fn repo ->
+ version = String.to_integer(version)
+ re = ~r/^#{version}_.*\.exs/
+ path = Ecto.Migrator.migrations_path(repo)
+
+ with {_, "" <> file} <- {:find, Enum.find(File.ls!(path), &String.match?(&1, re))},
+ {_, [{mod, _} | _]} <- {:compile, Code.compile_file(Path.join(path, file))},
+ {_, :ok} <- {:rollback, Ecto.Migrator.down(repo, version, mod)} do
+ {:ok, "Reversed migration: #{file}"}
+ else
+ {:find, _} -> {:error, "No migration found with version prefix: #{version}"}
+ {:compile, e} -> {:error, "Problem compiling migration module: #{inspect(e)}"}
+ {:rollback, e} -> {:error, "Problem reversing migration: #{inspect(e)}"}
+ end
+ end)
+
+ shell_info(inspect(result))
+ end
+ end
end
diff --git a/lib/pleroma/activity.ex b/lib/pleroma/activity.ex
index 6542e684e..d59403884 100644
--- a/lib/pleroma/activity.ex
+++ b/lib/pleroma/activity.ex
@@ -113,6 +113,7 @@ defmodule Pleroma.Activity do
from([a] in query,
left_join: b in Bookmark,
on: b.user_id == ^user.id and b.activity_id == a.id,
+ as: :bookmark,
preload: [bookmark: b]
)
end
@@ -123,6 +124,7 @@ defmodule Pleroma.Activity do
from([a] in query,
left_join: r in ReportNote,
on: a.id == r.activity_id,
+ as: :report_note,
preload: [report_notes: r]
)
end
diff --git a/lib/pleroma/activity/ir/topics.ex b/lib/pleroma/activity/ir/topics.ex
index d94395fc1..7a603a615 100644
--- a/lib/pleroma/activity/ir/topics.ex
+++ b/lib/pleroma/activity/ir/topics.ex
@@ -48,14 +48,12 @@ defmodule Pleroma.Activity.Ir.Topics do
tags
end
- defp hashtags_to_topics(%{data: %{"tag" => tags}}) do
- tags
- |> Enum.filter(&is_bitstring(&1))
- |> Enum.map(fn tag -> "hashtag:" <> tag end)
+ defp hashtags_to_topics(object) do
+ object
+ |> Object.hashtags()
+ |> Enum.map(fn hashtag -> "hashtag:" <> hashtag end)
end
- defp hashtags_to_topics(_), do: []
-
defp remote_topics(%{local: true}), do: []
defp remote_topics(%{actor: actor}) when is_binary(actor),
diff --git a/lib/pleroma/application.ex b/lib/pleroma/application.ex
index c853a2bb4..06d399b2e 100644
--- a/lib/pleroma/application.ex
+++ b/lib/pleroma/application.ex
@@ -103,9 +103,7 @@ defmodule Pleroma.Application do
task_children(@mix_env) ++
dont_run_in_test(@mix_env) ++
chat_child(chat_enabled?()) ++
- [
- Pleroma.Gopher.Server
- ]
+ [Pleroma.Gopher.Server]
# See http://elixir-lang.org/docs/stable/elixir/Supervisor.html
# for other strategies and supported options
@@ -230,6 +228,12 @@ defmodule Pleroma.Application do
keys: :duplicate,
partitions: System.schedulers_online()
]}
+ ] ++ background_migrators()
+ end
+
+ defp background_migrators do
+ [
+ Pleroma.Migrators.HashtagsTableMigrator
]
end
diff --git a/lib/pleroma/config.ex b/lib/pleroma/config.ex
index 2e15a3719..54e332595 100644
--- a/lib/pleroma/config.ex
+++ b/lib/pleroma/config.ex
@@ -99,4 +99,8 @@ defmodule Pleroma.Config do
def oauth_consumer_strategies, do: get([:auth, :oauth_consumer_strategies], [])
def oauth_consumer_enabled?, do: oauth_consumer_strategies() != []
+
+ def feature_enabled?(feature_name) do
+ get([:features, feature_name]) not in [nil, false, :disabled, :auto]
+ end
end
diff --git a/lib/pleroma/data_migration.ex b/lib/pleroma/data_migration.ex
new file mode 100644
index 000000000..1377af16e
--- /dev/null
+++ b/lib/pleroma/data_migration.ex
@@ -0,0 +1,45 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.DataMigration do
+ use Ecto.Schema
+
+ alias Pleroma.DataMigration
+ alias Pleroma.DataMigration.State
+ alias Pleroma.Repo
+
+ import Ecto.Changeset
+ import Ecto.Query
+
+ schema "data_migrations" do
+ field(:name, :string)
+ field(:state, State, default: :pending)
+ field(:feature_lock, :boolean, default: false)
+ field(:params, :map, default: %{})
+ field(:data, :map, default: %{})
+
+ timestamps()
+ end
+
+ def changeset(data_migration, params \\ %{}) do
+ data_migration
+ |> cast(params, [:name, :state, :feature_lock, :params, :data])
+ |> validate_required([:name])
+ |> unique_constraint(:name)
+ end
+
+ def update_one_by_id(id, params \\ %{}) do
+ with {1, _} <-
+ from(dm in DataMigration, where: dm.id == ^id)
+ |> Repo.update_all(set: params) do
+ :ok
+ end
+ end
+
+ def get_by_name(name) do
+ Repo.get_by(DataMigration, name: name)
+ end
+
+ def populate_hashtags_table, do: get_by_name("populate_hashtags_table")
+end
diff --git a/lib/pleroma/delivery.ex b/lib/pleroma/delivery.ex
index e8d536767..511d5cf58 100644
--- a/lib/pleroma/delivery.ex
+++ b/lib/pleroma/delivery.ex
@@ -9,7 +9,6 @@ defmodule Pleroma.Delivery do
alias Pleroma.Object
alias Pleroma.Repo
alias Pleroma.User
- alias Pleroma.User
import Ecto.Changeset
import Ecto.Query
diff --git a/lib/pleroma/ecto_enums.ex b/lib/pleroma/ecto_enums.ex
index f198cccb7..2a9addabc 100644
--- a/lib/pleroma/ecto_enums.ex
+++ b/lib/pleroma/ecto_enums.ex
@@ -17,3 +17,11 @@ defenum(Pleroma.FollowingRelationship.State,
follow_accept: 2,
follow_reject: 3
)
+
+defenum(Pleroma.DataMigration.State,
+ pending: 1,
+ running: 2,
+ complete: 3,
+ failed: 4,
+ manual: 5
+)
diff --git a/lib/pleroma/hashtag.ex b/lib/pleroma/hashtag.ex
new file mode 100644
index 000000000..53e2e9c89
--- /dev/null
+++ b/lib/pleroma/hashtag.ex
@@ -0,0 +1,106 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2020 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Hashtag do
+ use Ecto.Schema
+
+ import Ecto.Changeset
+ import Ecto.Query
+
+ alias Ecto.Multi
+ alias Pleroma.Hashtag
+ alias Pleroma.Object
+ alias Pleroma.Repo
+
+ schema "hashtags" do
+ field(:name, :string)
+
+ many_to_many(:objects, Object, join_through: "hashtags_objects", on_replace: :delete)
+
+ timestamps()
+ end
+
+ def normalize_name(name) do
+ name
+ |> String.downcase()
+ |> String.trim()
+ end
+
+ def get_or_create_by_name(name) do
+ changeset = changeset(%Hashtag{}, %{name: name})
+
+ Repo.insert(
+ changeset,
+ on_conflict: [set: [name: get_field(changeset, :name)]],
+ conflict_target: :name,
+ returning: true
+ )
+ end
+
+ def get_or_create_by_names(names) when is_list(names) do
+ names = Enum.map(names, &normalize_name/1)
+ timestamp = NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second)
+
+ structs =
+ Enum.map(names, fn name ->
+ %Hashtag{}
+ |> changeset(%{name: name})
+ |> Map.get(:changes)
+ |> Map.merge(%{inserted_at: timestamp, updated_at: timestamp})
+ end)
+
+ try do
+ with {:ok, %{query_op: hashtags}} <-
+ Multi.new()
+ |> Multi.insert_all(:insert_all_op, Hashtag, structs,
+ on_conflict: :nothing,
+ conflict_target: :name
+ )
+ |> Multi.run(:query_op, fn _repo, _changes ->
+ {:ok, Repo.all(from(ht in Hashtag, where: ht.name in ^names))}
+ end)
+ |> Repo.transaction() do
+ {:ok, hashtags}
+ else
+ {:error, _name, value, _changes_so_far} -> {:error, value}
+ end
+ rescue
+ e -> {:error, e}
+ end
+ end
+
+ def changeset(%Hashtag{} = struct, params) do
+ struct
+ |> cast(params, [:name])
+ |> update_change(:name, &normalize_name/1)
+ |> validate_required([:name])
+ |> unique_constraint(:name)
+ end
+
+ def unlink(%Object{id: object_id}) do
+ with {_, hashtag_ids} <-
+ from(hto in "hashtags_objects",
+ where: hto.object_id == ^object_id,
+ select: hto.hashtag_id
+ )
+ |> Repo.delete_all(),
+ {:ok, unreferenced_count} <- delete_unreferenced(hashtag_ids) do
+ {:ok, length(hashtag_ids), unreferenced_count}
+ end
+ end
+
+ @delete_unreferenced_query """
+ DELETE FROM hashtags WHERE id IN
+ (SELECT hashtags.id FROM hashtags
+ LEFT OUTER JOIN hashtags_objects
+ ON hashtags_objects.hashtag_id = hashtags.id
+ WHERE hashtags_objects.hashtag_id IS NULL AND hashtags.id = ANY($1));
+ """
+
+ def delete_unreferenced(ids) do
+ with {:ok, %{num_rows: deleted_count}} <- Repo.query(@delete_unreferenced_query, [ids]) do
+ {:ok, deleted_count}
+ end
+ end
+end
diff --git a/lib/pleroma/migrators/hashtags_table_migrator.ex b/lib/pleroma/migrators/hashtags_table_migrator.ex
new file mode 100644
index 000000000..b84058e11
--- /dev/null
+++ b/lib/pleroma/migrators/hashtags_table_migrator.ex
@@ -0,0 +1,208 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Migrators.HashtagsTableMigrator do
+ defmodule State do
+ use Pleroma.Migrators.Support.BaseMigratorState
+
+ @impl Pleroma.Migrators.Support.BaseMigratorState
+ defdelegate data_migration(), to: Pleroma.DataMigration, as: :populate_hashtags_table
+ end
+
+ use Pleroma.Migrators.Support.BaseMigrator
+
+ alias Pleroma.Hashtag
+ alias Pleroma.Migrators.Support.BaseMigrator
+ alias Pleroma.Object
+
+ @impl BaseMigrator
+ def feature_config_path, do: [:features, :improved_hashtag_timeline]
+
+ @impl BaseMigrator
+ def fault_rate_allowance, do: Config.get([:populate_hashtags_table, :fault_rate_allowance], 0)
+
+ @impl BaseMigrator
+ def perform do
+ data_migration_id = data_migration_id()
+ max_processed_id = get_stat(:max_processed_id, 0)
+
+ Logger.info("Transferring embedded hashtags to `hashtags` (from oid: #{max_processed_id})...")
+
+ query()
+ |> where([object], object.id > ^max_processed_id)
+ |> Repo.chunk_stream(100, :batches, timeout: :infinity)
+ |> Stream.each(fn objects ->
+ object_ids = Enum.map(objects, & &1.id)
+
+ results = Enum.map(objects, &transfer_object_hashtags(&1))
+
+ failed_ids =
+ results
+ |> Enum.filter(&(elem(&1, 0) == :error))
+ |> Enum.map(&elem(&1, 1))
+
+ # Count of objects with hashtags: `{:noop, id}` is returned for objects having other AS2 tags
+ chunk_affected_count =
+ results
+ |> Enum.filter(&(elem(&1, 0) == :ok))
+ |> length()
+
+ for failed_id <- failed_ids do
+ _ =
+ Repo.query(
+ "INSERT INTO data_migration_failed_ids(data_migration_id, record_id) " <>
+ "VALUES ($1, $2) ON CONFLICT DO NOTHING;",
+ [data_migration_id, failed_id]
+ )
+ end
+
+ _ =
+ Repo.query(
+ "DELETE FROM data_migration_failed_ids " <>
+ "WHERE data_migration_id = $1 AND record_id = ANY($2)",
+ [data_migration_id, object_ids -- failed_ids]
+ )
+
+ max_object_id = Enum.at(object_ids, -1)
+
+ put_stat(:max_processed_id, max_object_id)
+ increment_stat(:iteration_processed_count, length(object_ids))
+ increment_stat(:processed_count, length(object_ids))
+ increment_stat(:failed_count, length(failed_ids))
+ increment_stat(:affected_count, chunk_affected_count)
+ put_stat(:records_per_second, records_per_second())
+ persist_state()
+
+ # A quick and dirty approach to controlling the load this background migration imposes
+ sleep_interval = Config.get([:populate_hashtags_table, :sleep_interval_ms], 0)
+ Process.sleep(sleep_interval)
+ end)
+ |> Stream.run()
+ end
+
+ @impl BaseMigrator
+ def query do
+ # Note: most objects have Mention-type AS2 tags and no hashtags (but we can't filter them out)
+ # Note: not checking activity type, expecting remove_non_create_objects_hashtags/_ to clean up
+ from(
+ object in Object,
+ where:
+ fragment("(?)->'tag' IS NOT NULL AND (?)->'tag' != '[]'::jsonb", object.data, object.data),
+ select: %{
+ id: object.id,
+ tag: fragment("(?)->'tag'", object.data)
+ }
+ )
+ |> join(:left, [o], hashtags_objects in fragment("SELECT object_id FROM hashtags_objects"),
+ on: hashtags_objects.object_id == o.id
+ )
+ |> where([_o, hashtags_objects], is_nil(hashtags_objects.object_id))
+ end
+
+ @spec transfer_object_hashtags(Map.t()) :: {:noop | :ok | :error, integer()}
+ defp transfer_object_hashtags(object) do
+ embedded_tags = if Map.has_key?(object, :tag), do: object.tag, else: object.data["tag"]
+ hashtags = Object.object_data_hashtags(%{"tag" => embedded_tags})
+
+ if Enum.any?(hashtags) do
+ transfer_object_hashtags(object, hashtags)
+ else
+ {:noop, object.id}
+ end
+ end
+
+ defp transfer_object_hashtags(object, hashtags) do
+ Repo.transaction(fn ->
+ with {:ok, hashtag_records} <- Hashtag.get_or_create_by_names(hashtags) do
+ maps = Enum.map(hashtag_records, &%{hashtag_id: &1.id, object_id: object.id})
+ base_error = "ERROR when inserting hashtags_objects for object with id #{object.id}"
+
+ try do
+ with {rows_count, _} when is_integer(rows_count) <-
+ Repo.insert_all("hashtags_objects", maps, on_conflict: :nothing) do
+ object.id
+ else
+ e ->
+ Logger.error("#{base_error}: #{inspect(e)}")
+ Repo.rollback(object.id)
+ end
+ rescue
+ e ->
+ Logger.error("#{base_error}: #{inspect(e)}")
+ Repo.rollback(object.id)
+ end
+ else
+ e ->
+ error = "ERROR: could not create hashtags for object #{object.id}: #{inspect(e)}"
+ Logger.error(error)
+ Repo.rollback(object.id)
+ end
+ end)
+ end
+
+ @impl BaseMigrator
+ def retry_failed do
+ data_migration_id = data_migration_id()
+
+ failed_objects_query()
+ |> Repo.chunk_stream(100, :one)
+ |> Stream.each(fn object ->
+ with {res, _} when res != :error <- transfer_object_hashtags(object) do
+ _ =
+ Repo.query(
+ "DELETE FROM data_migration_failed_ids " <>
+ "WHERE data_migration_id = $1 AND record_id = $2",
+ [data_migration_id, object.id]
+ )
+ end
+ end)
+ |> Stream.run()
+
+ put_stat(:failed_count, failures_count())
+ persist_state()
+
+ force_continue()
+ end
+
+ defp failed_objects_query do
+ from(o in Object)
+ |> join(:inner, [o], dmf in fragment("SELECT * FROM data_migration_failed_ids"),
+ on: dmf.record_id == o.id
+ )
+ |> where([_o, dmf], dmf.data_migration_id == ^data_migration_id())
+ |> order_by([o], asc: o.id)
+ end
+
+ @doc """
+ Service func to delete `hashtags_objects` for legacy objects not associated with Create activity.
+ Also deletes unreferenced `hashtags` records (might occur after deletion of `hashtags_objects`).
+ """
+ def delete_non_create_activities_hashtags do
+ hashtags_objects_cleanup_query = """
+ DELETE FROM hashtags_objects WHERE object_id IN
+ (SELECT DISTINCT objects.id FROM objects
+ JOIN hashtags_objects ON hashtags_objects.object_id = objects.id LEFT JOIN activities
+ ON COALESCE(activities.data->'object'->>'id', activities.data->>'object') =
+ (objects.data->>'id')
+ AND activities.data->>'type' = 'Create'
+ WHERE activities.id IS NULL);
+ """
+
+ hashtags_cleanup_query = """
+ DELETE FROM hashtags WHERE id IN
+ (SELECT hashtags.id FROM hashtags
+ LEFT OUTER JOIN hashtags_objects
+ ON hashtags_objects.hashtag_id = hashtags.id
+ WHERE hashtags_objects.hashtag_id IS NULL);
+ """
+
+ {:ok, %{num_rows: hashtags_objects_count}} =
+ Repo.query(hashtags_objects_cleanup_query, [], timeout: :infinity)
+
+ {:ok, %{num_rows: hashtags_count}} =
+ Repo.query(hashtags_cleanup_query, [], timeout: :infinity)
+
+ {:ok, hashtags_objects_count, hashtags_count}
+ end
+end
diff --git a/lib/pleroma/migrators/support/base_migrator.ex b/lib/pleroma/migrators/support/base_migrator.ex
new file mode 100644
index 000000000..1f8a5402b
--- /dev/null
+++ b/lib/pleroma/migrators/support/base_migrator.ex
@@ -0,0 +1,210 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Migrators.Support.BaseMigrator do
+ @moduledoc """
+ Base background migrator functionality.
+ """
+
+ @callback perform() :: any()
+ @callback retry_failed() :: any()
+ @callback feature_config_path() :: list(atom())
+ @callback query() :: Ecto.Query.t()
+ @callback fault_rate_allowance() :: integer() | float()
+
+ defmacro __using__(_opts) do
+ quote do
+ use GenServer
+
+ require Logger
+
+ import Ecto.Query
+
+ alias __MODULE__.State
+ alias Pleroma.Config
+ alias Pleroma.Repo
+
+ @behaviour Pleroma.Migrators.Support.BaseMigrator
+
+ defdelegate data_migration(), to: State
+ defdelegate data_migration_id(), to: State
+ defdelegate state(), to: State
+ defdelegate persist_state(), to: State, as: :persist_to_db
+ defdelegate get_stat(key, value \\ nil), to: State, as: :get_data_key
+ defdelegate put_stat(key, value), to: State, as: :put_data_key
+ defdelegate increment_stat(key, increment), to: State, as: :increment_data_key
+
+ @reg_name {:global, __MODULE__}
+
+ def whereis, do: GenServer.whereis(@reg_name)
+
+ def start_link(_) do
+ case whereis() do
+ nil ->
+ GenServer.start_link(__MODULE__, nil, name: @reg_name)
+
+ pid ->
+ {:ok, pid}
+ end
+ end
+
+ @impl true
+ def init(_) do
+ {:ok, nil, {:continue, :init_state}}
+ end
+
+ @impl true
+ def handle_continue(:init_state, _state) do
+ {:ok, _} = State.start_link(nil)
+
+ data_migration = data_migration()
+ manual_migrations = Config.get([:instance, :manual_data_migrations], [])
+
+ cond do
+ Config.get(:env) == :test ->
+ update_status(:noop)
+
+ is_nil(data_migration) ->
+ message = "Data migration does not exist."
+ update_status(:failed, message)
+ Logger.error("#{__MODULE__}: #{message}")
+
+ data_migration.state == :manual or data_migration.name in manual_migrations ->
+ message = "Data migration is in manual execution or manual fix mode."
+ update_status(:manual, message)
+ Logger.warn("#{__MODULE__}: #{message}")
+
+ data_migration.state == :complete ->
+ on_complete(data_migration)
+
+ true ->
+ send(self(), :perform)
+ end
+
+ {:noreply, nil}
+ end
+
+ @impl true
+ def handle_info(:perform, state) do
+ State.reinit()
+
+ update_status(:running)
+ put_stat(:iteration_processed_count, 0)
+ put_stat(:started_at, NaiveDateTime.utc_now())
+
+ perform()
+
+ fault_rate = fault_rate()
+ put_stat(:fault_rate, fault_rate)
+ fault_rate_allowance = fault_rate_allowance()
+
+ cond do
+ fault_rate == 0 ->
+ set_complete()
+
+ is_float(fault_rate) and fault_rate <= fault_rate_allowance ->
+ message = """
+ Done with fault rate of #{fault_rate} which doesn't exceed #{fault_rate_allowance}.
+ Putting data migration to manual fix mode. Try running `#{__MODULE__}.retry_failed/0`.
+ """
+
+ Logger.warn("#{__MODULE__}: #{message}")
+ update_status(:manual, message)
+ on_complete(data_migration())
+
+ true ->
+ message = "Too many failures. Try running `#{__MODULE__}.retry_failed/0`."
+ Logger.error("#{__MODULE__}: #{message}")
+ update_status(:failed, message)
+ end
+
+ persist_state()
+ {:noreply, state}
+ end
+
+ defp on_complete(data_migration) do
+ if data_migration.feature_lock || feature_state() == :disabled do
+ Logger.warn(
+ "#{__MODULE__}: migration complete but feature is locked; consider enabling."
+ )
+
+ :noop
+ else
+ Config.put(feature_config_path(), :enabled)
+ :ok
+ end
+ end
+
+ @doc "Approximate count for current iteration (including processed records count)"
+ def count(force \\ false, timeout \\ :infinity) do
+ stored_count = get_stat(:count)
+
+ if stored_count && !force do
+ stored_count
+ else
+ processed_count = get_stat(:processed_count, 0)
+ max_processed_id = get_stat(:max_processed_id, 0)
+ query = where(query(), [entity], entity.id > ^max_processed_id)
+
+ count = Repo.aggregate(query, :count, :id, timeout: timeout) + processed_count
+ put_stat(:count, count)
+ persist_state()
+
+ count
+ end
+ end
+
+ def failures_count do
+ with {:ok, %{rows: [[count]]}} <-
+ Repo.query(
+ "SELECT COUNT(record_id) FROM data_migration_failed_ids WHERE data_migration_id = $1;",
+ [data_migration_id()]
+ ) do
+ count
+ end
+ end
+
+ def feature_state, do: Config.get(feature_config_path())
+
+ def force_continue do
+ send(whereis(), :perform)
+ end
+
+ def force_restart do
+ :ok = State.reset()
+ force_continue()
+ end
+
+ def set_complete do
+ update_status(:complete)
+ persist_state()
+ on_complete(data_migration())
+ end
+
+ defp update_status(status, message \\ nil) do
+ put_stat(:state, status)
+ put_stat(:message, message)
+ end
+
+ defp fault_rate do
+ with failures_count when is_integer(failures_count) <- failures_count() do
+ failures_count / Enum.max([get_stat(:affected_count, 0), 1])
+ else
+ _ -> :error
+ end
+ end
+
+ defp records_per_second do
+ get_stat(:iteration_processed_count, 0) / Enum.max([running_time(), 1])
+ end
+
+ defp running_time do
+ NaiveDateTime.diff(
+ NaiveDateTime.utc_now(),
+ get_stat(:started_at, NaiveDateTime.utc_now())
+ )
+ end
+ end
+ end
+end
diff --git a/lib/pleroma/migrators/support/base_migrator_state.ex b/lib/pleroma/migrators/support/base_migrator_state.ex
new file mode 100644
index 000000000..b698587f2
--- /dev/null
+++ b/lib/pleroma/migrators/support/base_migrator_state.ex
@@ -0,0 +1,117 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Migrators.Support.BaseMigratorState do
+ @moduledoc """
+ Base background migrator state functionality.
+ """
+
+ @callback data_migration() :: Pleroma.DataMigration.t()
+
+ defmacro __using__(_opts) do
+ quote do
+ use Agent
+
+ alias Pleroma.DataMigration
+
+ @behaviour Pleroma.Migrators.Support.BaseMigratorState
+ @reg_name {:global, __MODULE__}
+
+ def start_link(_) do
+ Agent.start_link(fn -> load_state_from_db() end, name: @reg_name)
+ end
+
+ def data_migration, do: raise("data_migration/0 is not implemented")
+ defoverridable data_migration: 0
+
+ defp load_state_from_db do
+ data_migration = data_migration()
+
+ data =
+ if data_migration do
+ Map.new(data_migration.data, fn {k, v} -> {String.to_atom(k), v} end)
+ else
+ %{}
+ end
+
+ %{
+ data_migration_id: data_migration && data_migration.id,
+ data: data
+ }
+ end
+
+ def persist_to_db do
+ %{data_migration_id: data_migration_id, data: data} = state()
+
+ if data_migration_id do
+ DataMigration.update_one_by_id(data_migration_id, data: data)
+ else
+ {:error, :nil_data_migration_id}
+ end
+ end
+
+ def reset do
+ %{data_migration_id: data_migration_id} = state()
+
+ with false <- is_nil(data_migration_id),
+ :ok <-
+ DataMigration.update_one_by_id(data_migration_id,
+ state: :pending,
+ data: %{}
+ ) do
+ reinit()
+ else
+ true -> {:error, :nil_data_migration_id}
+ e -> e
+ end
+ end
+
+ def reinit do
+ Agent.update(@reg_name, fn _state -> load_state_from_db() end)
+ end
+
+ def state do
+ Agent.get(@reg_name, & &1)
+ end
+
+ def get_data_key(key, default \\ nil) do
+ get_in(state(), [:data, key]) || default
+ end
+
+ def put_data_key(key, value) do
+ _ = persist_non_data_change(key, value)
+
+ Agent.update(@reg_name, fn state ->
+ put_in(state, [:data, key], value)
+ end)
+ end
+
+ def increment_data_key(key, increment \\ 1) do
+ Agent.update(@reg_name, fn state ->
+ initial_value = get_in(state, [:data, key]) || 0
+ updated_value = initial_value + increment
+ put_in(state, [:data, key], updated_value)
+ end)
+ end
+
+ defp persist_non_data_change(:state, value) do
+ with true <- get_data_key(:state) != value,
+ true <- value in Pleroma.DataMigration.State.__valid_values__(),
+ %{data_migration_id: data_migration_id} when not is_nil(data_migration_id) <-
+ state() do
+ DataMigration.update_one_by_id(data_migration_id, state: value)
+ else
+ false -> :ok
+ _ -> {:error, :nil_data_migration_id}
+ end
+ end
+
+ defp persist_non_data_change(_, _) do
+ nil
+ end
+
+ def data_migration_id, do: Map.get(state(), :data_migration_id)
+ end
+ end
+end
diff --git a/lib/pleroma/object.ex b/lib/pleroma/object.ex
index aaf123840..3ba749d1a 100644
--- a/lib/pleroma/object.ex
+++ b/lib/pleroma/object.ex
@@ -10,6 +10,7 @@ defmodule Pleroma.Object do
alias Pleroma.Activity
alias Pleroma.Config
+ alias Pleroma.Hashtag
alias Pleroma.Object
alias Pleroma.Object.Fetcher
alias Pleroma.ObjectTombstone
@@ -28,6 +29,8 @@ defmodule Pleroma.Object do
schema "objects" do
field(:data, :map)
+ many_to_many(:hashtags, Hashtag, join_through: "hashtags_objects", on_replace: :delete)
+
timestamps()
end
@@ -49,7 +52,8 @@ defmodule Pleroma.Object do
end
def create(data) do
- Object.change(%Object{}, %{data: data})
+ %Object{}
+ |> Object.change(%{data: data})
|> Repo.insert()
end
@@ -58,8 +62,41 @@ defmodule Pleroma.Object do
|> cast(params, [:data])
|> validate_required([:data])
|> unique_constraint(:ap_id, name: :objects_unique_apid_index)
+ # Expecting `maybe_handle_hashtags_change/1` to run last:
+ |> maybe_handle_hashtags_change(struct)
+ end
+
+ # Note: not checking activity type (assuming non-legacy objects are associated with Create act.)
+ defp maybe_handle_hashtags_change(changeset, struct) do
+ with %Ecto.Changeset{valid?: true} <- changeset,
+ data_hashtags_change = get_change(changeset, :data),
+ {_, true} <- {:changed, hashtags_changed?(struct, data_hashtags_change)},
+ {:ok, hashtag_records} <-
+ data_hashtags_change
+ |> object_data_hashtags()
+ |> Hashtag.get_or_create_by_names() do
+ put_assoc(changeset, :hashtags, hashtag_records)
+ else
+ %{valid?: false} ->
+ changeset
+
+ {:changed, false} ->
+ changeset
+
+ {:error, _} ->
+ validate_change(changeset, :data, fn _, _ ->
+ [data: "error referencing hashtags"]
+ end)
+ end
+ end
+
+ defp hashtags_changed?(%Object{} = struct, %{"tag" => _} = data) do
+ Enum.sort(embedded_hashtags(struct)) !=
+ Enum.sort(object_data_hashtags(data))
end
+ defp hashtags_changed?(_, _), do: false
+
def get_by_id(nil), do: nil
def get_by_id(id), do: Repo.get(Object, id)
@@ -187,9 +224,13 @@ defmodule Pleroma.Object do
def swap_object_with_tombstone(object) do
tombstone = make_tombstone(object)
- object
- |> Object.change(%{data: tombstone})
- |> Repo.update()
+ with {:ok, object} <-
+ object
+ |> Object.change(%{data: tombstone})
+ |> Repo.update() do
+ Hashtag.unlink(object)
+ {:ok, object}
+ end
end
def delete(%Object{data: %{"id" => id}} = object) do
@@ -349,4 +390,39 @@ defmodule Pleroma.Object do
def self_replies(object, opts \\ []),
do: replies(object, Keyword.put(opts, :self_only, true))
+
+ def tags(%Object{data: %{"tag" => tags}}) when is_list(tags), do: tags
+
+ def tags(_), do: []
+
+ def hashtags(%Object{} = object) do
+ # Note: always using embedded hashtags regardless whether they are migrated to hashtags table
+ # (embedded hashtags stay in sync anyways, and we avoid extra joins and preload hassle)
+ embedded_hashtags(object)
+ end
+
+ def embedded_hashtags(%Object{data: data}) do
+ object_data_hashtags(data)
+ end
+
+ def embedded_hashtags(_), do: []
+
+ def object_data_hashtags(%{"tag" => tags}) when is_list(tags) do
+ tags
+ |> Enum.filter(fn
+ %{"type" => "Hashtag"} = data -> Map.has_key?(data, "name")
+ plain_text when is_bitstring(plain_text) -> true
+ _ -> false
+ end)
+ |> Enum.map(fn
+ %{"name" => "#" <> hashtag} -> String.downcase(hashtag)
+ %{"name" => hashtag} -> String.downcase(hashtag)
+ hashtag when is_bitstring(hashtag) -> String.downcase(hashtag)
+ end)
+ |> Enum.uniq()
+ # Note: "" elements (plain text) might occur in `data.tag` for incoming objects
+ |> Enum.filter(&(&1 not in [nil, ""]))
+ end
+
+ def object_data_hashtags(_), do: []
end
diff --git a/lib/pleroma/pagination.ex b/lib/pleroma/pagination.ex
index 0d24e1010..33e45a0eb 100644
--- a/lib/pleroma/pagination.ex
+++ b/lib/pleroma/pagination.ex
@@ -93,6 +93,7 @@ defmodule Pleroma.Pagination do
max_id: :string,
offset: :integer,
limit: :integer,
+ skip_extra_order: :boolean,
skip_order: :boolean
}
@@ -114,6 +115,8 @@ defmodule Pleroma.Pagination do
defp restrict(query, :order, %{skip_order: true}, _), do: query
+ defp restrict(%{order_bys: [_ | _]} = query, :order, %{skip_extra_order: true}, _), do: query
+
defp restrict(query, :order, %{min_id: _}, table_binding) do
order_by(
query,
diff --git a/lib/pleroma/repo.ex b/lib/pleroma/repo.ex
index 4556352d0..b8ea06e33 100644
--- a/lib/pleroma/repo.ex
+++ b/lib/pleroma/repo.ex
@@ -8,6 +8,8 @@ defmodule Pleroma.Repo do
adapter: Ecto.Adapters.Postgres,
migration_timestamps: [type: :naive_datetime_usec]
+ use Ecto.Explain
+
import Ecto.Query
require Logger
@@ -63,8 +65,8 @@ defmodule Pleroma.Repo do
iex> Pleroma.Repo.chunk_stream(Pleroma.Activity.Queries.by_actor(ap_id), 500, :batches)
"""
@spec chunk_stream(Ecto.Query.t(), integer(), atom()) :: Enumerable.t()
- def chunk_stream(query, chunk_size, returns_as \\ :one) do
- # We don't actually need start and end funcitons of resource streaming,
+ def chunk_stream(query, chunk_size, returns_as \\ :one, query_options \\ []) do
+ # We don't actually need start and end functions of resource streaming,
# but it seems to be the only way to not fetch records one-by-one and
# have individual records be the elements of the stream, instead of
# lists of records
@@ -76,7 +78,7 @@ defmodule Pleroma.Repo do
|> order_by(asc: :id)
|> where([r], r.id > ^last_id)
|> limit(^chunk_size)
- |> all()
+ |> all(query_options)
|> case do
[] ->
{:halt, last_id}
diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex
index 5b45e2ca1..230faf024 100644
--- a/lib/pleroma/web/activity_pub/activity_pub.ex
+++ b/lib/pleroma/web/activity_pub/activity_pub.ex
@@ -10,6 +10,7 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do
alias Pleroma.Conversation
alias Pleroma.Conversation.Participation
alias Pleroma.Filter
+ alias Pleroma.Hashtag
alias Pleroma.Maps
alias Pleroma.Notification
alias Pleroma.Object
@@ -465,6 +466,23 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do
|> Repo.one()
end
+ defp fetch_paginated_optimized(query, opts, pagination) do
+ # Note: tag-filtering funcs may apply "ORDER BY objects.id DESC",
+ # and extra sorting on "activities.id DESC NULLS LAST" would worse the query plan
+ opts = Map.put(opts, :skip_extra_order, true)
+
+ Pagination.fetch_paginated(query, opts, pagination)
+ end
+
+ def fetch_activities(recipients, opts \\ %{}, pagination \\ :keyset) do
+ list_memberships = Pleroma.List.memberships(opts[:user])
+
+ fetch_activities_query(recipients ++ list_memberships, opts)
+ |> fetch_paginated_optimized(opts, pagination)
+ |> Enum.reverse()
+ |> maybe_update_cc(list_memberships, opts[:user])
+ end
+
@spec fetch_public_or_unlisted_activities(map(), Pagination.type()) :: [Activity.t()]
def fetch_public_or_unlisted_activities(opts \\ %{}, pagination \\ :keyset) do
opts = Map.delete(opts, :user)
@@ -472,7 +490,7 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do
[Constants.as_public()]
|> fetch_activities_query(opts)
|> restrict_unlisted(opts)
- |> Pagination.fetch_paginated(opts, pagination)
+ |> fetch_paginated_optimized(opts, pagination)
end
@spec fetch_public_activities(map(), Pagination.type()) :: [Activity.t()]
@@ -693,51 +711,143 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do
defp restrict_since(query, _), do: query
- defp restrict_tag_reject(_query, %{tag_reject: _tag_reject, skip_preload: true}) do
- raise "Can't use the child object without preloading!"
+ defp restrict_embedded_tag_all(_query, %{tag_all: _tag_all, skip_preload: true}) do
+ raise_on_missing_preload()
+ end
+
+ defp restrict_embedded_tag_all(query, %{tag_all: [_ | _] = tag_all}) do
+ from(
+ [_activity, object] in query,
+ where: fragment("(?)->'tag' \\?& (?)", object.data, ^tag_all)
+ )
+ end
+
+ defp restrict_embedded_tag_all(query, %{tag_all: tag}) when is_binary(tag) do
+ restrict_embedded_tag_any(query, %{tag: tag})
+ end
+
+ defp restrict_embedded_tag_all(query, _), do: query
+
+ defp restrict_embedded_tag_any(_query, %{tag: _tag, skip_preload: true}) do
+ raise_on_missing_preload()
+ end
+
+ defp restrict_embedded_tag_any(query, %{tag: [_ | _] = tag_any}) do
+ from(
+ [_activity, object] in query,
+ where: fragment("(?)->'tag' \\?| (?)", object.data, ^tag_any)
+ )
end
- defp restrict_tag_reject(query, %{tag_reject: [_ | _] = tag_reject}) do
+ defp restrict_embedded_tag_any(query, %{tag: tag}) when is_binary(tag) do
+ restrict_embedded_tag_any(query, %{tag: [tag]})
+ end
+
+ defp restrict_embedded_tag_any(query, _), do: query
+
+ defp restrict_embedded_tag_reject_any(_query, %{tag_reject: _tag_reject, skip_preload: true}) do
+ raise_on_missing_preload()
+ end
+
+ defp restrict_embedded_tag_reject_any(query, %{tag_reject: [_ | _] = tag_reject}) do
from(
[_activity, object] in query,
where: fragment("not (?)->'tag' \\?| (?)", object.data, ^tag_reject)
)
end
- defp restrict_tag_reject(query, _), do: query
+ defp restrict_embedded_tag_reject_any(query, %{tag_reject: tag_reject})
+ when is_binary(tag_reject) do
+ restrict_embedded_tag_reject_any(query, %{tag_reject: [tag_reject]})
+ end
- defp restrict_tag_all(_query, %{tag_all: _tag_all, skip_preload: true}) do
- raise "Can't use the child object without preloading!"
+ defp restrict_embedded_tag_reject_any(query, _), do: query
+
+ defp object_ids_query_for_tags(tags) do
+ from(hto in "hashtags_objects")
+ |> join(:inner, [hto], ht in Pleroma.Hashtag, on: hto.hashtag_id == ht.id)
+ |> where([hto, ht], ht.name in ^tags)
+ |> select([hto], hto.object_id)
+ |> distinct([hto], true)
+ end
+
+ defp restrict_hashtag_all(_query, %{tag_all: _tag, skip_preload: true}) do
+ raise_on_missing_preload()
+ end
+
+ defp restrict_hashtag_all(query, %{tag_all: [single_tag]}) do
+ restrict_hashtag_any(query, %{tag: single_tag})
end
- defp restrict_tag_all(query, %{tag_all: [_ | _] = tag_all}) do
+ defp restrict_hashtag_all(query, %{tag_all: [_ | _] = tags}) do
from(
[_activity, object] in query,
- where: fragment("(?)->'tag' \\?& (?)", object.data, ^tag_all)
+ where:
+ fragment(
+ """
+ (SELECT array_agg(hashtags.name) FROM hashtags JOIN hashtags_objects
+ ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?)
+ AND hashtags_objects.object_id = ?) @> ?
+ """,
+ ^tags,
+ object.id,
+ ^tags
+ )
)
end
- defp restrict_tag_all(query, _), do: query
+ defp restrict_hashtag_all(query, %{tag_all: tag}) when is_binary(tag) do
+ restrict_hashtag_all(query, %{tag_all: [tag]})
+ end
- defp restrict_tag(_query, %{tag: _tag, skip_preload: true}) do
- raise "Can't use the child object without preloading!"
+ defp restrict_hashtag_all(query, _), do: query
+
+ defp restrict_hashtag_any(_query, %{tag: _tag, skip_preload: true}) do
+ raise_on_missing_preload()
end
- defp restrict_tag(query, %{tag: tag}) when is_list(tag) do
+ defp restrict_hashtag_any(query, %{tag: [_ | _] = tags}) do
+ hashtag_ids =
+ from(ht in Hashtag, where: ht.name in ^tags, select: ht.id)
+ |> Repo.all()
+
+ # Note: NO extra ordering should be done on "activities.id desc nulls last" for optimal plan
from(
[_activity, object] in query,
- where: fragment("(?)->'tag' \\?| (?)", object.data, ^tag)
+ join: hto in "hashtags_objects",
+ on: hto.object_id == object.id,
+ where: hto.hashtag_id in ^hashtag_ids,
+ distinct: [desc: object.id],
+ order_by: [desc: object.id]
)
end
- defp restrict_tag(query, %{tag: tag}) when is_binary(tag) do
+ defp restrict_hashtag_any(query, %{tag: tag}) when is_binary(tag) do
+ restrict_hashtag_any(query, %{tag: [tag]})
+ end
+
+ defp restrict_hashtag_any(query, _), do: query
+
+ defp restrict_hashtag_reject_any(_query, %{tag_reject: _tag_reject, skip_preload: true}) do
+ raise_on_missing_preload()
+ end
+
+ defp restrict_hashtag_reject_any(query, %{tag_reject: [_ | _] = tags_reject}) do
from(
[_activity, object] in query,
- where: fragment("(?)->'tag' \\? (?)", object.data, ^tag)
+ where: object.id not in subquery(object_ids_query_for_tags(tags_reject))
)
end
- defp restrict_tag(query, _), do: query
+ defp restrict_hashtag_reject_any(query, %{tag_reject: tag_reject}) when is_binary(tag_reject) do
+ restrict_hashtag_reject_any(query, %{tag_reject: [tag_reject]})
+ end
+
+ defp restrict_hashtag_reject_any(query, _), do: query
+
+ defp raise_on_missing_preload do
+ raise "Can't use the child object without preloading!"
+ end
defp restrict_recipients(query, [], _user), do: query
@@ -1098,6 +1208,26 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do
defp maybe_order(query, _), do: query
+ defp normalize_fetch_activities_query_opts(opts) do
+ Enum.reduce([:tag, :tag_all, :tag_reject], opts, fn key, opts ->
+ case opts[key] do
+ value when is_bitstring(value) ->
+ Map.put(opts, key, Hashtag.normalize_name(value))
+
+ value when is_list(value) ->
+ normalized_value =
+ value
+ |> Enum.map(&Hashtag.normalize_name/1)
+ |> Enum.uniq()
+
+ Map.put(opts, key, normalized_value)
+
+ _ ->
+ opts
+ end
+ end)
+ end
+
defp fetch_activities_query_ap_ids_ops(opts) do
source_user = opts[:muting_user]
ap_id_relationships = if source_user, do: [:mute, :reblog_mute], else: []
@@ -1121,6 +1251,8 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do
end
def fetch_activities_query(recipients, opts \\ %{}) do
+ opts = normalize_fetch_activities_query_opts(opts)
+
{restrict_blocked_opts, restrict_muted_opts, restrict_muted_reblogs_opts} =
fetch_activities_query_ap_ids_ops(opts)
@@ -1128,50 +1260,51 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do
skip_thread_containment: Config.get([:instance, :skip_thread_containment])
}
- Activity
- |> maybe_preload_objects(opts)
- |> maybe_preload_bookmarks(opts)
- |> maybe_preload_report_notes(opts)
- |> maybe_set_thread_muted_field(opts)
- |> maybe_order(opts)
- |> restrict_recipients(recipients, opts[:user])
- |> restrict_replies(opts)
- |> restrict_tag(opts)
- |> restrict_tag_reject(opts)
- |> restrict_tag_all(opts)
- |> restrict_since(opts)
- |> restrict_local(opts)
- |> restrict_remote(opts)
- |> restrict_actor(opts)
- |> restrict_type(opts)
- |> restrict_state(opts)
- |> restrict_favorited_by(opts)
- |> restrict_blocked(restrict_blocked_opts)
- |> restrict_muted(restrict_muted_opts)
- |> restrict_filtered(opts)
- |> restrict_media(opts)
- |> restrict_visibility(opts)
- |> restrict_thread_visibility(opts, config)
- |> restrict_reblogs(opts)
- |> restrict_pinned(opts)
- |> restrict_muted_reblogs(restrict_muted_reblogs_opts)
- |> restrict_instance(opts)
- |> restrict_announce_object_actor(opts)
- |> restrict_filtered(opts)
- |> Activity.restrict_deactivated_users()
- |> exclude_poll_votes(opts)
- |> exclude_chat_messages(opts)
- |> exclude_invisible_actors(opts)
- |> exclude_visibility(opts)
- end
-
- def fetch_activities(recipients, opts \\ %{}, pagination \\ :keyset) do
- list_memberships = Pleroma.List.memberships(opts[:user])
-
- fetch_activities_query(recipients ++ list_memberships, opts)
- |> Pagination.fetch_paginated(opts, pagination)
- |> Enum.reverse()
- |> maybe_update_cc(list_memberships, opts[:user])
+ query =
+ Activity
+ |> maybe_preload_objects(opts)
+ |> maybe_preload_bookmarks(opts)
+ |> maybe_preload_report_notes(opts)
+ |> maybe_set_thread_muted_field(opts)
+ |> maybe_order(opts)
+ |> restrict_recipients(recipients, opts[:user])
+ |> restrict_replies(opts)
+ |> restrict_since(opts)
+ |> restrict_local(opts)
+ |> restrict_remote(opts)
+ |> restrict_actor(opts)
+ |> restrict_type(opts)
+ |> restrict_state(opts)
+ |> restrict_favorited_by(opts)
+ |> restrict_blocked(restrict_blocked_opts)
+ |> restrict_muted(restrict_muted_opts)
+ |> restrict_filtered(opts)
+ |> restrict_media(opts)
+ |> restrict_visibility(opts)
+ |> restrict_thread_visibility(opts, config)
+ |> restrict_reblogs(opts)
+ |> restrict_pinned(opts)
+ |> restrict_muted_reblogs(restrict_muted_reblogs_opts)
+ |> restrict_instance(opts)
+ |> restrict_announce_object_actor(opts)
+ |> restrict_filtered(opts)
+ |> Activity.restrict_deactivated_users()
+ |> exclude_poll_votes(opts)
+ |> exclude_chat_messages(opts)
+ |> exclude_invisible_actors(opts)
+ |> exclude_visibility(opts)
+
+ if Config.feature_enabled?(:improved_hashtag_timeline) do
+ query
+ |> restrict_hashtag_any(opts)
+ |> restrict_hashtag_all(opts)
+ |> restrict_hashtag_reject_any(opts)
+ else
+ query
+ |> restrict_embedded_tag_any(opts)
+ |> restrict_embedded_tag_all(opts)
+ |> restrict_embedded_tag_reject_any(opts)
+ end
end
@doc """
diff --git a/lib/pleroma/web/activity_pub/mrf/simple_policy.ex b/lib/pleroma/web/activity_pub/mrf/simple_policy.ex
index bb3838d2c..0b1be8c51 100644
--- a/lib/pleroma/web/activity_pub/mrf/simple_policy.ex
+++ b/lib/pleroma/web/activity_pub/mrf/simple_policy.ex
@@ -74,9 +74,11 @@ defmodule Pleroma.Web.ActivityPub.MRF.SimplePolicy do
object =
if MRF.subdomain_match?(media_nsfw, actor_host) do
- tags = (child_object["tag"] || []) ++ ["nsfw"]
- child_object = Map.put(child_object, "tag", tags)
- child_object = Map.put(child_object, "sensitive", true)
+ child_object =
+ child_object
+ |> Map.put("tag", (child_object["tag"] || []) ++ ["nsfw"])
+ |> Map.put("sensitive", true)
+
Map.put(object, "object", child_object)
else
object
diff --git a/lib/pleroma/web/activity_pub/transmogrifier.ex b/lib/pleroma/web/activity_pub/transmogrifier.ex
index 4d9a5617e..0a701334f 100644
--- a/lib/pleroma/web/activity_pub/transmogrifier.ex
+++ b/lib/pleroma/web/activity_pub/transmogrifier.ex
@@ -32,18 +32,18 @@ defmodule Pleroma.Web.ActivityPub.Transmogrifier do
"""
def fix_object(object, options \\ []) do
object
- |> strip_internal_fields
- |> fix_actor
- |> fix_url
- |> fix_attachments
- |> fix_context
+ |> strip_internal_fields()
+ |> fix_actor()
+ |> fix_url()
+ |> fix_attachments()
+ |> fix_context()
|> fix_in_reply_to(options)
- |> fix_emoji
- |> fix_tag
- |> set_sensitive
- |> fix_content_map
- |> fix_addressing
- |> fix_summary
+ |> fix_emoji()
+ |> fix_tag()
+ |> set_sensitive()
+ |> fix_content_map()
+ |> fix_addressing()
+ |> fix_summary()
|> fix_type(options)
end
@@ -315,10 +315,9 @@ defmodule Pleroma.Web.ActivityPub.Transmogrifier do
tags =
tag
|> Enum.filter(fn data -> data["type"] == "Hashtag" and data["name"] end)
- |> Enum.map(fn %{"name" => name} ->
- name
- |> String.slice(1..-1)
- |> String.downcase()
+ |> Enum.map(fn
+ %{"name" => "#" <> hashtag} -> String.downcase(hashtag)
+ %{"name" => hashtag} -> String.downcase(hashtag)
end)
Map.put(object, "tag", tag ++ tags)
diff --git a/lib/pleroma/web/feed/feed_view.ex b/lib/pleroma/web/feed/feed_view.ex
index df97d2f46..66940f311 100644
--- a/lib/pleroma/web/feed/feed_view.ex
+++ b/lib/pleroma/web/feed/feed_view.ex
@@ -32,6 +32,7 @@ defmodule Pleroma.Web.Feed.FeedView do
%{
activity: activity,
+ object: object,
data: Map.get(object, :data),
actor: actor
}
diff --git a/lib/pleroma/web/mastodon_api/controllers/timeline_controller.ex b/lib/pleroma/web/mastodon_api/controllers/timeline_controller.ex
index cef299aa4..c611958be 100644
--- a/lib/pleroma/web/mastodon_api/controllers/timeline_controller.ex
+++ b/lib/pleroma/web/mastodon_api/controllers/timeline_controller.ex
@@ -133,34 +133,25 @@ defmodule Pleroma.Web.MastodonAPI.TimelineController do
end
defp hashtag_fetching(params, user, local_only) do
- tags =
+ # Note: not sanitizing tag options at this stage (may be mix-cased, have duplicates etc.)
+ tags_any =
[params[:tag], params[:any]]
|> List.flatten()
- |> Enum.uniq()
- |> Enum.reject(&is_nil/1)
- |> Enum.map(&String.downcase/1)
-
- tag_all =
- params
- |> Map.get(:all, [])
- |> Enum.map(&String.downcase/1)
-
- tag_reject =
- params
- |> Map.get(:none, [])
- |> Enum.map(&String.downcase/1)
-
- _activities =
- params
- |> Map.put(:type, "Create")
- |> Map.put(:local_only, local_only)
- |> Map.put(:blocking_user, user)
- |> Map.put(:muting_user, user)
- |> Map.put(:user, user)
- |> Map.put(:tag, tags)
- |> Map.put(:tag_all, tag_all)
- |> Map.put(:tag_reject, tag_reject)
- |> ActivityPub.fetch_public_activities()
+ |> Enum.filter(& &1)
+
+ tag_all = Map.get(params, :all, [])
+ tag_reject = Map.get(params, :none, [])
+
+ params
+ |> Map.put(:type, "Create")
+ |> Map.put(:local_only, local_only)
+ |> Map.put(:blocking_user, user)
+ |> Map.put(:muting_user, user)
+ |> Map.put(:user, user)
+ |> Map.put(:tag, tags_any)
+ |> Map.put(:tag_all, tag_all)
+ |> Map.put(:tag_reject, tag_reject)
+ |> ActivityPub.fetch_public_activities()
end
# GET /api/v1/timelines/tag/:tag
diff --git a/lib/pleroma/web/mastodon_api/views/status_view.ex b/lib/pleroma/web/mastodon_api/views/status_view.ex
index f3f54e03d..d30c9fa68 100644
--- a/lib/pleroma/web/mastodon_api/views/status_view.ex
+++ b/lib/pleroma/web/mastodon_api/views/status_view.ex
@@ -198,8 +198,10 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do
like_count = object.data["like_count"] || 0
announcement_count = object.data["announcement_count"] || 0
- tags = object.data["tag"] || []
- sensitive = object.data["sensitive"] || Enum.member?(tags, "nsfw")
+ hashtags = Object.hashtags(object)
+ sensitive = object.data["sensitive"] || Enum.member?(hashtags, "nsfw")
+
+ tags = Object.tags(object)
tag_mentions =
tags
diff --git a/lib/pleroma/web/templates/feed/feed/_activity.atom.eex b/lib/pleroma/web/templates/feed/feed/_activity.atom.eex
index 3fd150c4e..6688830ba 100644
--- a/lib/pleroma/web/templates/feed/feed/_activity.atom.eex
+++ b/lib/pleroma/web/templates/feed/feed/_activity.atom.eex
@@ -22,7 +22,7 @@
<link type="text/html" href='<%= @data["external_url"] %>' rel="alternate"/>
<% end %>
- <%= for tag <- @data["tag"] || [] do %>
+ <%= for tag <- Pleroma.Object.hashtags(@object) do %>
<category term="<%= tag %>"></category>
<% end %>
diff --git a/lib/pleroma/web/templates/feed/feed/_activity.rss.eex b/lib/pleroma/web/templates/feed/feed/_activity.rss.eex
index 947bbb099..592b9dcdc 100644
--- a/lib/pleroma/web/templates/feed/feed/_activity.rss.eex
+++ b/lib/pleroma/web/templates/feed/feed/_activity.rss.eex
@@ -22,7 +22,7 @@
<link rel="ostatus:conversation"><%= activity_context(@activity) %></link>
- <%= for tag <- @data["tag"] || [] do %>
+ <%= for tag <- Pleroma.Object.hashtags(@object) do %>
<category term="<%= tag %>"></category>
<% end %>
diff --git a/lib/pleroma/web/templates/feed/feed/_tag_activity.atom.eex b/lib/pleroma/web/templates/feed/feed/_tag_activity.atom.eex
index cf5874a91..c2de28fe4 100644
--- a/lib/pleroma/web/templates/feed/feed/_tag_activity.atom.eex
+++ b/lib/pleroma/web/templates/feed/feed/_tag_activity.atom.eex
@@ -41,7 +41,7 @@
<% end %>
<% end %>
- <%= for tag <- @data["tag"] || [] do %>
+ <%= for tag <- Pleroma.Object.hashtags(@object) do %>
<category term="<%= tag %>"></category>
<% end %>
diff --git a/mix.exs b/mix.exs
index ec6e92df7..87ee01073 100644
--- a/mix.exs
+++ b/mix.exs
@@ -121,6 +121,7 @@ defmodule Pleroma.Mixfile do
{:phoenix_pubsub, "~> 2.0"},
{:phoenix_ecto, "~> 4.0"},
{:ecto_enum, "~> 1.4"},
+ {:ecto_explain, "~> 0.1.2"},
{:ecto_sql, "~> 3.4.4"},
{:postgrex, ">= 0.15.5"},
{:oban, "~> 2.3.4"},
diff --git a/mix.lock b/mix.lock
index 99be81826..6d0635360 100644
--- a/mix.lock
+++ b/mix.lock
@@ -31,6 +31,7 @@
"earmark_parser": {:hex, :earmark_parser, "1.4.10", "6603d7a603b9c18d3d20db69921527f82ef09990885ed7525003c7fe7dc86c56", [:mix], [], "hexpm", "8e2d5370b732385db2c9b22215c3f59c84ac7dda7ed7e544d7c459496ae519c0"},
"ecto": {:hex, :ecto, "3.4.6", "08f7afad3257d6eb8613309af31037e16c36808dfda5a3cd0cb4e9738db030e4", [:mix], [{:decimal, "~> 1.6 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "6f13a9e2a62e75c2dcfc7207bfc65645ab387af8360db4c89fee8b5a4bf3f70b"},
"ecto_enum": {:hex, :ecto_enum, "1.4.0", "d14b00e04b974afc69c251632d1e49594d899067ee2b376277efd8233027aec8", [:mix], [{:ecto, ">= 3.0.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:ecto_sql, "> 3.0.0", [hex: :ecto_sql, repo: "hexpm", optional: false]}, {:mariaex, ">= 0.0.0", [hex: :mariaex, repo: "hexpm", optional: true]}, {:postgrex, ">= 0.0.0", [hex: :postgrex, repo: "hexpm", optional: true]}], "hexpm", "8fb55c087181c2b15eee406519dc22578fa60dd82c088be376d0010172764ee4"},
+ "ecto_explain": {:hex, :ecto_explain, "0.1.2", "a9d504cbd4adc809911f796d5ef7ebb17a576a6d32286c3d464c015bd39d5541", [:mix], [], "hexpm", "1d0e7798ae30ecf4ce34e912e5354a0c1c832b7ebceba39298270b9a9f316330"},
"ecto_sql": {:hex, :ecto_sql, "3.4.5", "30161f81b167d561a9a2df4329c10ae05ff36eca7ccc84628f2c8b9fa1e43323", [:mix], [{:db_connection, "~> 2.2", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.4.3", [hex: :ecto, repo: "hexpm", optional: false]}, {:myxql, "~> 0.3.0 or ~> 0.4.0", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.15.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:tds, "~> 2.1.0", [hex: :tds, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "31990c6a3579b36a3c0841d34a94c275e727de8b84f58509da5f1b2032c98ac2"},
"eimp": {:hex, :eimp, "1.0.14", "fc297f0c7e2700457a95a60c7010a5f1dcb768a083b6d53f49cd94ab95a28f22", [:rebar3], [{:p1_utils, "1.0.18", [hex: :p1_utils, repo: "hexpm", optional: false]}], "hexpm", "501133f3112079b92d9e22da8b88bf4f0e13d4d67ae9c15c42c30bd25ceb83b6"},
"elixir_make": {:hex, :elixir_make, "0.6.2", "7dffacd77dec4c37b39af867cedaabb0b59f6a871f89722c25b28fcd4bd70530", [:mix], [], "hexpm", "03e49eadda22526a7e5279d53321d1cced6552f344ba4e03e619063de75348d9"},
diff --git a/priv/repo/migrations/20190711042021_create_safe_jsonb_set.exs b/priv/repo/migrations/20190711042021_create_safe_jsonb_set.exs
index 43d616705..bfac09f9e 100644
--- a/priv/repo/migrations/20190711042021_create_safe_jsonb_set.exs
+++ b/priv/repo/migrations/20190711042021_create_safe_jsonb_set.exs
@@ -9,7 +9,7 @@ defmodule Pleroma.Repo.Migrations.CreateSafeJsonbSet do
begin
result := jsonb_set(target, path, coalesce(new_value, 'null'::jsonb), create_missing);
if result is NULL then
- raise 'jsonb_set tried to wipe the object, please report this incindent to Pleroma bug tracker. https://git.pleroma.social/pleroma/pleroma/issues/new';
+ raise 'jsonb_set tried to wipe the object, please report this incident to Pleroma bug tracker. https://git.pleroma.social/pleroma/pleroma/issues/new';
return target;
else
return result;
diff --git a/priv/repo/migrations/20201221202251_create_hashtags.exs b/priv/repo/migrations/20201221202251_create_hashtags.exs
new file mode 100644
index 000000000..8d2e9ae66
--- /dev/null
+++ b/priv/repo/migrations/20201221202251_create_hashtags.exs
@@ -0,0 +1,13 @@
+defmodule Pleroma.Repo.Migrations.CreateHashtags do
+ use Ecto.Migration
+
+ def change do
+ create_if_not_exists table(:hashtags) do
+ add(:name, :citext, null: false)
+
+ timestamps()
+ end
+
+ create_if_not_exists(unique_index(:hashtags, [:name]))
+ end
+end
diff --git a/priv/repo/migrations/20201221202252_remove_data_from_hashtags.exs b/priv/repo/migrations/20201221202252_remove_data_from_hashtags.exs
new file mode 100644
index 000000000..0442c3b87
--- /dev/null
+++ b/priv/repo/migrations/20201221202252_remove_data_from_hashtags.exs
@@ -0,0 +1,15 @@
+defmodule Pleroma.Repo.Migrations.RemoveDataFromHashtags do
+ use Ecto.Migration
+
+ def up do
+ alter table(:hashtags) do
+ remove_if_exists(:data, :map)
+ end
+ end
+
+ def down do
+ alter table(:hashtags) do
+ add_if_not_exists(:data, :map, default: %{})
+ end
+ end
+end
diff --git a/priv/repo/migrations/20201221203824_create_hashtags_objects.exs b/priv/repo/migrations/20201221203824_create_hashtags_objects.exs
new file mode 100644
index 000000000..581f32b3c
--- /dev/null
+++ b/priv/repo/migrations/20201221203824_create_hashtags_objects.exs
@@ -0,0 +1,13 @@
+defmodule Pleroma.Repo.Migrations.CreateHashtagsObjects do
+ use Ecto.Migration
+
+ def change do
+ create_if_not_exists table(:hashtags_objects, primary_key: false) do
+ add(:hashtag_id, references(:hashtags), null: false, primary_key: true)
+ add(:object_id, references(:objects), null: false, primary_key: true)
+ end
+
+ # Note: PK index: "hashtags_objects_pkey" PRIMARY KEY, btree (hashtag_id, object_id)
+ create_if_not_exists(index(:hashtags_objects, [:object_id]))
+ end
+end
diff --git a/priv/repo/migrations/20210105195018_create_data_migrations.exs b/priv/repo/migrations/20210105195018_create_data_migrations.exs
new file mode 100644
index 000000000..5f2e8d96c
--- /dev/null
+++ b/priv/repo/migrations/20210105195018_create_data_migrations.exs
@@ -0,0 +1,17 @@
+defmodule Pleroma.Repo.Migrations.CreateDataMigrations do
+ use Ecto.Migration
+
+ def change do
+ create_if_not_exists table(:data_migrations) do
+ add(:name, :string, null: false)
+ add(:state, :integer, default: 1)
+ add(:feature_lock, :boolean, default: false)
+ add(:params, :map, default: %{})
+ add(:data, :map, default: %{})
+
+ timestamps()
+ end
+
+ create_if_not_exists(unique_index(:data_migrations, [:name]))
+ end
+end
diff --git a/priv/repo/migrations/20210106183301_data_migration_create_populate_hashtags_table.exs b/priv/repo/migrations/20210106183301_data_migration_create_populate_hashtags_table.exs
new file mode 100644
index 000000000..cf3cf26a0
--- /dev/null
+++ b/priv/repo/migrations/20210106183301_data_migration_create_populate_hashtags_table.exs
@@ -0,0 +1,16 @@
+defmodule Pleroma.Repo.Migrations.DataMigrationCreatePopulateHashtagsTable do
+ use Ecto.Migration
+
+ def up do
+ dt = NaiveDateTime.utc_now()
+
+ execute(
+ "INSERT INTO data_migrations(name, inserted_at, updated_at) " <>
+ "VALUES ('populate_hashtags_table', '#{dt}', '#{dt}') ON CONFLICT DO NOTHING;"
+ )
+ end
+
+ def down do
+ execute("DELETE FROM data_migrations WHERE name = 'populate_hashtags_table';")
+ end
+end
diff --git a/priv/repo/migrations/20210111172254_create_data_migration_failed_ids.exs b/priv/repo/migrations/20210111172254_create_data_migration_failed_ids.exs
new file mode 100644
index 000000000..18afa74ac
--- /dev/null
+++ b/priv/repo/migrations/20210111172254_create_data_migration_failed_ids.exs
@@ -0,0 +1,14 @@
+defmodule Pleroma.Repo.Migrations.CreateDataMigrationFailedIds do
+ use Ecto.Migration
+
+ def change do
+ create_if_not_exists table(:data_migration_failed_ids, primary_key: false) do
+ add(:data_migration_id, references(:data_migrations), null: false, primary_key: true)
+ add(:record_id, :bigint, null: false, primary_key: true)
+ end
+
+ create_if_not_exists(
+ unique_index(:data_migration_failed_ids, [:data_migration_id, :record_id])
+ )
+ end
+end
diff --git a/priv/repo/migrations/20210222183840_remove_hashtags_objects_duplicate_index.exs b/priv/repo/migrations/20210222183840_remove_hashtags_objects_duplicate_index.exs
new file mode 100644
index 000000000..6c4a2dfdc
--- /dev/null
+++ b/priv/repo/migrations/20210222183840_remove_hashtags_objects_duplicate_index.exs
@@ -0,0 +1,11 @@
+defmodule Pleroma.Repo.Migrations.RemoveHashtagsObjectsDuplicateIndex do
+ use Ecto.Migration
+
+ @moduledoc "Removes `hashtags_objects_hashtag_id_object_id_index` index (duplicate of PK index)."
+
+ def up do
+ drop_if_exists(unique_index(:hashtags_objects, [:hashtag_id, :object_id]))
+ end
+
+ def down, do: nil
+end
diff --git a/priv/repo/migrations/20210222184616_change_hashtags_name_to_text.exs b/priv/repo/migrations/20210222184616_change_hashtags_name_to_text.exs
new file mode 100644
index 000000000..8940b6ca3
--- /dev/null
+++ b/priv/repo/migrations/20210222184616_change_hashtags_name_to_text.exs
@@ -0,0 +1,15 @@
+defmodule Pleroma.Repo.Migrations.ChangeHashtagsNameToText do
+ use Ecto.Migration
+
+ def up do
+ alter table(:hashtags) do
+ modify(:name, :text)
+ end
+ end
+
+ def down do
+ alter table(:hashtags) do
+ modify(:name, :citext)
+ end
+ end
+end
diff --git a/test/pleroma/activity/ir/topics_test.exs b/test/pleroma/activity/ir/topics_test.exs
index 6b848e04d..9c8e5d932 100644
--- a/test/pleroma/activity/ir/topics_test.exs
+++ b/test/pleroma/activity/ir/topics_test.exs
@@ -11,6 +11,8 @@ defmodule Pleroma.Activity.Ir.TopicsTest do
require Pleroma.Constants
+ import Mock
+
describe "poll answer" do
test "produce no topics" do
activity = %Activity{object: %Object{data: %{"type" => "Answer"}}}
@@ -77,14 +79,13 @@ defmodule Pleroma.Activity.Ir.TopicsTest do
refute Enum.member?(topics, "public:local:media")
end
- test "converts tags to hash tags", %{activity: %{object: %{data: data} = object} = activity} do
- tagged_data = Map.put(data, "tag", ["foo", "bar"])
- activity = %{activity | object: %{object | data: tagged_data}}
-
- topics = Topics.get_activity_topics(activity)
+ test "converts tags to hash tags", %{activity: activity} do
+ with_mock(Object, [:passthrough], hashtags: fn _ -> ["foo", "bar"] end) do
+ topics = Topics.get_activity_topics(activity)
- assert Enum.member?(topics, "hashtag:foo")
- assert Enum.member?(topics, "hashtag:bar")
+ assert Enum.member?(topics, "hashtag:foo")
+ assert Enum.member?(topics, "hashtag:bar")
+ end
end
test "only converts strings to hash tags", %{
diff --git a/test/pleroma/hashtag_test.exs b/test/pleroma/hashtag_test.exs
new file mode 100644
index 000000000..0264dea0b
--- /dev/null
+++ b/test/pleroma/hashtag_test.exs
@@ -0,0 +1,17 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.HashtagTest do
+ use Pleroma.DataCase
+
+ alias Pleroma.Hashtag
+
+ describe "changeset validations" do
+ test "ensure non-blank :name" do
+ changeset = Hashtag.changeset(%Hashtag{}, %{name: ""})
+
+ assert {:name, {"can't be blank", [validation: :required]}} in changeset.errors
+ end
+ end
+end
diff --git a/test/pleroma/object_test.exs b/test/pleroma/object_test.exs
index db7678d5d..8320660a5 100644
--- a/test/pleroma/object_test.exs
+++ b/test/pleroma/object_test.exs
@@ -5,10 +5,13 @@
defmodule Pleroma.ObjectTest do
use Pleroma.DataCase
use Oban.Testing, repo: Pleroma.Repo
+
import ExUnit.CaptureLog
import Pleroma.Factory
import Tesla.Mock
+
alias Pleroma.Activity
+ alias Pleroma.Hashtag
alias Pleroma.Object
alias Pleroma.Repo
alias Pleroma.Tests.ObanHelpers
@@ -417,4 +420,28 @@ defmodule Pleroma.ObjectTest do
assert updated_object.data["like_count"] == 1
end
end
+
+ describe ":hashtags association" do
+ test "Hashtag records are created with Object record and updated on its change" do
+ user = insert(:user)
+
+ {:ok, %{object: object}} =
+ CommonAPI.post(user, %{status: "some text #hashtag1 #hashtag2 ..."})
+
+ assert [%Hashtag{name: "hashtag1"}, %Hashtag{name: "hashtag2"}] =
+ Enum.sort_by(object.hashtags, & &1.name)
+
+ {:ok, object} = Object.update_data(object, %{"tag" => []})
+
+ assert [] = object.hashtags
+
+ object = Object.get_by_id(object.id) |> Repo.preload(:hashtags)
+ assert [] = object.hashtags
+
+ {:ok, object} = Object.update_data(object, %{"tag" => ["abc", "def"]})
+
+ assert [%Hashtag{name: "abc"}, %Hashtag{name: "def"}] =
+ Enum.sort_by(object.hashtags, & &1.name)
+ end
+ end
end
diff --git a/test/pleroma/web/activity_pub/activity_pub_test.exs b/test/pleroma/web/activity_pub/activity_pub_test.exs
index f4023856c..1e1e74074 100644
--- a/test/pleroma/web/activity_pub/activity_pub_test.exs
+++ b/test/pleroma/web/activity_pub/activity_pub_test.exs
@@ -213,32 +213,64 @@ defmodule Pleroma.Web.ActivityPub.ActivityPubTest do
test "it fetches the appropriate tag-restricted posts" do
user = insert(:user)
- {:ok, status_one} = CommonAPI.post(user, %{status: ". #test"})
+ {:ok, status_one} = CommonAPI.post(user, %{status: ". #TEST"})
{:ok, status_two} = CommonAPI.post(user, %{status: ". #essais"})
- {:ok, status_three} = CommonAPI.post(user, %{status: ". #test #reject"})
+ {:ok, status_three} = CommonAPI.post(user, %{status: ". #test #Reject"})
- fetch_one = ActivityPub.fetch_activities([], %{type: "Create", tag: "test"})
+ {:ok, status_four} = CommonAPI.post(user, %{status: ". #Any1 #any2"})
+ {:ok, status_five} = CommonAPI.post(user, %{status: ". #Any2 #any1"})
- fetch_two = ActivityPub.fetch_activities([], %{type: "Create", tag: ["test", "essais"]})
+ for hashtag_timeline_strategy <- [:enabled, :disabled] do
+ clear_config([:features, :improved_hashtag_timeline], hashtag_timeline_strategy)
- fetch_three =
- ActivityPub.fetch_activities([], %{
- type: "Create",
- tag: ["test", "essais"],
- tag_reject: ["reject"]
- })
+ fetch_one = ActivityPub.fetch_activities([], %{type: "Create", tag: "test"})
- fetch_four =
- ActivityPub.fetch_activities([], %{
- type: "Create",
- tag: ["test"],
- tag_all: ["test", "reject"]
- })
+ fetch_two = ActivityPub.fetch_activities([], %{type: "Create", tag: ["TEST", "essais"]})
+
+ fetch_three =
+ ActivityPub.fetch_activities([], %{
+ type: "Create",
+ tag: ["test", "Essais"],
+ tag_reject: ["reject"]
+ })
+
+ fetch_four =
+ ActivityPub.fetch_activities([], %{
+ type: "Create",
+ tag: ["test"],
+ tag_all: ["test", "REJECT"]
+ })
- assert fetch_one == [status_one, status_three]
- assert fetch_two == [status_one, status_two, status_three]
- assert fetch_three == [status_one, status_two]
- assert fetch_four == [status_three]
+ # Testing that deduplication (if needed) is done on DB (not Ecto) level; :limit is important
+ fetch_five =
+ ActivityPub.fetch_activities([], %{
+ type: "Create",
+ tag: ["ANY1", "any2"],
+ limit: 2
+ })
+
+ fetch_six =
+ ActivityPub.fetch_activities([], %{
+ type: "Create",
+ tag: ["any1", "Any2"],
+ tag_all: [],
+ tag_reject: []
+ })
+
+ # Regression test: passing empty lists as filter options shouldn't affect the results
+ assert fetch_five == fetch_six
+
+ [fetch_one, fetch_two, fetch_three, fetch_four, fetch_five] =
+ Enum.map([fetch_one, fetch_two, fetch_three, fetch_four, fetch_five], fn statuses ->
+ Enum.map(statuses, fn s -> Repo.preload(s, object: :hashtags) end)
+ end)
+
+ assert fetch_one == [status_one, status_three]
+ assert fetch_two == [status_one, status_two, status_three]
+ assert fetch_three == [status_one, status_two]
+ assert fetch_four == [status_three]
+ assert fetch_five == [status_four, status_five]
+ end
end
describe "insertion" do
diff --git a/test/pleroma/web/activity_pub/transmogrifier/note_handling_test.exs b/test/pleroma/web/activity_pub/transmogrifier/note_handling_test.exs
index 31586abc9..deb956410 100644
--- a/test/pleroma/web/activity_pub/transmogrifier/note_handling_test.exs
+++ b/test/pleroma/web/activity_pub/transmogrifier/note_handling_test.exs
@@ -39,7 +39,8 @@ defmodule Pleroma.Web.ActivityPub.Transmogrifier.NoteHandlingTest do
{:ok, %Activity{data: data, local: false}} = Transmogrifier.handle_incoming(data)
object = Object.normalize(data["object"], fetch: false)
- assert "test" in object.data["tag"]
+ assert "test" in Object.tags(object)
+ assert Object.hashtags(object) == ["test"]
end
test "it cleans up incoming notices which are not really DMs" do
@@ -220,7 +221,8 @@ defmodule Pleroma.Web.ActivityPub.Transmogrifier.NoteHandlingTest do
{:ok, %Activity{data: data, local: false}} = Transmogrifier.handle_incoming(data)
object = Object.normalize(data["object"], fetch: false)
- assert Enum.at(object.data["tag"], 2) == "moo"
+ assert Enum.at(Object.tags(object), 2) == "moo"
+ assert Object.hashtags(object) == ["moo"]
end
test "it works for incoming notices with contentMap" do
diff --git a/test/pleroma/web/common_api_test.exs b/test/pleroma/web/common_api_test.exs
index adfe58def..9d005697c 100644
--- a/test/pleroma/web/common_api_test.exs
+++ b/test/pleroma/web/common_api_test.exs
@@ -493,7 +493,7 @@ defmodule Pleroma.Web.CommonAPITest do
object = Object.normalize(activity, fetch: false)
- assert object.data["tag"] == ["2hu"]
+ assert Object.tags(object) == ["2hu"]
end
test "it adds emoji in the object" do
diff --git a/test/pleroma/web/mastodon_api/views/status_view_test.exs b/test/pleroma/web/mastodon_api/views/status_view_test.exs
index 2de3afc4f..4172cc294 100644
--- a/test/pleroma/web/mastodon_api/views/status_view_test.exs
+++ b/test/pleroma/web/mastodon_api/views/status_view_test.exs
@@ -262,8 +262,8 @@ defmodule Pleroma.Web.MastodonAPI.StatusViewTest do
mentions: [],
tags: [
%{
- name: "#{object_data["tag"]}",
- url: "http://localhost:4001/tag/#{object_data["tag"]}"
+ name: "#{hd(object_data["tag"])}",
+ url: "http://localhost:4001/tag/#{hd(object_data["tag"])}"
}
],
application: nil,