Fix unnecessary queries when batch-removing statuses, 100x faster (#15387)
This commit is contained in:
		
					parent
					
						
							
								67ebd61f11
							
						
					
				
			
			
				commit
				
					
						9915d11c0d
					
				
			
		
					 7 changed files with 168 additions and 100 deletions
				
			
		|  | @ -36,7 +36,7 @@ class Favourite < ApplicationRecord | |||
|   end | ||||
| 
 | ||||
|   def decrement_cache_counters | ||||
|     return if association(:status).loaded? && (status.marked_for_destruction? || status.marked_for_mass_destruction?) | ||||
|     return if association(:status).loaded? && status.marked_for_destruction? | ||||
|     status&.decrement_count!(:favourites_count) | ||||
|   end | ||||
| end | ||||
|  |  | |||
|  | @ -228,14 +228,6 @@ class Status < ApplicationRecord | |||
|     @emojis = CustomEmoji.from_text(fields.join(' '), account.domain) | ||||
|   end | ||||
| 
 | ||||
|   def mark_for_mass_destruction! | ||||
|     @marked_for_mass_destruction = true | ||||
|   end | ||||
| 
 | ||||
|   def marked_for_mass_destruction? | ||||
|     @marked_for_mass_destruction | ||||
|   end | ||||
| 
 | ||||
|   def replies_count | ||||
|     status_stat&.replies_count || 0 | ||||
|   end | ||||
|  | @ -430,7 +422,7 @@ class Status < ApplicationRecord | |||
|   end | ||||
| 
 | ||||
|   def decrement_counter_caches | ||||
|     return if direct_visibility? || marked_for_mass_destruction? | ||||
|     return if direct_visibility? | ||||
| 
 | ||||
|     account&.decrement_count!(:statuses_count) | ||||
|     reblog&.decrement_count!(:reblogs_count) if reblog? | ||||
|  | @ -440,7 +432,7 @@ class Status < ApplicationRecord | |||
|   def unlink_from_conversations | ||||
|     return unless direct_visibility? | ||||
| 
 | ||||
|     mentioned_accounts = mentions.includes(:account).map(&:account) | ||||
|     mentioned_accounts = (association(:mentions).loaded? ? mentions : mentions.includes(:account)).map(&:account) | ||||
|     inbox_owners       = mentioned_accounts.select(&:local?) + (account.local? ? [account] : []) | ||||
| 
 | ||||
|     inbox_owners.each do |inbox_owner| | ||||
|  |  | |||
|  | @ -3,29 +3,45 @@ | |||
| class BatchedRemoveStatusService < BaseService | ||||
|   include Redisable | ||||
| 
 | ||||
|   # Delete given statuses and reblogs of them | ||||
|   # Remove statuses from home feeds | ||||
|   # Push delete events to streaming API for home feeds and public feeds | ||||
|   # @param [Enumerable<Status>] statuses A preferably batched array of statuses | ||||
|   # Delete multiple statuses and reblogs of them as efficiently as possible | ||||
|   # @param [Enumerable<Status>] statuses An array of statuses | ||||
|   # @param [Hash] options | ||||
|   # @option [Boolean] :skip_side_effects | ||||
|   # @option [Boolean] :skip_side_effects Do not modify feeds or send updates to the streaming API | ||||
|   def call(statuses, **options) | ||||
|     statuses = Status.where(id: statuses.map(&:id)).includes(:account).flat_map { |status| [status] + status.reblogs.includes(:account).to_a } | ||||
|     ActiveRecord::Associations::Preloader.new.preload(statuses, options[:skip_side_effects] ? :reblogs : [:account, reblogs: :account]) | ||||
| 
 | ||||
|     @mentions = statuses.each_with_object({}) { |s, h| h[s.id] = s.active_mentions.includes(:account).to_a } | ||||
|     @tags     = statuses.each_with_object({}) { |s, h| h[s.id] = s.tags.pluck(:name) } | ||||
|     statuses_and_reblogs = statuses.flat_map { |status| [status] + status.reblogs } | ||||
| 
 | ||||
|     @json_payloads = statuses.each_with_object({}) { |s, h| h[s.id] = Oj.dump(event: :delete, payload: s.id.to_s) } | ||||
|     # The conversations for direct visibility statuses also need | ||||
|     # to be manually updated. This part is not efficient but we | ||||
|     # rely on direct visibility statuses being relatively rare. | ||||
|     statuses_with_account_conversations = statuses.select(&:direct_visibility?) | ||||
| 
 | ||||
|     statuses.each do |status| | ||||
|       status.mark_for_mass_destruction! | ||||
|       status.destroy | ||||
|     ActiveRecord::Associations::Preloader.new.preload(statuses_with_account_conversations, [mentions: :account]) | ||||
| 
 | ||||
|     statuses_with_account_conversations.each do |status| | ||||
|       status.send(:unlink_from_conversations) | ||||
|     end | ||||
| 
 | ||||
|     # To avoid having a long-running transaction lock the database, | ||||
|     # we do not batch all deletes into one. We do, however, use delete | ||||
|     # instead of destroy to skip all callbacks, relying on foreign keys | ||||
|     # to cascade the delete faster, without loading the associations. | ||||
|     statuses_and_reblogs.each(&:delete) | ||||
| 
 | ||||
|     # Since we skipped all callbacks, we also need to manually | ||||
|     # deindex the statuses | ||||
|     Chewy.strategy.current.update(StatusesIndex, statuses_and_reblogs) | ||||
| 
 | ||||
|     return if options[:skip_side_effects] | ||||
| 
 | ||||
|     ActiveRecord::Associations::Preloader.new.preload(statuses_and_reblogs, :tags) | ||||
| 
 | ||||
|     @tags          = statuses_and_reblogs.each_with_object({}) { |s, h| h[s.id] = s.tags.map { |tag| tag.name.mb_chars.downcase } } | ||||
|     @json_payloads = statuses_and_reblogs.each_with_object({}) { |s, h| h[s.id] = Oj.dump(event: :delete, payload: s.id.to_s) } | ||||
| 
 | ||||
|     # Batch by source account | ||||
|     statuses.group_by(&:account_id).each_value do |account_statuses| | ||||
|     statuses_and_reblogs.group_by(&:account_id).each_value do |account_statuses| | ||||
|       account = account_statuses.first.account | ||||
| 
 | ||||
|       next unless account | ||||
|  | @ -35,27 +51,31 @@ class BatchedRemoveStatusService < BaseService | |||
|     end | ||||
| 
 | ||||
|     # Cannot be batched | ||||
|     statuses.each do |status| | ||||
|       unpush_from_public_timelines(status) | ||||
|     redis.pipelined do | ||||
|       statuses_and_reblogs.each do |status| | ||||
|         unpush_from_public_timelines(status) | ||||
|       end | ||||
|     end | ||||
|   end | ||||
| 
 | ||||
|   private | ||||
| 
 | ||||
|   def unpush_from_home_timelines(account, statuses) | ||||
|     recipients = account.followers_for_local_distribution.to_a | ||||
| 
 | ||||
|     recipients << account if account.local? | ||||
| 
 | ||||
|     recipients.each do |follower| | ||||
|     account.followers_for_local_distribution.includes(:user).find_each do |follower| | ||||
|       statuses.each do |status| | ||||
|         FeedManager.instance.unpush_from_home(follower, status) | ||||
|       end | ||||
|     end | ||||
| 
 | ||||
|     return unless account.local? | ||||
| 
 | ||||
|     statuses.each do |status| | ||||
|       FeedManager.instance.unpush_from_home(account, status) | ||||
|     end | ||||
|   end | ||||
| 
 | ||||
|   def unpush_from_list_timelines(account, statuses) | ||||
|     account.lists_for_local_distribution.select(:id, :account_id).each do |list| | ||||
|     account.lists_for_local_distribution.select(:id, :account_id).includes(account: :user).find_each do |list| | ||||
|       statuses.each do |status| | ||||
|         FeedManager.instance.unpush_from_list(list, status) | ||||
|       end | ||||
|  | @ -67,26 +87,17 @@ class BatchedRemoveStatusService < BaseService | |||
| 
 | ||||
|     payload = @json_payloads[status.id] | ||||
| 
 | ||||
|     redis.pipelined do | ||||
|       redis.publish('timeline:public', payload) | ||||
|       if status.local? | ||||
|         redis.publish('timeline:public:local', payload) | ||||
|       else | ||||
|         redis.publish('timeline:public:remote', payload) | ||||
|       end | ||||
|       if status.media_attachments.any? | ||||
|         redis.publish('timeline:public:media', payload) | ||||
|         if status.local? | ||||
|           redis.publish('timeline:public:local:media', payload) | ||||
|         else | ||||
|           redis.publish('timeline:public:remote:media', payload) | ||||
|         end | ||||
|       end | ||||
|     redis.publish('timeline:public', payload) | ||||
|     redis.publish(status.local? ? 'timeline:public:local' : 'timeline:public:remote', payload) | ||||
| 
 | ||||
|       @tags[status.id].each do |hashtag| | ||||
|         redis.publish("timeline:hashtag:#{hashtag.mb_chars.downcase}", payload) | ||||
|         redis.publish("timeline:hashtag:#{hashtag.mb_chars.downcase}:local", payload) if status.local? | ||||
|       end | ||||
|     if status.media_attachments.any? | ||||
|       redis.publish('timeline:public:media', payload) | ||||
|       redis.publish(status.local? ? 'timeline:public:local:media' : 'timeline:public:remote:media', payload) | ||||
|     end | ||||
| 
 | ||||
|     @tags[status.id].each do |hashtag| | ||||
|       redis.publish("timeline:hashtag:#{hashtag}", payload) | ||||
|       redis.publish("timeline:hashtag:#{hashtag}:local", payload) if status.local? | ||||
|     end | ||||
|   end | ||||
| end | ||||
|  |  | |||
|  | @ -6,15 +6,21 @@ class DeleteAccountService < BaseService | |||
|   ASSOCIATIONS_ON_SUSPEND = %w( | ||||
|     account_pins | ||||
|     active_relationships | ||||
|     aliases | ||||
|     block_relationships | ||||
|     blocked_by_relationships | ||||
|     bookmarks | ||||
|     conversation_mutes | ||||
|     conversations | ||||
|     custom_filters | ||||
|     devices | ||||
|     domain_blocks | ||||
|     favourites | ||||
|     featured_tags | ||||
|     follow_requests | ||||
|     identity_proofs | ||||
|     list_accounts | ||||
|     migrations | ||||
|     mute_relationships | ||||
|     muted_by_relationships | ||||
|     notifications | ||||
|  | @ -25,6 +31,29 @@ class DeleteAccountService < BaseService | |||
|     status_pins | ||||
|   ).freeze | ||||
| 
 | ||||
|   # The following associations have no important side-effects | ||||
|   # in callbacks and all of their own associations are secured | ||||
|   # by foreign keys, making them safe to delete without loading | ||||
|   # into memory | ||||
|   ASSOCIATIONS_WITHOUT_SIDE_EFFECTS = %w( | ||||
|     account_pins | ||||
|     aliases | ||||
|     conversation_mutes | ||||
|     conversations | ||||
|     custom_filters | ||||
|     devices | ||||
|     domain_blocks | ||||
|     featured_tags | ||||
|     follow_requests | ||||
|     identity_proofs | ||||
|     migrations | ||||
|     mute_relationships | ||||
|     muted_by_relationships | ||||
|     notifications | ||||
|     scheduled_statuses | ||||
|     status_pins | ||||
|   ) | ||||
| 
 | ||||
|   ASSOCIATIONS_ON_DESTROY = %w( | ||||
|     reports | ||||
|     targeted_moderation_notes | ||||
|  | @ -55,19 +84,25 @@ class DeleteAccountService < BaseService | |||
| 
 | ||||
|     @options[:skip_activitypub] = true if @options[:skip_side_effects] | ||||
| 
 | ||||
|     reject_follows! | ||||
|     undo_follows! | ||||
|     purge_user! | ||||
|     purge_profile! | ||||
|     distribute_activities! | ||||
|     purge_content! | ||||
|     fulfill_deletion_request! | ||||
|   end | ||||
| 
 | ||||
|   private | ||||
| 
 | ||||
|   def reject_follows! | ||||
|     return if @account.local? || !@account.activitypub? || @options[:skip_activitypub] | ||||
|   def distribute_activities! | ||||
|     return if skip_activitypub? | ||||
| 
 | ||||
|     if @account.local? | ||||
|       delete_actor! | ||||
|     elsif @account.activitypub? | ||||
|       reject_follows! | ||||
|       undo_follows! | ||||
|     end | ||||
|   end | ||||
| 
 | ||||
|   def reject_follows! | ||||
|     # When deleting a remote account, the account obviously doesn't | ||||
|     # actually become deleted on its origin server, i.e. unlike a | ||||
|     # locally deleted account it continues to have access to its home | ||||
|  | @ -81,8 +116,6 @@ class DeleteAccountService < BaseService | |||
|   end | ||||
| 
 | ||||
|   def undo_follows! | ||||
|     return if @account.local? || !@account.activitypub? || @options[:skip_activitypub] | ||||
| 
 | ||||
|     # When deleting a remote account, the account obviously doesn't | ||||
|     # actually become deleted on its origin server, but following relationships | ||||
|     # are severed on our end. Therefore, make the remote server aware that the | ||||
|  | @ -97,7 +130,7 @@ class DeleteAccountService < BaseService | |||
|   def purge_user! | ||||
|     return if !@account.local? || @account.user.nil? | ||||
| 
 | ||||
|     if @options[:reserve_email] | ||||
|     if keep_user_record? | ||||
|       @account.user.disable! | ||||
|       @account.user.invites.where(uses: 0).destroy_all | ||||
|     else | ||||
|  | @ -106,34 +139,52 @@ class DeleteAccountService < BaseService | |||
|   end | ||||
| 
 | ||||
|   def purge_content! | ||||
|     distribute_delete_actor! if @account.local? && !@options[:skip_side_effects] | ||||
|     purge_user! | ||||
|     purge_profile! | ||||
|     purge_statuses! | ||||
|     purge_media_attachments! | ||||
|     purge_polls! | ||||
|     purge_generated_notifications! | ||||
|     purge_other_associations! | ||||
| 
 | ||||
|     @account.destroy unless keep_account_record? | ||||
|   end | ||||
| 
 | ||||
|   def purge_statuses! | ||||
|     @account.statuses.reorder(nil).find_in_batches do |statuses| | ||||
|       statuses.reject! { |status| reported_status_ids.include?(status.id) } if @options[:reserve_username] | ||||
|       BatchedRemoveStatusService.new.call(statuses, skip_side_effects: @options[:skip_side_effects]) | ||||
|     end | ||||
|       statuses.reject! { |status| reported_status_ids.include?(status.id) } if keep_account_record? | ||||
| 
 | ||||
|       BatchedRemoveStatusService.new.call(statuses, skip_side_effects: skip_side_effects?) | ||||
|     end | ||||
|   end | ||||
| 
 | ||||
|   def purge_media_attachments! | ||||
|     @account.media_attachments.reorder(nil).find_each do |media_attachment| | ||||
|       next if @options[:reserve_username] && reported_status_ids.include?(media_attachment.status_id) | ||||
|       next if keep_account_record? && reported_status_ids.include?(media_attachment.status_id) | ||||
| 
 | ||||
|       media_attachment.destroy | ||||
|     end | ||||
|   end | ||||
| 
 | ||||
|   def purge_polls! | ||||
|     @account.polls.reorder(nil).find_each do |poll| | ||||
|       next if @options[:reserve_username] && reported_status_ids.include?(poll.status_id) | ||||
|       next if keep_account_record? && reported_status_ids.include?(poll.status_id) | ||||
| 
 | ||||
|       # We can safely delete the poll rather than destroy it, as any non-reported | ||||
|       # status should have been deleted already, as long as we take care of | ||||
|       # notifications. | ||||
|       Notification.where(poll: poll).delete_all | ||||
|       poll.delete | ||||
|     end | ||||
|   end | ||||
| 
 | ||||
|   def purge_generated_notifications! | ||||
|     # By deleting polls and statuses without callbacks, we've left behind | ||||
|     # polymorphically associated notifications generated by this account | ||||
| 
 | ||||
|     Notification.where(from_account: @account).in_batches.delete_all | ||||
|   end | ||||
| 
 | ||||
|   def purge_other_associations! | ||||
|     associations_for_destruction.each do |association_name| | ||||
|       destroy_all(@account.public_send(association_name)) | ||||
|       purge_association(association_name) | ||||
|     end | ||||
| 
 | ||||
|     @account.destroy unless @options[:reserve_username] | ||||
|   end | ||||
| 
 | ||||
|   def purge_profile! | ||||
|  | @ -141,7 +192,7 @@ class DeleteAccountService < BaseService | |||
|     # there is no point wasting time updating | ||||
|     # its values first | ||||
| 
 | ||||
|     return unless @options[:reserve_username] | ||||
|     return unless keep_account_record? | ||||
| 
 | ||||
|     @account.silenced_at       = nil | ||||
|     @account.suspended_at      = @options[:suspended_at] || Time.now.utc | ||||
|  | @ -156,6 +207,7 @@ class DeleteAccountService < BaseService | |||
|     @account.followers_count   = 0 | ||||
|     @account.following_count   = 0 | ||||
|     @account.moved_to_account  = nil | ||||
|     @account.also_known_as     = [] | ||||
|     @account.trust_level       = :untrusted | ||||
|     @account.avatar.destroy | ||||
|     @account.header.destroy | ||||
|  | @ -166,11 +218,17 @@ class DeleteAccountService < BaseService | |||
|     @account.deletion_request&.destroy | ||||
|   end | ||||
| 
 | ||||
|   def destroy_all(association) | ||||
|     association.in_batches.destroy_all | ||||
|   def purge_association(association_name) | ||||
|     association = @account.public_send(association_name) | ||||
| 
 | ||||
|     if ASSOCIATIONS_WITHOUT_SIDE_EFFECTS.include?(association_name) | ||||
|       association.in_batches.delete_all | ||||
|     else | ||||
|       association.in_batches.destroy_all | ||||
|     end | ||||
|   end | ||||
| 
 | ||||
|   def distribute_delete_actor! | ||||
|   def delete_actor! | ||||
|     ActivityPub::DeliveryWorker.push_bulk(delivery_inboxes) do |inbox_url| | ||||
|       [delete_actor_json, @account.id, inbox_url] | ||||
|     end | ||||
|  | @ -197,10 +255,26 @@ class DeleteAccountService < BaseService | |||
|   end | ||||
| 
 | ||||
|   def associations_for_destruction | ||||
|     if @options[:reserve_username] | ||||
|     if keep_account_record? | ||||
|       ASSOCIATIONS_ON_SUSPEND | ||||
|     else | ||||
|       ASSOCIATIONS_ON_SUSPEND + ASSOCIATIONS_ON_DESTROY | ||||
|     end | ||||
|   end | ||||
| 
 | ||||
|   def keep_user_record? | ||||
|     @options[:reserve_email] | ||||
|   end | ||||
| 
 | ||||
|   def keep_account_record? | ||||
|     @options[:reserve_username] | ||||
|   end | ||||
| 
 | ||||
|   def skip_side_effects? | ||||
|     @options[:skip_side_effects] | ||||
|   end | ||||
| 
 | ||||
|   def skip_activitypub? | ||||
|     @options[:skip_activitypub] | ||||
|   end | ||||
| end | ||||
|  |  | |||
|  | @ -12,6 +12,10 @@ Chewy.settings = { | |||
|   sidekiq: { queue: 'pull' }, | ||||
| } | ||||
| 
 | ||||
| # We use our own async strategy even outside the request-response | ||||
| # cycle; it takes care of checking whether Elasticsearch is enabled. | ||||
| # Note, however, that in the Rails console the :urgent strategy | ||||
| # is set automatically, with no way to override it. | ||||
| Chewy.root_strategy              = :custom_sidekiq | ||||
| Chewy.request_strategy           = :custom_sidekiq | ||||
| Chewy.use_after_commit_callbacks = false | ||||
|  | @ -37,6 +41,7 @@ Elasticsearch::Transport::Client.prepend Module.new { | |||
|     super arguments | ||||
|   end | ||||
| } | ||||
| 
 | ||||
| Elasticsearch::API::Indices::IndicesClient.prepend Module.new { | ||||
|   def create(arguments = {}) | ||||
|     arguments[:include_type_name] = true | ||||
|  |  | |||
|  | @ -2,29 +2,10 @@ | |||
| 
 | ||||
| module Chewy | ||||
|   class Strategy | ||||
|     class CustomSidekiq < Base | ||||
|       class Worker | ||||
|         include ::Sidekiq::Worker | ||||
| 
 | ||||
|         sidekiq_options queue: 'pull' | ||||
| 
 | ||||
|         def perform(type, ids, options = {}) | ||||
|           options[:refresh] = !Chewy.disable_refresh_async if Chewy.disable_refresh_async | ||||
|           type.constantize.import!(ids, options) | ||||
|         end | ||||
|     class CustomSidekiq < Sidekiq | ||||
|       def update(_type, _objects, _options = {}) | ||||
|         super if Chewy.enabled? | ||||
|       end | ||||
| 
 | ||||
|       def update(type, objects, _options = {}) | ||||
|         return unless Chewy.enabled? | ||||
| 
 | ||||
|         ids = type.root.id ? Array.wrap(objects) : type.adapter.identify(objects) | ||||
| 
 | ||||
|         return if ids.empty? | ||||
| 
 | ||||
|         Worker.perform_async(type.name, ids) | ||||
|       end | ||||
| 
 | ||||
|       def leave; end | ||||
|     end | ||||
|   end | ||||
| end | ||||
|  |  | |||
|  | @ -26,6 +26,11 @@ RSpec.describe BatchedRemoveStatusService, type: :service do | |||
|     subject.call([status1, status2]) | ||||
|   end | ||||
| 
 | ||||
|   it 'removes statuses' do | ||||
|     expect { Status.find(status1.id) }.to raise_error ActiveRecord::RecordNotFound | ||||
|     expect { Status.find(status2.id) }.to raise_error ActiveRecord::RecordNotFound | ||||
|   end | ||||
| 
 | ||||
|   it 'removes statuses from author\'s home feed' do | ||||
|     expect(HomeFeed.new(alice).get(10)).to_not include([status1.id, status2.id]) | ||||
|   end | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue