Improve account deletion performance further (#15407)
* Delete status records in batches of 50
* Do not precompute values that are only used once
* Do not generate Redis events for removal of public toots older than two weeks
* Filter reported toots a priori for polls and status deletion
* Do not process reblogs when cleaning up public timelines
  (as in Mastodon proper, reblogs don't appear in public timelines)
* Clean the deleted account's own feed in one go
* Refactor Account#clean_feed_manager and List#clean_feed_manager
* Delete instead of destroy a few more associations
* Fix preloading

Co-authored-by: Claire <claire.github-309c@sitedethib.com>
This commit is contained in:
		
					parent
					
						
							
								f18349640b
							
						
					
				
			
			
				commit
				
					
						3249d35bdc
					
				
			
		
					 7 changed files with 53 additions and 81 deletions
				
			
		|  | @ -230,6 +230,36 @@ class FeedManager | |||
|     end | ||||
|   end | ||||
| 
 | ||||
|   # Completely clear multiple feeds at once | ||||
|   # @param [Symbol] type | ||||
|   # @param [Array<Integer>] ids | ||||
|   # @return [void] | ||||
|   def clean_feeds!(type, ids) | ||||
|     reblogged_id_sets = {} | ||||
| 
 | ||||
|     redis.pipelined do | ||||
|       ids.each do |feed_id| | ||||
|         redis.del(key(type, feed_id)) | ||||
|         reblog_key = key(type, feed_id, 'reblogs') | ||||
|         # We collect a future for this: we don't block while getting | ||||
|         # it, but we can iterate over it later. | ||||
|         reblogged_id_sets[feed_id] = redis.zrange(reblog_key, 0, -1) | ||||
|         redis.del(reblog_key) | ||||
|       end | ||||
|     end | ||||
| 
 | ||||
|     # Remove all of the reblog tracking keys we just removed the | ||||
|     # references to. | ||||
|     redis.pipelined do | ||||
|       reblogged_id_sets.each do |feed_id, future| | ||||
|         future.value.each do |reblogged_id| | ||||
|           reblog_set_key = key(type, feed_id, "reblogs:#{reblogged_id}") | ||||
|           redis.del(reblog_set_key) | ||||
|         end | ||||
|       end | ||||
|     end | ||||
|   end | ||||
| 
 | ||||
|   private | ||||
| 
 | ||||
|   # Trim a feed to maximum size by removing older items | ||||
|  |  | |||
|  | @ -578,17 +578,6 @@ class Account < ApplicationRecord | |||
|   end | ||||
| 
 | ||||
|   def clean_feed_manager | ||||
|     reblog_key       = FeedManager.instance.key(:home, id, 'reblogs') | ||||
|     reblogged_id_set = Redis.current.zrange(reblog_key, 0, -1) | ||||
| 
 | ||||
|     Redis.current.pipelined do | ||||
|       Redis.current.del(FeedManager.instance.key(:home, id)) | ||||
|       Redis.current.del(reblog_key) | ||||
| 
 | ||||
|       reblogged_id_set.each do |reblogged_id| | ||||
|         reblog_set_key = FeedManager.instance.key(:home, id, "reblogs:#{reblogged_id}") | ||||
|         Redis.current.del(reblog_set_key) | ||||
|       end | ||||
|     end | ||||
|     FeedManager.instance.clean_feeds!(:home, [id]) | ||||
|   end | ||||
| end | ||||
|  |  | |||
|  | @ -34,17 +34,6 @@ class List < ApplicationRecord | |||
|   private | ||||
| 
 | ||||
|   def clean_feed_manager | ||||
|     reblog_key       = FeedManager.instance.key(:list, id, 'reblogs') | ||||
|     reblogged_id_set = Redis.current.zrange(reblog_key, 0, -1) | ||||
| 
 | ||||
|     Redis.current.pipelined do | ||||
|       Redis.current.del(FeedManager.instance.key(:list, id)) | ||||
|       Redis.current.del(reblog_key) | ||||
| 
 | ||||
|       reblogged_id_set.each do |reblogged_id| | ||||
|         reblog_set_key = FeedManager.instance.key(:list, id, "reblogs:#{reblogged_id}") | ||||
|         Redis.current.del(reblog_set_key) | ||||
|       end | ||||
|     end | ||||
|     FeedManager.instance.clean_feeds!(:list, [id]) | ||||
|   end | ||||
| end | ||||
|  |  | |||
|  | @ -8,7 +8,7 @@ class BatchedRemoveStatusService < BaseService | |||
|   # @param [Hash] options | ||||
|   # @option [Boolean] :skip_side_effects Do not modify feeds and send updates to streaming API | ||||
|   def call(statuses, **options) | ||||
|     ActiveRecord::Associations::Preloader.new.preload(statuses, options[:skip_side_effects] ? :reblogs : [:account, reblogs: :account]) | ||||
|     ActiveRecord::Associations::Preloader.new.preload(statuses, options[:skip_side_effects] ? :reblogs : [:account, :tags, reblogs: :account]) | ||||
| 
 | ||||
|     statuses_and_reblogs = statuses.flat_map { |status| [status] + status.reblogs } | ||||
| 
 | ||||
|  | @ -27,7 +27,7 @@ class BatchedRemoveStatusService < BaseService | |||
|     # transaction lock the database, but we use the delete method instead | ||||
|     # of destroy to avoid all callbacks. We rely on foreign keys to | ||||
|     # cascade the delete faster without loading the associations. | ||||
|     statuses_and_reblogs.each(&:delete) | ||||
|     statuses_and_reblogs.each_slice(50) { |slice| Status.where(id: slice.map(&:id)).delete_all } | ||||
| 
 | ||||
|     # Since we skipped all callbacks, we also need to manually | ||||
|     # deindex the statuses | ||||
|  | @ -35,11 +35,6 @@ class BatchedRemoveStatusService < BaseService | |||
| 
 | ||||
|     return if options[:skip_side_effects] | ||||
| 
 | ||||
|     ActiveRecord::Associations::Preloader.new.preload(statuses_and_reblogs, :tags) | ||||
| 
 | ||||
|     @tags          = statuses_and_reblogs.each_with_object({}) { |s, h| h[s.id] = s.tags.map { |tag| tag.name.mb_chars.downcase } } | ||||
|     @json_payloads = statuses_and_reblogs.each_with_object({}) { |s, h| h[s.id] = Oj.dump(event: :delete, payload: s.id.to_s) } | ||||
| 
 | ||||
|     # Batch by source account | ||||
|     statuses_and_reblogs.group_by(&:account_id).each_value do |account_statuses| | ||||
|       account = account_statuses.first.account | ||||
|  | @ -51,8 +46,9 @@ class BatchedRemoveStatusService < BaseService | |||
|     end | ||||
| 
 | ||||
|     # Cannot be batched | ||||
|     @status_id_cutoff = Mastodon::Snowflake.id_at(2.weeks.ago) | ||||
|     redis.pipelined do | ||||
|       statuses_and_reblogs.each do |status| | ||||
|       statuses.each do |status| | ||||
|         unpush_from_public_timelines(status) | ||||
|       end | ||||
|     end | ||||
|  | @ -66,12 +62,6 @@ class BatchedRemoveStatusService < BaseService | |||
|         FeedManager.instance.unpush_from_home(follower, status) | ||||
|       end | ||||
|     end | ||||
| 
 | ||||
|     return unless account.local? | ||||
| 
 | ||||
|     statuses.each do |status| | ||||
|       FeedManager.instance.unpush_from_home(account, status) | ||||
|     end | ||||
|   end | ||||
| 
 | ||||
|   def unpush_from_list_timelines(account, statuses) | ||||
|  | @ -83,9 +73,9 @@ class BatchedRemoveStatusService < BaseService | |||
|   end | ||||
| 
 | ||||
|   def unpush_from_public_timelines(status) | ||||
|     return unless status.public_visibility? | ||||
|     return unless status.public_visibility? && status.id > @status_id_cutoff | ||||
| 
 | ||||
|     payload = @json_payloads[status.id] | ||||
|     payload = Oj.dump(event: :delete, payload: status.id.to_s) | ||||
| 
 | ||||
|     redis.publish('timeline:public', payload) | ||||
|     redis.publish(status.local? ? 'timeline:public:local' : 'timeline:public:remote', payload) | ||||
|  | @ -95,7 +85,7 @@ class BatchedRemoveStatusService < BaseService | |||
|       redis.publish(status.local? ? 'timeline:public:local:media' : 'timeline:public:remote:media', payload) | ||||
|     end | ||||
| 
 | ||||
|     @tags[status.id].each do |hashtag| | ||||
|     status.tags.map { |tag| tag.name.mb_chars.downcase }.each do |hashtag| | ||||
|       redis.publish("timeline:hashtag:#{hashtag}", payload) | ||||
|       redis.publish("timeline:hashtag:#{hashtag}:local", payload) if status.local? | ||||
|     end | ||||
|  |  | |||
|  | @ -46,10 +46,12 @@ class DeleteAccountService < BaseService | |||
|     featured_tags | ||||
|     follow_requests | ||||
|     identity_proofs | ||||
|     list_accounts | ||||
|     migrations | ||||
|     mute_relationships | ||||
|     muted_by_relationships | ||||
|     notifications | ||||
|     owned_lists | ||||
|     scheduled_statuses | ||||
|     status_pins | ||||
|   ) | ||||
|  | @ -145,15 +147,14 @@ class DeleteAccountService < BaseService | |||
|     purge_media_attachments! | ||||
|     purge_polls! | ||||
|     purge_generated_notifications! | ||||
|     purge_feeds! | ||||
|     purge_other_associations! | ||||
| 
 | ||||
|     @account.destroy unless keep_account_record? | ||||
|   end | ||||
| 
 | ||||
|   def purge_statuses! | ||||
|     @account.statuses.reorder(nil).find_in_batches do |statuses| | ||||
|       statuses.reject! { |status| reported_status_ids.include?(status.id) } if keep_account_record? | ||||
| 
 | ||||
|     @account.statuses.reorder(nil).where.not(id: reported_status_ids).in_batches do |statuses| | ||||
|       BatchedRemoveStatusService.new.call(statuses, skip_side_effects: skip_side_effects?) | ||||
|     end | ||||
|   end | ||||
|  | @ -167,11 +168,7 @@ class DeleteAccountService < BaseService | |||
|   end | ||||
| 
 | ||||
|   def purge_polls! | ||||
|     @account.polls.reorder(nil).find_each do |poll| | ||||
|       next if keep_account_record? && reported_status_ids.include?(poll.status_id) | ||||
| 
 | ||||
|       poll.delete | ||||
|     end | ||||
|     @account.polls.reorder(nil).where.not(status_id: reported_status_ids).in_batches.delete_all | ||||
|   end | ||||
| 
 | ||||
|   def purge_generated_notifications! | ||||
|  | @ -187,6 +184,13 @@ class DeleteAccountService < BaseService | |||
|     end | ||||
|   end | ||||
| 
 | ||||
|   def purge_feeds! | ||||
|     return unless @account.local? | ||||
| 
 | ||||
|     FeedManager.instance.clean_feeds!(:home, [@account.id]) | ||||
|     FeedManager.instance.clean_feeds!(:list, @account.owned_lists.pluck(:id)) | ||||
|   end | ||||
| 
 | ||||
|   def purge_profile! | ||||
|     # If the account is going to be destroyed | ||||
|     # there is no point wasting time updating | ||||
|  |  | |||
|  | @ -14,37 +14,11 @@ class Scheduler::FeedCleanupScheduler | |||
|   private | ||||
| 
 | ||||
|   def clean_home_feeds! | ||||
|     clean_feeds!(inactive_account_ids, :home) | ||||
|     feed_manager.clean_feeds!(:home, inactive_account_ids) | ||||
|   end | ||||
| 
 | ||||
|   def clean_list_feeds! | ||||
|     clean_feeds!(inactive_list_ids, :list) | ||||
|   end | ||||
| 
 | ||||
|   def clean_feeds!(ids, type) | ||||
|     reblogged_id_sets = {} | ||||
| 
 | ||||
|     redis.pipelined do | ||||
|       ids.each do |feed_id| | ||||
|         redis.del(feed_manager.key(type, feed_id)) | ||||
|         reblog_key = feed_manager.key(type, feed_id, 'reblogs') | ||||
|         # We collect a future for this: we don't block while getting | ||||
|         # it, but we can iterate over it later. | ||||
|         reblogged_id_sets[feed_id] = redis.zrange(reblog_key, 0, -1) | ||||
|         redis.del(reblog_key) | ||||
|       end | ||||
|     end | ||||
| 
 | ||||
|     # Remove all of the reblog tracking keys we just removed the | ||||
|     # references to. | ||||
|     redis.pipelined do | ||||
|       reblogged_id_sets.each do |feed_id, future| | ||||
|         future.value.each do |reblogged_id| | ||||
|           reblog_set_key = feed_manager.key(type, feed_id, "reblogs:#{reblogged_id}") | ||||
|           redis.del(reblog_set_key) | ||||
|         end | ||||
|       end | ||||
|     end | ||||
|     feed_manager.clean_feeds!(:list, inactive_list_ids) | ||||
|   end | ||||
| 
 | ||||
|   def inactive_account_ids | ||||
|  |  | |||
|  | @ -43,10 +43,6 @@ RSpec.describe BatchedRemoveStatusService, type: :service do | |||
|     expect(Redis.current).to have_received(:publish).with("timeline:#{jeff.id}", any_args).at_least(:once) | ||||
|   end | ||||
| 
 | ||||
|   it 'notifies streaming API of author' do | ||||
|     expect(Redis.current).to have_received(:publish).with("timeline:#{alice.id}", any_args).at_least(:once) | ||||
|   end | ||||
| 
 | ||||
|   it 'notifies streaming API of public timeline' do | ||||
|     expect(Redis.current).to have_received(:publish).with('timeline:public', any_args).at_least(:once) | ||||
|   end | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue