Add retention policy for cached content and media (#19232)

This commit is contained in:
Eugen Rochko 2022-09-27 03:08:19 +02:00 committed by GitHub
parent 3e0999cd11
commit 5c9abdeff1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
30 changed files with 559 additions and 135 deletions

View File

@ -7,9 +7,7 @@ class RedisConfiguration
@pool = ConnectionPool.new(size: new_pool_size) { new.connection }
end
def with
pool.with { |redis| yield redis }
end
delegate :with, to: :pool
def pool
@pool ||= establish_pool(pool_size)
@ -17,7 +15,7 @@ class RedisConfiguration
def pool_size
if Sidekiq.server?
Sidekiq.options[:concurrency]
Sidekiq[:concurrency]
else
ENV['MAX_THREADS'] || 5
end

3
app/lib/vacuum.rb Normal file
View File

@ -0,0 +1,3 @@
# frozen_string_literal: true
# Namespace for the Vacuum::* clean-up operation classes.
module Vacuum
end

View File

@ -1,13 +1,18 @@
# frozen_string_literal: true
class Scheduler::DoorkeeperCleanupScheduler
include Sidekiq::Worker
sidekiq_options retry: 0
class Vacuum::AccessTokensVacuum
def perform
vacuum_revoked_access_tokens!
vacuum_revoked_access_grants!
end
private
def vacuum_revoked_access_tokens!
Doorkeeper::AccessToken.where('revoked_at IS NOT NULL').where('revoked_at < NOW()').delete_all
end
def vacuum_revoked_access_grants!
Doorkeeper::AccessGrant.where('revoked_at IS NOT NULL').where('revoked_at < NOW()').delete_all
SystemKey.expired.delete_all
end
end

View File

@ -0,0 +1,25 @@
# frozen_string_literal: true
# Destroys Backup records created before the retention cutoff.
class Vacuum::BackupsVacuum
  # @param retention_period [#ago, nil] maximum age of a backup; when it is
  #   not `present?`, #perform does nothing
  def initialize(retention_period)
    @retention_period = retention_period
  end

  # Destroys expired backups, but only when a retention period is configured.
  def perform
    return unless retention_period?

    vacuum_expired_backups!
  end

  private

  def retention_period?
    @retention_period.present?
  end

  # Uses `destroy_all` (rather than `delete_all`) on each batch, so records
  # are instantiated and destroyed one by one.
  def vacuum_expired_backups!
    expired = backups_past_retention_period
    expired.in_batches.destroy_all
  end

  # Unscoped relation of backups whose created_at is before the cutoff.
  def backups_past_retention_period
    created_at = Backup.arel_table[:created_at]
    Backup.unscoped.where(created_at.lt(@retention_period.ago))
  end
end

View File

@ -0,0 +1,34 @@
# frozen_string_literal: true
# Asks the FeedManager to clean the home and list feeds that belong to
# users matched by `User.confirmed.inactive`.
class Vacuum::FeedsVacuum
  # Cleans home feeds first, then list feeds.
  def perform
    vacuum_inactive_home_feeds!
    vacuum_inactive_list_feeds!
  end

  private

  def feed_manager
    FeedManager.instance
  end

  def inactive_users
    User.confirmed.inactive
  end

  # Lists owned by the accounts of inactive users (resolved via a subquery).
  def inactive_users_lists
    List.where(account_id: inactive_users.select(:account_id))
  end

  # Walks inactive users in batches, loading only the id and account_id
  # columns, and cleans the home feed of each account.
  def vacuum_inactive_home_feeds!
    inactive_users.select(:id, :account_id).find_in_batches do |batch|
      account_ids = batch.map(&:account_id)
      feed_manager.clean_feeds!(:home, account_ids)
    end
  end

  # Walks the lists of inactive users in batches and cleans each list feed.
  def vacuum_inactive_list_feeds!
    inactive_users_lists.select(:id).find_in_batches do |batch|
      list_ids = batch.map(&:id)
      feed_manager.clean_feeds!(:list, list_ids)
    end
  end
end

View File

@ -0,0 +1,40 @@
# frozen_string_literal: true
# Clears cached files of old media attachments and destroys unattached
# media attachment records.
class Vacuum::MediaAttachmentsVacuum
  # Minimum age (by created_at) before an unattached record is destroyed.
  TTL = 1.day.freeze

  # @param retention_period [#ago, nil] maximum age of cached files; when it
  #   is not `present?`, cached files are left alone
  def initialize(retention_period)
    @retention_period = retention_period
  end

  # Orphaned records are always vacuumed; cached files only when a
  # retention period is configured.
  def perform
    vacuum_cached_files! if retention_period?
    vacuum_orphaned_records!
  end

  private

  def retention_period?
    @retention_period.present?
  end

  # Destroys the file and thumbnail of each matching attachment, then saves
  # the record itself (the record is kept).
  def vacuum_cached_files!
    media_attachments_past_retention_period.find_each do |attachment|
      attachment.file.destroy
      attachment.thumbnail.destroy
      attachment.save
    end
  end

  def vacuum_orphaned_records!
    orphaned_media_attachments.in_batches.destroy_all
  end

  # Attachments selected by the `remote` and `cached` scopes whose
  # created_at and updated_at are both before the retention cutoff.
  def media_attachments_past_retention_period
    columns = MediaAttachment.arel_table

    MediaAttachment
      .unscoped
      .remote
      .cached
      .where(columns[:created_at].lt(@retention_period.ago))
      .where(columns[:updated_at].lt(@retention_period.ago))
  end

  # Attachments selected by the `unattached` scope that are older than TTL.
  def orphaned_media_attachments
    columns = MediaAttachment.arel_table

    MediaAttachment
      .unscoped
      .unattached
      .where(columns[:created_at].lt(TTL.ago))
  end
end

View File

@ -0,0 +1,39 @@
# frozen_string_literal: true
# Clears cached images of old preview cards and destroys preview cards
# that are not linked to any status.
class Vacuum::PreviewCardsVacuum
  # Minimum age (by created_at) before an orphaned card is destroyed.
  TTL = 1.day.freeze

  # @param retention_period [#ago, nil] maximum age of cached images; when
  #   it is not `present?`, cached images are left alone
  def initialize(retention_period)
    @retention_period = retention_period
  end

  # Orphaned records are always vacuumed; cached images only when a
  # retention period is configured.
  def perform
    vacuum_cached_images! if retention_period?
    vacuum_orphaned_records!
  end

  private

  def retention_period?
    @retention_period.present?
  end

  # Destroys the image of each matching card, then saves the card itself.
  def vacuum_cached_images!
    preview_cards_past_retention_period.find_each do |card|
      card.image.destroy
      card.save
    end
  end

  def vacuum_orphaned_records!
    orphaned_preview_cards.in_batches.destroy_all
  end

  # Cards selected by the `cached` scope, last updated before the cutoff.
  def preview_cards_past_retention_period
    updated_at = PreviewCard.arel_table[:updated_at]
    PreviewCard.cached.where(updated_at.lt(@retention_period.ago))
  end

  # Cards older than TTL with no row in the preview_cards_statuses table.
  def orphaned_preview_cards
    created_at = PreviewCard.arel_table[:created_at]

    PreviewCard
      .where('NOT EXISTS (SELECT 1 FROM preview_cards_statuses WHERE preview_cards_statuses.preview_card_id = preview_cards.id)')
      .where(created_at.lt(TTL.ago))
  end
end

View File

@ -0,0 +1,54 @@
# frozen_string_literal: true
# Deletes statuses of `Account.remote` accounts whose id is lower than the
# id derived from the retention cutoff.
class Vacuum::StatusesVacuum
  include Redisable

  # @param retention_period [#ago, nil] maximum age of a status; when it is
  #   not `present?`, #perform does nothing
  def initialize(retention_period)
    @retention_period = retention_period
  end

  # Deletes expired statuses, but only when a retention period is configured.
  def perform
    return unless retention_period?

    vacuum_statuses!
  end

  private

  def retention_period?
    @retention_period.present?
  end

  def vacuum_statuses!
    statuses_scope.find_in_batches do |batch|
      # Anything foreign keys do not clean up (for example the search
      # index) has to be dealt with before the rows disappear.
      remove_from_account_conversations(batch)
      remove_from_search_index(batch)

      # Most associated records go away through foreign keys.
      # Media attachments are left behind as orphans.
      Status.where(id: batch.map(&:id)).delete_all
    end
  end

  # Only id and visibility are loaded for each status.
  def statuses_scope
    Status
      .unscoped
      .kept
      .where(account: Account.remote)
      .where(Status.arel_table[:id].lt(retention_period_as_id))
      .select(:id, :visibility)
  end

  # Id produced by Mastodon::Snowflake.id_at for the cutoff time.
  def retention_period_as_id
    cutoff = @retention_period.ago
    Mastodon::Snowflake.id_at(cutoff, with_random: false)
  end

  # NOTE(review): not called from anywhere inside this class.
  def analyze_statuses!
    ActiveRecord::Base.connection.execute('ANALYZE statuses')
  end

  # Re-loads the direct-visibility statuses of the batch with their account
  # and mentions, and unlinks each from its conversations.
  def remove_from_account_conversations(statuses)
    direct_ids = statuses.select(&:direct_visibility?).map(&:id)

    Status
      .where(id: direct_ids)
      .includes(:account, mentions: :account)
      .each(&:unlink_from_conversations)
  end

  # Adds the batch ids to the 'chewy:queue:StatusesIndex' Redis set when
  # Chewy is enabled.
  def remove_from_search_index(statuses)
    return unless Chewy.enabled?

    with_redis { |redis| redis.sadd('chewy:queue:StatusesIndex', statuses.map(&:id)) }
  end
end

View File

@ -0,0 +1,13 @@
# frozen_string_literal: true
# Deletes the records returned by `SystemKey.expired`.
class Vacuum::SystemKeysVacuum
  # Entry point; takes no configuration.
  def perform
    vacuum_expired_system_keys!
  end

  private

  # Issues a single `delete_all` on the expired scope.
  def vacuum_expired_system_keys!
    expired_keys = SystemKey.expired
    expired_keys.delete_all
  end
end

View File

@ -0,0 +1,25 @@
# frozen_string_literal: true
# Exposes the retention-related settings as durations, or nil when a
# setting does not hold a positive number of days.
class ContentRetentionPolicy
  # @return [ContentRetentionPolicy] a fresh policy object
  def self.current
    new
  end

  # @return [Object, nil] duration for cached media, nil when disabled
  def media_cache_retention_period
    retention_period Setting.media_cache_retention_period
  end

  # @return [Object, nil] duration for cached content, nil when disabled
  def content_cache_retention_period
    retention_period Setting.content_cache_retention_period
  end

  # @return [Object, nil] duration for backups, nil when disabled
  def backups_retention_period
    retention_period Setting.backups_retention_period
  end

  private

  # Converts a raw setting into `n.days`.
  #
  # Accepts an Integer or a base-10 integer String. The String case is
  # handled because these settings are edited through text inputs and so
  # presumably may be stored as strings (confirm against the settings
  # form); without it such a value would be treated as "disabled".
  # Anything else, and any value that is not strictly positive, yields nil.
  #
  # @param value [Integer, String, nil] raw setting value
  # @return [Object, nil] result of `Integer#days`, or nil
  def retention_period(value)
    days = value.is_a?(String) ? Integer(value, 10, exception: false) : value
    days.days if days.is_a?(Integer) && days.positive?
  end
end

View File

@ -32,6 +32,9 @@ class Form::AdminSettings
show_domain_blocks_rationale
noindex
require_invite_text
media_cache_retention_period
content_cache_retention_period
backups_retention_period
).freeze
BOOLEAN_KEYS = %i(
@ -64,6 +67,7 @@ class Form::AdminSettings
validates :bootstrap_timeline_accounts, existing_username: { multiple: true }
validates :show_domain_blocks, inclusion: { in: %w(disabled users all) }
validates :show_domain_blocks_rationale, inclusion: { in: %w(disabled users all) }
validates :media_cache_retention_period, :content_cache_retention_period, :backups_retention_period, numericality: { only_integer: true }
def initialize(_attributes = {})
super

View File

@ -45,7 +45,6 @@
.fields-group
= f.input :require_invite_text, as: :boolean, wrapper: :with_label, label: t('admin.settings.registrations.require_invite_text.title'), hint: t('admin.settings.registrations.require_invite_text.desc_html'), disabled: !approved_registrations?
.fields-group
%hr.spacer/
@ -100,5 +99,12 @@
= f.input :site_terms, wrapper: :with_block_label, as: :text, label: t('admin.settings.site_terms.title'), hint: t('admin.settings.site_terms.desc_html'), input_html: { rows: 8 }
= f.input :custom_css, wrapper: :with_block_label, as: :text, input_html: { rows: 8 }, label: t('admin.settings.custom_css.title'), hint: t('admin.settings.custom_css.desc_html')
%hr.spacer/
.fields-group
= f.input :media_cache_retention_period, wrapper: :with_block_label, input_html: { pattern: '[0-9]+' }
= f.input :content_cache_retention_period, wrapper: :with_block_label, input_html: { pattern: '[0-9]+' }
= f.input :backups_retention_period, wrapper: :with_block_label, input_html: { pattern: '[0-9]+' }
.actions
= f.button :button, t('generic.save_changes'), type: :submit

View File

@ -1,17 +0,0 @@
# frozen_string_literal: true
# Sidekiq worker that destroys backups created more than seven days ago.
class Scheduler::BackupCleanupScheduler
  include Sidekiq::Worker

  sidekiq_options retry: 0

  # Drops any ordering before iterating, then destroys each record with
  # `destroy!`, so a failed destroy raises.
  def perform
    old_backups.reorder(nil).find_each do |backup|
      backup.destroy!
    end
  end

  private

  def old_backups
    cutoff = 7.days.ago
    Backup.where('created_at < ?', cutoff)
  end
end

View File

@ -1,35 +0,0 @@
# frozen_string_literal: true
# Sidekiq worker that asks the FeedManager to clean the home and list
# feeds of users matched by `User.confirmed.inactive`.
class Scheduler::FeedCleanupScheduler
  include Sidekiq::Worker
  include Redisable

  sidekiq_options retry: 0

  # Cleans home feeds first, then list feeds.
  def perform
    clean_home_feeds!
    clean_list_feeds!
  end

  private

  def feed_manager
    FeedManager.instance
  end

  # Memoized so both clean-up steps share one query result.
  def inactive_account_ids
    @inactive_account_ids ||= User.confirmed.inactive.pluck(:account_id)
  end

  def inactive_list_ids
    List.where(account_id: inactive_account_ids).pluck(:id)
  end

  def clean_home_feeds!
    account_ids = inactive_account_ids
    feed_manager.clean_feeds!(:home, account_ids)
  end

  def clean_list_feeds!
    list_ids = inactive_list_ids
    feed_manager.clean_feeds!(:list, list_ids)
  end
end

View File

@ -1,17 +0,0 @@
# frozen_string_literal: true
# Sidekiq worker that destroys media attachments selected by the
# `unattached` scope and created more than one day ago.
class Scheduler::MediaCleanupScheduler
  include Sidekiq::Worker

  sidekiq_options retry: 0

  # Destroys each matching record individually.
  def perform
    unattached_media.find_each do |media|
      media.destroy
    end
  end

  private

  # Ordering is dropped up front with `reorder(nil)`.
  def unattached_media
    cutoff = 1.day.ago
    MediaAttachment.reorder(nil).unattached.where('created_at < ?', cutoff)
  end
end

View File

@ -0,0 +1,56 @@
# frozen_string_literal: true
# Sidekiq worker that runs every vacuum operation in sequence, configured
# from the current ContentRetentionPolicy.
class Scheduler::VacuumScheduler
  include Sidekiq::Worker

  sidekiq_options retry: 0

  # Runs each operation in turn. An error raised by one operation is logged
  # and swallowed so the remaining operations still run.
  def perform
    vacuum_operations.each do |operation|
      operation.perform
    rescue => e
      Rails.logger.error("Error while running #{operation.class.name}: #{e}")
    end
  end

  private

  # Operations in execution order. Expired system keys are purged in the
  # same run, via Vacuum::SystemKeysVacuum.
  def vacuum_operations
    [
      statuses_vacuum,
      media_attachments_vacuum,
      preview_cards_vacuum,
      backups_vacuum,
      access_tokens_vacuum,
      system_keys_vacuum,
      feeds_vacuum,
    ]
  end

  def statuses_vacuum
    Vacuum::StatusesVacuum.new(content_retention_policy.content_cache_retention_period)
  end

  def media_attachments_vacuum
    Vacuum::MediaAttachmentsVacuum.new(content_retention_policy.media_cache_retention_period)
  end

  # Preview cards share the media cache retention period.
  def preview_cards_vacuum
    Vacuum::PreviewCardsVacuum.new(content_retention_policy.media_cache_retention_period)
  end

  def backups_vacuum
    Vacuum::BackupsVacuum.new(content_retention_policy.backups_retention_period)
  end

  def access_tokens_vacuum
    Vacuum::AccessTokensVacuum.new
  end

  def system_keys_vacuum
    Vacuum::SystemKeysVacuum.new
  end

  def feeds_vacuum
    Vacuum::FeedsVacuum.new
  end

  def content_retention_policy
    ContentRetentionPolicy.current
  end
end

View File

@ -73,6 +73,10 @@ en:
actions:
hide: Completely hide the filtered content, behaving as if it did not exist
warn: Hide the filtered content behind a warning mentioning the filter's title
form_admin_settings:
backups_retention_period: Keep generated user archives for the specified number of days.
content_cache_retention_period: Posts from other servers will be deleted after the specified number of days when set to a positive value. This may be irreversible.
media_cache_retention_period: Downloaded media files will be deleted after the specified number of days when set to a positive value, and re-downloaded on demand.
form_challenge:
current_password: You are entering a secure area
imports:
@ -207,6 +211,10 @@ en:
actions:
hide: Hide completely
warn: Hide with a warning
form_admin_settings:
backups_retention_period: User archive retention period
content_cache_retention_period: Content cache retention period
media_cache_retention_period: Media cache retention period
interactions:
must_be_follower: Block notifications from non-followers
must_be_following: Block notifications from people you don't follow

View File

@ -70,6 +70,7 @@ defaults: &defaults
show_domain_blocks: 'disabled'
show_domain_blocks_rationale: 'disabled'
require_invite_text: false
backups_retention_period: 7
development:
<<: *defaults

View File

@ -25,22 +25,14 @@
every: '5m'
class: Scheduler::IndexingScheduler
queue: scheduler
media_cleanup_scheduler:
vacuum_scheduler:
cron: '<%= Random.rand(0..59) %> <%= Random.rand(3..5) %> * * *'
class: Scheduler::MediaCleanupScheduler
queue: scheduler
feed_cleanup_scheduler:
cron: '<%= Random.rand(0..59) %> <%= Random.rand(0..2) %> * * *'
class: Scheduler::FeedCleanupScheduler
class: Scheduler::VacuumScheduler
queue: scheduler
follow_recommendations_scheduler:
cron: '<%= Random.rand(0..59) %> <%= Random.rand(6..9) %> * * *'
class: Scheduler::FollowRecommendationsScheduler
queue: scheduler
doorkeeper_cleanup_scheduler:
cron: '<%= Random.rand(0..59) %> <%= Random.rand(0..2) %> * * 0'
class: Scheduler::DoorkeeperCleanupScheduler
queue: scheduler
user_cleanup_scheduler:
cron: '<%= Random.rand(0..59) %> <%= Random.rand(4..6) %> * * *'
class: Scheduler::UserCleanupScheduler
@ -49,10 +41,6 @@
cron: '<%= Random.rand(0..59) %> <%= Random.rand(3..5) %> * * *'
class: Scheduler::IpCleanupScheduler
queue: scheduler
backup_cleanup_scheduler:
cron: '<%= Random.rand(0..59) %> <%= Random.rand(3..5) %> * * *'
class: Scheduler::BackupCleanupScheduler
queue: scheduler
pghero_scheduler:
cron: '0 0 * * *'
class: Scheduler::PgheroScheduler

View File

@ -0,0 +1,6 @@
# Fabricator for Doorkeeper::AccessGrant records used in specs.
Fabricator(:access_grant, from: 'Doorkeeper::AccessGrant') do
  application
  # A new user is fabricated to own each grant.
  resource_owner_id { Fabricate(:user).id }
  expires_in 3_600
  redirect_uri { Doorkeeper.configuration.native_redirect_uri }
end

View File

@ -3,4 +3,5 @@ Fabricator(:preview_card) do
title { Faker::Lorem.sentence }
description { Faker::Lorem.paragraph }
type 'link'
image { attachment_fixture('attachment.jpg') }
end

View File

@ -0,0 +1,33 @@
require 'rails_helper'
# Checks that revoked access tokens and grants are deleted and that
# non-revoked ones are left in place.
RSpec.describe Vacuum::AccessTokensVacuum do
  subject { described_class.new }

  describe '#perform' do
    let!(:revoked_access_token) { Fabricate(:access_token, revoked_at: 1.minute.ago) }
    let!(:active_access_token) { Fabricate(:access_token) }

    let!(:revoked_access_grant) { Fabricate(:access_grant, revoked_at: 1.minute.ago) }
    let!(:active_access_grant) { Fabricate(:access_grant) }

    before { subject.perform }

    it 'deletes revoked access tokens' do
      expect { revoked_access_token.reload }.to raise_error(ActiveRecord::RecordNotFound)
    end

    it 'deletes revoked access grants' do
      expect { revoked_access_grant.reload }.to raise_error(ActiveRecord::RecordNotFound)
    end

    it 'does not delete active access tokens' do
      expect { active_access_token.reload }.to_not raise_error
    end

    it 'does not delete active access grants' do
      expect { active_access_grant.reload }.to_not raise_error
    end
  end
end

View File

@ -0,0 +1,24 @@
require 'rails_helper'
# Checks that only backups older than the retention period are deleted.
RSpec.describe Vacuum::BackupsVacuum do
  let(:retention_period) { 7.days }

  subject { described_class.new(retention_period) }

  describe '#perform' do
    # One day past the retention period.
    let!(:expired_backup) do
      Fabricate(:backup, created_at: (retention_period + 1.day).ago)
    end
    let!(:current_backup) { Fabricate(:backup) }

    before { subject.perform }

    it 'deletes backups past the retention period' do
      expect { expired_backup.reload }.to raise_error(ActiveRecord::RecordNotFound)
    end

    it 'does not delete backups within the retention period' do
      expect { current_backup.reload }.to_not raise_error
    end
  end
end

View File

@ -0,0 +1,30 @@
require 'rails_helper'
# Checks that the home feed keys of an inactive user are cleared while an
# active user's feed is kept.
RSpec.describe Vacuum::FeedsVacuum do
  subject { described_class.new }

  describe '#perform' do
    let!(:active_user) { Fabricate(:user, current_sign_in_at: 2.days.ago) }
    let!(:inactive_user) { Fabricate(:user, current_sign_in_at: 22.days.ago) }

    before do
      # Seed the home feed of both users, plus the reblog tracking keys
      # of the inactive one.
      redis.zadd(feed_key_for(inactive_user), 1, 1)
      redis.zadd(feed_key_for(active_user), 1, 1)
      redis.zadd(feed_key_for(inactive_user, 'reblogs'), 2, 2)
      redis.sadd(feed_key_for(inactive_user, 'reblogs:2'), 3)

      subject.perform
    end

    it 'clears feeds of inactive users and lists' do
      expect(redis.zcard(feed_key_for(inactive_user))).to eq 0
      expect(redis.zcard(feed_key_for(active_user))).to eq 1
      expect(redis.exists?(feed_key_for(inactive_user, 'reblogs'))).to be false
      expect(redis.exists?(feed_key_for(inactive_user, 'reblogs:2'))).to be false
    end
  end

  # Redis key of the user's home feed, optionally for a subtype.
  def feed_key_for(user, subtype = nil)
    FeedManager.instance.key(:home, user.account_id, subtype)
  end
end

View File

@ -0,0 +1,47 @@
require 'rails_helper'
# Checks which media attachments lose their cached file and which
# unattached records are deleted.
RSpec.describe Vacuum::MediaAttachmentsVacuum do
  let(:retention_period) { 7.days }

  subject { described_class.new(retention_period) }

  let(:remote_status) { Fabricate(:status, account: Fabricate(:account, domain: 'example.com')) }
  let(:local_status) { Fabricate(:status) }

  describe '#perform' do
    let!(:old_remote_media) do
      Fabricate(
        :media_attachment,
        remote_url: 'https://example.com/foo.png',
        status: remote_status,
        created_at: (retention_period + 1.day).ago,
        updated_at: (retention_period + 1.day).ago
      )
    end

    let!(:old_local_media) do
      Fabricate(
        :media_attachment,
        status: local_status,
        created_at: (retention_period + 1.day).ago,
        updated_at: (retention_period + 1.day).ago
      )
    end

    let!(:new_remote_media) do
      Fabricate(:media_attachment, remote_url: 'https://example.com/foo.png', status: remote_status)
    end

    let!(:new_local_media) { Fabricate(:media_attachment, status: local_status) }

    # Fabricated with no account, one on each side of the TTL.
    let!(:old_unattached_media) { Fabricate(:media_attachment, account_id: nil, created_at: 10.days.ago) }
    let!(:new_unattached_media) { Fabricate(:media_attachment, account_id: nil, created_at: 1.hour.ago) }

    before { subject.perform }

    it 'deletes cache of remote media attachments past the retention period' do
      expect(old_remote_media.reload.file).to be_blank
    end

    it 'does not touch local media attachments past the retention period' do
      expect(old_local_media.reload.file).to_not be_blank
    end

    it 'does not delete cache of remote media attachments within the retention period' do
      expect(new_remote_media.reload.file).to_not be_blank
    end

    it 'does not touch local media attachments within the retention period' do
      expect(new_local_media.reload.file).to_not be_blank
    end

    it 'deletes unattached media attachments past TTL' do
      expect { old_unattached_media.reload }.to raise_error(ActiveRecord::RecordNotFound)
    end

    it 'does not delete unattached media attachments within TTL' do
      expect(new_unattached_media.reload).to be_persisted
    end
  end
end

View File

@ -0,0 +1,36 @@
require 'rails_helper'
# Checks that old preview cards lose their cached image and that cards
# without any status are deleted.
RSpec.describe Vacuum::PreviewCardsVacuum do
  let(:retention_period) { 7.days }

  subject { described_class.new(retention_period) }

  describe '#perform' do
    # Never linked to a status in this spec.
    let!(:orphaned_preview_card) { Fabricate(:preview_card, created_at: 2.days.ago) }
    let!(:old_preview_card) do
      Fabricate(:preview_card, updated_at: (retention_period + 1.day).ago)
    end
    let!(:new_preview_card) { Fabricate(:preview_card) }

    before do
      # Attach the old and new cards to a status each.
      old_preview_card.statuses << Fabricate(:status)
      new_preview_card.statuses << Fabricate(:status)

      subject.perform
    end

    it 'deletes cache of preview cards last updated before the retention period' do
      expect(old_preview_card.reload.image).to be_blank
    end

    it 'does not delete cache of preview cards last updated within the retention period' do
      expect(new_preview_card.reload.image).to_not be_blank
    end

    it 'does not delete attached preview cards' do
      expect(new_preview_card.reload).to be_persisted
    end

    it 'deletes preview cards not attached to any status' do
      expect { orphaned_preview_card.reload }.to raise_error(ActiveRecord::RecordNotFound)
    end
  end
end

View File

@ -0,0 +1,36 @@
require 'rails_helper'
# Checks that only old statuses of a remote account are deleted.
RSpec.describe Vacuum::StatusesVacuum do
  let(:retention_period) { 7.days }
  let(:remote_account) { Fabricate(:account, domain: 'example.com') }

  subject { described_class.new(retention_period) }

  describe '#perform' do
    # Two days on either side of the retention cutoff.
    let!(:remote_status_old) do
      Fabricate(:status, account: remote_account, created_at: (retention_period + 2.days).ago)
    end
    let!(:remote_status_recent) do
      Fabricate(:status, account: remote_account, created_at: (retention_period - 2.days).ago)
    end
    let!(:local_status_old) do
      Fabricate(:status, created_at: (retention_period + 2.days).ago)
    end
    let!(:local_status_recent) do
      Fabricate(:status, created_at: (retention_period - 2.days).ago)
    end

    before { subject.perform }

    it 'deletes remote statuses past the retention period' do
      expect { remote_status_old.reload }.to raise_error(ActiveRecord::RecordNotFound)
    end

    it 'does not delete local statuses past the retention period' do
      expect { local_status_old.reload }.to_not raise_error
    end

    it 'does not delete remote statuses within the retention period' do
      expect { remote_status_recent.reload }.to_not raise_error
    end

    it 'does not delete local statuses within the retention period' do
      expect { local_status_recent.reload }.to_not raise_error
    end
  end
end

View File

@ -0,0 +1,22 @@
require 'rails_helper'
# Checks that an expired system key is deleted and a fresh one is kept.
RSpec.describe Vacuum::SystemKeysVacuum do
  subject { described_class.new }

  describe '#perform' do
    # Created four rotation periods ago.
    let!(:expired_system_key) do
      Fabricate(:system_key, created_at: (SystemKey::ROTATION_PERIOD * 4).ago)
    end
    let!(:current_system_key) { Fabricate(:system_key) }

    before { subject.perform }

    it 'deletes the expired key' do
      expect { expired_system_key.reload }.to raise_error(ActiveRecord::RecordNotFound)
    end

    it 'does not delete the current key' do
      expect { current_system_key.reload }.to_not raise_error
    end
  end
end

View File

@ -1,26 +0,0 @@
require 'rails_helper'
# Checks that the home feed keys of an inactive user are cleared while an
# active user's feed is kept.
describe Scheduler::FeedCleanupScheduler do
  subject { described_class.new }

  let!(:active_user) { Fabricate(:user, current_sign_in_at: 2.days.ago) }
  let!(:inactive_user) { Fabricate(:user, current_sign_in_at: 22.days.ago) }

  it 'clears feeds of inactives' do
    # Seed the home feed of both users, plus the reblog tracking keys
    # of the inactive one.
    redis.zadd(feed_key_for(inactive_user), 1, 1)
    redis.zadd(feed_key_for(active_user), 1, 1)
    redis.zadd(feed_key_for(inactive_user, 'reblogs'), 2, 2)
    redis.sadd(feed_key_for(inactive_user, 'reblogs:2'), 3)

    subject.perform

    expect(redis.zcard(feed_key_for(inactive_user))).to eq 0
    expect(redis.zcard(feed_key_for(active_user))).to eq 1
    expect(redis.exists?(feed_key_for(inactive_user, 'reblogs'))).to be false
    expect(redis.exists?(feed_key_for(inactive_user, 'reblogs:2'))).to be false
  end

  # Redis key of the user's home feed, optionally for a subtype.
  def feed_key_for(user, subtype = nil)
    FeedManager.instance.key(:home, user.account_id, subtype)
  end
end

View File

@ -1,15 +0,0 @@
require 'rails_helper'
# Checks that an old media attachment fabricated without an account is
# removed while a recent one is kept.
describe Scheduler::MediaCleanupScheduler do
  subject { described_class.new }

  let!(:old_media) do
    Fabricate(:media_attachment, account_id: nil, created_at: 10.days.ago)
  end
  let!(:new_media) do
    Fabricate(:media_attachment, account_id: nil, created_at: 1.hour.ago)
  end

  it 'removes old media records' do
    subject.perform

    expect { old_media.reload }.to raise_error(ActiveRecord::RecordNotFound)
    expect(new_media.reload).to be_persisted
  end
end