Change auto-deletion throttling constants to better scale with server size (#23320)

This commit is contained in:
Claire 2023-02-23 16:52:40 +01:00
parent a133570b26
commit 68af19c328
2 changed files with 8 additions and 25 deletions

View file

@ -7,7 +7,7 @@ class Scheduler::AccountsStatusesCleanupScheduler
# This limit is mostly to be nice to the fediverse at large and not # This limit is mostly to be nice to the fediverse at large and not
# generate too much traffic. # generate too much traffic.
# This also helps limiting the running time of the scheduler itself. # This also helps limiting the running time of the scheduler itself.
MAX_BUDGET = 50 MAX_BUDGET = 150
# This is an attempt to spread the load across instances, as various # This is an attempt to spread the load across instances, as various
# accounts are likely to have various followers. # accounts are likely to have various followers.
@ -15,28 +15,22 @@ class Scheduler::AccountsStatusesCleanupScheduler
# This is an attempt to limit the workload generated by status removal # This is an attempt to limit the workload generated by status removal
# jobs to something the particular instance can handle. # jobs to something the particular instance can handle.
PER_THREAD_BUDGET = 5 PER_THREAD_BUDGET = 6
# Those avoid loading an instance that is already under load # Those avoid loading an instance that is already under load
MAX_DEFAULT_SIZE = 2 MAX_DEFAULT_SIZE = 200
MAX_DEFAULT_LATENCY = 5 MAX_DEFAULT_LATENCY = 5
MAX_PUSH_SIZE = 5 MAX_PUSH_SIZE = 500
MAX_PUSH_LATENCY = 10 MAX_PUSH_LATENCY = 10
# 'pull' queue has lower priority jobs, and it's unlikely that pushing # 'pull' queue has lower priority jobs, and it's unlikely that pushing
# deletes would cause much issues with this queue if it didn't cause issues # deletes would cause much issues with this queue if it didn't cause issues
# with default and push. Yet, do not enqueue deletes if the instance is # with default and push. Yet, do not enqueue deletes if the instance is
# lagging behind too much. # lagging behind too much.
MAX_PULL_SIZE = 500 MAX_PULL_SIZE = 10_000
MAX_PULL_LATENCY = 300 MAX_PULL_LATENCY = 5.minutes.to_i
# This is less of an issue in general, but deleting old statuses is likely sidekiq_options retry: 0, lock: :until_executed, lock_ttl: 1.day.to_i
# to cause delivery errors, and thus increase the number of jobs to be retried.
# This doesn't directly translate to load, but connection errors and a high
# number of dead instances may lead to this spiraling out of control if
# unchecked.
MAX_RETRY_SIZE = 50_000
sidekiq_options retry: 0, lock: :until_executed
def perform def perform
return if under_load? return if under_load?
@ -72,7 +66,6 @@ class Scheduler::AccountsStatusesCleanupScheduler
end end
def under_load? def under_load?
return true if Sidekiq::Stats.new.retry_size > MAX_RETRY_SIZE
queue_under_load?('default', MAX_DEFAULT_SIZE, MAX_DEFAULT_LATENCY) || queue_under_load?('push', MAX_PUSH_SIZE, MAX_PUSH_LATENCY) || queue_under_load?('pull', MAX_PULL_SIZE, MAX_PULL_LATENCY) queue_under_load?('default', MAX_DEFAULT_SIZE, MAX_DEFAULT_LATENCY) || queue_under_load?('push', MAX_PUSH_SIZE, MAX_PUSH_LATENCY) || queue_under_load?('pull', MAX_PULL_SIZE, MAX_PULL_LATENCY)
end end

View file

@ -23,7 +23,6 @@ describe Scheduler::AccountsStatusesCleanupScheduler do
}, },
] ]
end end
let(:retry_size) { 0 }
before do before do
queue_stub = double queue_stub = double
@ -33,7 +32,6 @@ describe Scheduler::AccountsStatusesCleanupScheduler do
allow(Sidekiq::ProcessSet).to receive(:new).and_return(process_set_stub) allow(Sidekiq::ProcessSet).to receive(:new).and_return(process_set_stub)
sidekiq_stats_stub = double sidekiq_stats_stub = double
allow(sidekiq_stats_stub).to receive(:retry_size).and_return(retry_size)
allow(Sidekiq::Stats).to receive(:new).and_return(sidekiq_stats_stub) allow(Sidekiq::Stats).to receive(:new).and_return(sidekiq_stats_stub)
# Create a bunch of old statuses # Create a bunch of old statuses
@ -70,14 +68,6 @@ describe Scheduler::AccountsStatusesCleanupScheduler do
expect(subject.under_load?).to be true expect(subject.under_load?).to be true
end end
end end
context 'when there is a huge amount of jobs to retry' do
let(:retry_size) { 1_000_000 }
it 'returns true' do
expect(subject.under_load?).to be true
end
end
end end
describe '#get_budget' do describe '#get_budget' do