From ee66f5790f6bee17ba5d6876021ba8ffd1ef222b Mon Sep 17 00:00:00 2001 From: Claire Date: Thu, 15 Dec 2022 19:21:17 +0100 Subject: [PATCH] Fix unbounded recursion in account discovery (v3.5 backport) (#22026) * Fix trying to fetch posts from other users when fetching featured posts * Rate-limit discovery of new subdomains * Put a limit on recursively discovering new accounts --- Gemfile | 1 + Gemfile.lock | 1 + app/lib/activitypub/activity/create.rb | 2 +- app/lib/activitypub/activity/update.rb | 4 +- app/models/concerns/domain_materializable.rb | 15 ++- .../fetch_featured_collection_service.rb | 7 +- .../fetch_remote_account_service.rb | 4 +- .../fetch_remote_status_service.rb | 8 +- .../activitypub/process_account_service.rb | 25 ++++- .../process_status_update_service.rb | 5 +- .../synchronize_featured_collection_worker.rb | 6 +- .../process_account_service_spec.rb | 94 +++++++++++++++++++ 12 files changed, 152 insertions(+), 20 deletions(-) diff --git a/Gemfile b/Gemfile index 2e77fb42a..f76776c8c 100644 --- a/Gemfile +++ b/Gemfile @@ -66,6 +66,7 @@ gem 'oj', '~> 3.13' gem 'ox', '~> 2.14' gem 'parslet' gem 'posix-spawn' +gem 'public_suffix', '~> 4.0.7' gem 'pundit', '~> 2.2' gem 'premailer-rails' gem 'rack-attack', '~> 6.6' diff --git a/Gemfile.lock b/Gemfile.lock index e12fdc237..2742de860 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -803,6 +803,7 @@ DEPENDENCIES private_address_check (~> 0.5) pry-byebug (~> 3.9) pry-rails (~> 0.3) + public_suffix (~> 4.0.7) puma (~> 5.6) pundit (~> 2.2) rack (~> 2.2.3) diff --git a/app/lib/activitypub/activity/create.rb b/app/lib/activitypub/activity/create.rb index 73882e134..b15e66ca2 100644 --- a/app/lib/activitypub/activity/create.rb +++ b/app/lib/activitypub/activity/create.rb @@ -222,7 +222,7 @@ class ActivityPub::Activity::Create < ActivityPub::Activity return if tag['href'].blank? account = account_from_uri(tag['href']) - account = ActivityPub::FetchRemoteAccountService.new.call(tag['href']) if account.nil? + account = ActivityPub::FetchRemoteAccountService.new.call(tag['href'], request_id: @options[:request_id]) if account.nil? return if account.nil? diff --git a/app/lib/activitypub/activity/update.rb b/app/lib/activitypub/activity/update.rb index 5b3238ece..e7c3bc9bf 100644 --- a/app/lib/activitypub/activity/update.rb +++ b/app/lib/activitypub/activity/update.rb @@ -18,7 +18,7 @@ class ActivityPub::Activity::Update < ActivityPub::Activity def update_account return reject_payload! if @account.uri != object_uri - ActivityPub::ProcessAccountService.new.call(@account.username, @account.domain, @object, signed_with_known_key: true) + ActivityPub::ProcessAccountService.new.call(@account.username, @account.domain, @object, signed_with_known_key: true, request_id: @options[:request_id]) end def update_status @@ -28,6 +28,6 @@ class ActivityPub::Activity::Update < ActivityPub::Activity return if @status.nil? - ActivityPub::ProcessStatusUpdateService.new.call(@status, @object) + ActivityPub::ProcessStatusUpdateService.new.call(@status, @object, request_id: @options[:request_id]) end end diff --git a/app/models/concerns/domain_materializable.rb b/app/models/concerns/domain_materializable.rb index 88337f8c0..0eac6878e 100644 --- a/app/models/concerns/domain_materializable.rb +++ b/app/models/concerns/domain_materializable.rb @@ -3,11 +3,24 @@ module DomainMaterializable extend ActiveSupport::Concern + include Redisable + included do after_create_commit :refresh_instances_view end def refresh_instances_view - Instance.refresh unless domain.nil? || Instance.where(domain: domain).exists? + return if domain.nil? || Instance.exists?(domain: domain) + + Instance.refresh + count_unique_subdomains! + end + + def count_unique_subdomains! + second_and_top_level_domain = PublicSuffix.domain(domain, ignore_private: true) + with_redis do |redis| + redis.pfadd("unique_subdomains_for:#{second_and_top_level_domain}", domain) + redis.expire("unique_subdomains_for:#{second_and_top_level_domain}", 1.minute.seconds) + end end end diff --git a/app/services/activitypub/fetch_featured_collection_service.rb b/app/services/activitypub/fetch_featured_collection_service.rb index 37d05e055..b2b132fd8 100644 --- a/app/services/activitypub/fetch_featured_collection_service.rb +++ b/app/services/activitypub/fetch_featured_collection_service.rb @@ -3,10 +3,11 @@ class ActivityPub::FetchFeaturedCollectionService < BaseService include JsonLdHelper - def call(account) + def call(account, **options) return if account.featured_collection_url.blank? || account.suspended? || account.local? @account = account + @options = options @json = fetch_resource(@account.featured_collection_url, true, local_follower) return unless supported_context?(@json) @@ -38,9 +39,9 @@ class ActivityPub::FetchFeaturedCollectionService < BaseService def process_items(items) status_ids = items.filter_map do |item| uri = value_or_id(item) - next if ActivityPub::TagManager.instance.local_uri?(uri) + next if ActivityPub::TagManager.instance.local_uri?(uri) || invalid_origin?(uri) - status = ActivityPub::FetchRemoteStatusService.new.call(uri, on_behalf_of: local_follower) + status = ActivityPub::FetchRemoteStatusService.new.call(uri, on_behalf_of: local_follower, expected_actor_uri: @account.uri, request_id: @options[:request_id]) next unless status&.account_id == @account.id status.id diff --git a/app/services/activitypub/fetch_remote_account_service.rb b/app/services/activitypub/fetch_remote_account_service.rb index 9d01f5386..df46e03b8 100644 --- a/app/services/activitypub/fetch_remote_account_service.rb +++ b/app/services/activitypub/fetch_remote_account_service.rb @@ -8,7 +8,7 @@ class ActivityPub::FetchRemoteAccountService < BaseService SUPPORTED_TYPES = %w(Application Group Organization Person Service).freeze # Does a WebFinger roundtrip on each call, unless `only_key` is true - def call(uri, id: true, prefetched_body: nil, break_on_redirect: false, only_key: false) + def call(uri, id: true, prefetched_body: nil, break_on_redirect: false, only_key: false, request_id: nil) return if domain_not_allowed?(uri) return ActivityPub::TagManager.instance.uri_to_resource(uri, Account) if ActivityPub::TagManager.instance.local_uri?(uri) @@ -28,7 +28,7 @@ class ActivityPub::FetchRemoteAccountService < BaseService return unless only_key || verified_webfinger? - ActivityPub::ProcessAccountService.new.call(@username, @domain, @json, only_key: only_key, verified_webfinger: !only_key) + ActivityPub::ProcessAccountService.new.call(@username, @domain, @json, only_key: only_key, verified_webfinger: !only_key, request_id: request_id) rescue Oj::ParseError nil end diff --git a/app/services/activitypub/fetch_remote_status_service.rb b/app/services/activitypub/fetch_remote_status_service.rb index 803098245..21b9242f8 100644 --- a/app/services/activitypub/fetch_remote_status_service.rb +++ b/app/services/activitypub/fetch_remote_status_service.rb @@ -4,7 +4,8 @@ class ActivityPub::FetchRemoteStatusService < BaseService include JsonLdHelper # Should be called when uri has already been checked for locality - def call(uri, id: true, prefetched_body: nil, on_behalf_of: nil) + def call(uri, id: true, prefetched_body: nil, on_behalf_of: nil, expected_actor_uri: nil, request_id: nil) + @request_id = request_id @json = begin if prefetched_body.nil? fetch_resource(uri, id, on_behalf_of) @@ -30,6 +31,7 @@ class ActivityPub::FetchRemoteStatusService < BaseService end return if activity_json.nil? || object_uri.nil? || !trustworthy_attribution?(@json['id'], actor_uri) + return if expected_actor_uri.present? && actor_uri != expected_actor_uri return ActivityPub::TagManager.instance.uri_to_resource(object_uri, Status) if ActivityPub::TagManager.instance.local_uri?(object_uri) actor = account_from_uri(actor_uri) @@ -40,7 +42,7 @@ class ActivityPub::FetchRemoteStatusService < BaseService # activity as an update rather than create activity_json['type'] = 'Update' if equals_or_includes_any?(activity_json['type'], %w(Create)) && Status.where(uri: object_uri, account_id: actor.id).exists? - ActivityPub::Activity.factory(activity_json, actor).perform + ActivityPub::Activity.factory(activity_json, actor, request_id: request_id).perform end private @@ -52,7 +54,7 @@ class ActivityPub::FetchRemoteStatusService < BaseService def account_from_uri(uri) actor = ActivityPub::TagManager.instance.uri_to_resource(uri, Account) - actor = ActivityPub::FetchRemoteAccountService.new.call(uri, id: true) if actor.nil? || actor.possibly_stale? + actor = ActivityPub::FetchRemoteAccountService.new.call(uri, id: true, request_id: @request_id) if actor.nil? || actor.possibly_stale? actor end diff --git a/app/services/activitypub/process_account_service.rb b/app/services/activitypub/process_account_service.rb index 4449a5427..65ab84edd 100644 --- a/app/services/activitypub/process_account_service.rb +++ b/app/services/activitypub/process_account_service.rb @@ -6,6 +6,9 @@ class ActivityPub::ProcessAccountService < BaseService include Redisable include Lockable + SUBDOMAINS_RATELIMIT = 10 + DISCOVERIES_PER_REQUEST = 400 + # Should be called with confirmed valid JSON # and WebFinger-resolved username and domain def call(username, domain, json, options = {}) @@ -15,9 +18,12 @@ class ActivityPub::ProcessAccountService < BaseService @json = json @uri = @json['id'] @username = username - @domain = domain + @domain = TagManager.instance.normalize_domain(domain) @collections = {} + # The key does not need to be unguessable, it just needs to be somewhat unique + @options[:request_id] ||= "#{Time.now.utc.to_i}-#{username}@#{domain}" + with_lock("process_account:#{@uri}") do @account = Account.remote.find_by(uri: @uri) if @options[:only_key] @account ||= Account.find_remote(@username, @domain) @@ -25,7 +31,18 @@ class ActivityPub::ProcessAccountService < BaseService @old_protocol = @account&.protocol @suspension_changed = false - create_account if @account.nil? + if @account.nil? + with_redis do |redis| + return nil if redis.pfcount("unique_subdomains_for:#{PublicSuffix.domain(@domain, ignore_private: true)}") >= SUBDOMAINS_RATELIMIT + + discoveries = redis.incr("discovery_per_request:#{@options[:request_id]}") + redis.expire("discovery_per_request:#{@options[:request_id]}", 5.minutes.seconds) + return nil if discoveries > DISCOVERIES_PER_REQUEST + end + + create_account + end + update_account process_tags @@ -149,7 +166,7 @@ class ActivityPub::ProcessAccountService < BaseService end def check_featured_collection! - ActivityPub::SynchronizeFeaturedCollectionWorker.perform_async(@account.id) + ActivityPub::SynchronizeFeaturedCollectionWorker.perform_async(@account.id, { 'request_id' => @options[:request_id] }) end def check_links! @@ -249,7 +266,7 @@ class ActivityPub::ProcessAccountService < BaseService def moved_account account = ActivityPub::TagManager.instance.uri_to_resource(@json['movedTo'], Account) - account ||= ActivityPub::FetchRemoteAccountService.new.call(@json['movedTo'], id: true, break_on_redirect: true) + account ||= ActivityPub::FetchRemoteAccountService.new.call(@json['movedTo'], id: true, break_on_redirect: true, request_id: @options[:request_id]) account end diff --git a/app/services/activitypub/process_status_update_service.rb b/app/services/activitypub/process_status_update_service.rb index addd5fc27..9fd835d77 100644 --- a/app/services/activitypub/process_status_update_service.rb +++ b/app/services/activitypub/process_status_update_service.rb @@ -5,7 +5,7 @@ class ActivityPub::ProcessStatusUpdateService < BaseService include Redisable include Lockable - def call(status, json) + def call(status, json, request_id: nil) raise ArgumentError, 'Status has unsaved changes' if status.changed? @json = json @@ -15,6 +15,7 @@ class ActivityPub::ProcessStatusUpdateService < BaseService @account = status.account @media_attachments_changed = false @poll_changed = false + @request_id = request_id # Only native types can be updated at the moment return @status if !expected_type? || already_updated_more_recently? @@ -185,7 +186,7 @@ class ActivityPub::ProcessStatusUpdateService < BaseService next if href.blank? account = ActivityPub::TagManager.instance.uri_to_resource(href, Account) - account ||= ActivityPub::FetchRemoteAccountService.new.call(href) + account ||= ActivityPub::FetchRemoteAccountService.new.call(href, request_id: @request_id) next if account.nil? diff --git a/app/workers/activitypub/synchronize_featured_collection_worker.rb b/app/workers/activitypub/synchronize_featured_collection_worker.rb index 7a0898e89..f67d693cb 100644 --- a/app/workers/activitypub/synchronize_featured_collection_worker.rb +++ b/app/workers/activitypub/synchronize_featured_collection_worker.rb @@ -5,8 +5,10 @@ class ActivityPub::SynchronizeFeaturedCollectionWorker sidekiq_options queue: 'pull', lock: :until_executed - def perform(account_id) - ActivityPub::FetchFeaturedCollectionService.new.call(Account.find(account_id)) + def perform(account_id, options = {}) + options = { note: true, hashtag: false }.deep_merge(options.deep_symbolize_keys) + + ActivityPub::FetchFeaturedCollectionService.new.call(Account.find(account_id), **options) rescue ActiveRecord::RecordNotFound true end diff --git a/spec/services/activitypub/process_account_service_spec.rb b/spec/services/activitypub/process_account_service_spec.rb index 7728b9ba8..2b20d17b1 100644 --- a/spec/services/activitypub/process_account_service_spec.rb +++ b/spec/services/activitypub/process_account_service_spec.rb @@ -109,4 +109,98 @@ RSpec.describe ActivityPub::ProcessAccountService, type: :service do end end end + + context 'discovering many subdomains in a short timeframe' do + before do + stub_const 'ActivityPub::ProcessAccountService::SUBDOMAINS_RATELIMIT', 5 + end + + let(:subject) do + 8.times do |i| + domain = "test#{i}.testdomain.com" + json = { + id: "https://#{domain}/users/1", + type: 'Actor', + inbox: "https://#{domain}/inbox", + }.with_indifferent_access + described_class.new.call('alice', domain, json) + end + end + + it 'creates at least some accounts' do + expect { subject }.to change { Account.remote.count }.by_at_least(2) + end + + it 'creates no more account than the limit allows' do + expect { subject }.to change { Account.remote.count }.by_at_most(5) + end + end + + context 'accounts referencing other accounts' do + before do + stub_const 'ActivityPub::ProcessAccountService::DISCOVERIES_PER_REQUEST', 5 + end + + let(:payload) do + { + '@context': ['https://www.w3.org/ns/activitystreams'], + id: 'https://foo.test/users/1', + type: 'Person', + inbox: 'https://foo.test/inbox', + featured: 'https://foo.test/users/1/featured', + preferredUsername: 'user1', + }.with_indifferent_access + end + + before do + 8.times do |i| + actor_json = { + '@context': ['https://www.w3.org/ns/activitystreams'], + id: "https://foo.test/users/#{i}", + type: 'Person', + inbox: 'https://foo.test/inbox', + featured: "https://foo.test/users/#{i}/featured", + preferredUsername: "user#{i}", + }.with_indifferent_access + status_json = { + '@context': ['https://www.w3.org/ns/activitystreams'], + id: "https://foo.test/users/#{i}/status", + attributedTo: "https://foo.test/users/#{i}", + type: 'Note', + content: "@user#{i + 1} test", + tag: [ + { + type: 'Mention', + href: "https://foo.test/users/#{i + 1}", + name: "@user#{i + 1 }", + } + ], + to: [ 'as:Public', "https://foo.test/users/#{i + 1}" ] + }.with_indifferent_access + featured_json = { + '@context': ['https://www.w3.org/ns/activitystreams'], + id: "https://foo.test/users/#{i}/featured", + type: 'OrderedCollection', + totelItems: 1, + orderedItems: [status_json], + }.with_indifferent_access + webfinger = { + subject: "acct:user#{i}@foo.test", + links: [{ rel: 'self', href: "https://foo.test/users/#{i}" }], + }.with_indifferent_access + stub_request(:get, "https://foo.test/users/#{i}").to_return(status: 200, body: actor_json.to_json, headers: { 'Content-Type': 'application/activity+json' }) + stub_request(:get, "https://foo.test/users/#{i}/featured").to_return(status: 200, body: featured_json.to_json, headers: { 'Content-Type': 'application/activity+json' }) + stub_request(:get, "https://foo.test/users/#{i}/status").to_return(status: 200, body: status_json.to_json, headers: { 'Content-Type': 'application/activity+json' }) + stub_request(:get, "https://foo.test/.well-known/webfinger?resource=acct:user#{i}@foo.test").to_return(body: webfinger.to_json, headers: { 'Content-Type': 'application/jrd+json' }) + end + end + + it 'creates at least some accounts' do + expect { subject.call('user1', 'foo.test', payload) }.to change { Account.remote.count }.by_at_least(2) + end + + it 'creates no more account than the limit allows' do + expect { subject.call('user1', 'foo.test', payload) }.to change { Account.remote.count }.by_at_most(5) + end + end end