Improved remote thread fetching (#10106)
* Fetch up to 5 replies when discovering a new remote status This is used for resolving threads downwards. The originating server must add a “replies” attributes with such replies for it to be useful. * Add some tests for ActivityPub::FetchRepliesWorker * Add specs for ActivityPub::FetchRepliesService * Serialize up to 5 public self-replies for ActivityPub notes * Add specs for ActivityPub::NoteSerializer * Move exponential backoff logic to a worker concern * Fetch first page of paginated collections when fetching thread replies * Add specs for paginated collections in replies * Move Note replies serialization to a first CollectionPage The collection isn't actually paginable yet as it has no id nor a `next` field. This may come in another PR. * Use pluck(:uri) instead of map(&:uri) to improve performances * Fix fetching replies when they are in a CollectionPage
This commit is contained in:
		
					parent
					
						
							
								6e8743d17a
							
						
					
				
			
			
				commit
				
					
						9d3c6f1849
					
				
			
		
					 13 changed files with 333 additions and 7 deletions
				
			
		|  | @ -40,6 +40,7 @@ class ActivityPub::Activity::Create < ActivityPub::Activity | |||
|     end | ||||
| 
 | ||||
|     resolve_thread(@status) | ||||
|     fetch_replies(@status) | ||||
|     distribute(@status) | ||||
|     forward_for_reply if @status.public_visibility? || @status.unlisted_visibility? | ||||
|   end | ||||
|  | @ -213,6 +214,15 @@ class ActivityPub::Activity::Create < ActivityPub::Activity | |||
|     ThreadResolveWorker.perform_async(status.id, in_reply_to_uri) | ||||
|   end | ||||
| 
 | ||||
|   def fetch_replies(status) | ||||
|     collection = @object['replies'] | ||||
|     return if collection.nil? | ||||
|     replies = ActivityPub::FetchRepliesService.new.call(status, collection, false) | ||||
|     return if replies.present? | ||||
|     uri = value_or_id(collection) | ||||
|     ActivityPub::FetchRepliesWorker.perform_async(status.id, uri) unless uri.nil? | ||||
|   end | ||||
| 
 | ||||
|   def conversation_from_uri(uri) | ||||
|     return nil if uri.nil? | ||||
|     return Conversation.find_by(id: OStatus::TagManager.instance.unique_tag_to_local_id(uri, 'Conversation')) if OStatus::TagManager.instance.local_id?(uri) | ||||
|  |  | |||
|  | @ -11,6 +11,10 @@ module StatusThreadingConcern | |||
|     find_statuses_from_tree_path(descendant_ids(limit, max_child_id, since_child_id, depth), account, promote: true) | ||||
|   end | ||||
| 
 | ||||
|   def self_replies(limit) | ||||
|     account.statuses.where(in_reply_to_id: id, visibility: [:public, :unlisted]).reorder(id: :asc).limit(limit) | ||||
|   end | ||||
| 
 | ||||
|   private | ||||
| 
 | ||||
|   def ancestor_ids(limit) | ||||
|  |  | |||
|  | @ -1,5 +1,5 @@ | |||
| # frozen_string_literal: true | ||||
| 
 | ||||
| class ActivityPub::CollectionPresenter < ActiveModelSerializers::Model | ||||
|   attributes :id, :type, :size, :items, :part_of, :first, :last, :next, :prev | ||||
|   attributes :id, :type, :size, :items, :page, :part_of, :first, :last, :next, :prev | ||||
| end | ||||
|  |  | |||
|  | @ -7,7 +7,8 @@ class ActivityPub::CollectionSerializer < ActiveModel::Serializer | |||
|     super | ||||
|   end | ||||
| 
 | ||||
|   attributes :id, :type | ||||
|   attribute :id, if: -> { object.id.present? } | ||||
|   attribute :type | ||||
|   attribute :total_items, if: -> { object.size.present? } | ||||
|   attribute :next, if: -> { object.next.present? } | ||||
|   attribute :prev, if: -> { object.prev.present? } | ||||
|  | @ -37,6 +38,6 @@ class ActivityPub::CollectionSerializer < ActiveModel::Serializer | |||
|   end | ||||
| 
 | ||||
|   def page? | ||||
|     object.part_of.present? | ||||
|     object.part_of.present? || object.page.present? | ||||
|   end | ||||
| end | ||||
|  |  | |||
|  | @ -13,6 +13,8 @@ class ActivityPub::NoteSerializer < ActiveModel::Serializer | |||
|   has_many :media_attachments, key: :attachment | ||||
|   has_many :virtual_tags, key: :tag | ||||
| 
 | ||||
|   has_one :replies, serializer: ActivityPub::CollectionSerializer | ||||
| 
 | ||||
|   def id | ||||
|     ActivityPub::TagManager.instance.uri_for(object) | ||||
|   end | ||||
|  | @ -33,6 +35,17 @@ class ActivityPub::NoteSerializer < ActiveModel::Serializer | |||
|     { object.language => Formatter.instance.format(object) } | ||||
|   end | ||||
| 
 | ||||
|   def replies | ||||
|     ActivityPub::CollectionPresenter.new( | ||||
|       type: :unordered, | ||||
|       first: ActivityPub::CollectionPresenter.new( | ||||
|         type: :unordered, | ||||
|         page: true, | ||||
|         items: object.self_replies(5).pluck(:uri) | ||||
|       ) | ||||
|     ) | ||||
|   end | ||||
| 
 | ||||
|   def language? | ||||
|     object.language.present? | ||||
|   end | ||||
|  |  | |||
							
								
								
									
										60
									
								
								app/services/activitypub/fetch_replies_service.rb
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										60
									
								
								app/services/activitypub/fetch_replies_service.rb
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,60 @@ | |||
| # frozen_string_literal: true | ||||
| 
 | ||||
| class ActivityPub::FetchRepliesService < BaseService | ||||
|   include JsonLdHelper | ||||
| 
 | ||||
|   def call(parent_status, collection_or_uri, allow_synchronous_requests = true) | ||||
|     @account = parent_status.account | ||||
|     @allow_synchronous_requests = allow_synchronous_requests | ||||
| 
 | ||||
|     @items = collection_items(collection_or_uri) | ||||
|     return if @items.nil? | ||||
| 
 | ||||
|     FetchReplyWorker.push_bulk(filtered_replies) | ||||
| 
 | ||||
|     @items | ||||
|   end | ||||
| 
 | ||||
|   private | ||||
| 
 | ||||
|   def collection_items(collection_or_uri) | ||||
|     collection = fetch_collection(collection_or_uri) | ||||
|     return unless collection.is_a?(Hash) | ||||
| 
 | ||||
|     collection = fetch_collection(collection['first']) if collection['first'].present? | ||||
|     return unless collection.is_a?(Hash) | ||||
| 
 | ||||
|     case collection['type'] | ||||
|     when 'Collection', 'CollectionPage' | ||||
|       collection['items'] | ||||
|     when 'OrderedCollection', 'OrderedCollectionPage' | ||||
|       collection['orderedItems'] | ||||
|     end | ||||
|   end | ||||
| 
 | ||||
|   def fetch_collection(collection_or_uri) | ||||
|     return collection_or_uri if collection_or_uri.is_a?(Hash) | ||||
|     return unless @allow_synchronous_requests | ||||
|     return if invalid_origin?(collection_or_uri) | ||||
|     collection = fetch_resource_without_id_validation(collection_or_uri) | ||||
|     raise Mastodon::UnexpectedResponseError if collection.nil? | ||||
|     collection | ||||
|   end | ||||
| 
 | ||||
|   def filtered_replies | ||||
|     # Only fetch replies to the same server as the original status to avoid | ||||
|     # amplification attacks. | ||||
| 
 | ||||
|     # Also limit to 5 fetched replies to limit potential for DoS. | ||||
|     @items.map { |item| value_or_id(item) }.reject { |uri| invalid_origin?(uri) }.take(5) | ||||
|   end | ||||
| 
 | ||||
|   def invalid_origin?(url) | ||||
|     return true if unsupported_uri_scheme?(url) | ||||
| 
 | ||||
|     needle   = Addressable::URI.parse(url).host | ||||
|     haystack = Addressable::URI.parse(@account.uri).host | ||||
| 
 | ||||
|     !haystack.casecmp(needle).zero? | ||||
|   end | ||||
| end | ||||
							
								
								
									
										12
									
								
								app/workers/activitypub/fetch_replies_worker.rb
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								app/workers/activitypub/fetch_replies_worker.rb
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,12 @@ | |||
| # frozen_string_literal: true | ||||
| 
 | ||||
| class ActivityPub::FetchRepliesWorker | ||||
|   include Sidekiq::Worker | ||||
|   include ExponentialBackoff | ||||
| 
 | ||||
|   sidekiq_options queue: 'pull', retry: 3 | ||||
| 
 | ||||
|   def perform(parent_status_id, replies_uri) | ||||
|     ActivityPub::FetchRepliesService.new.call(Status.find(parent_status_id), replies_uri) | ||||
|   end | ||||
| end | ||||
							
								
								
									
										11
									
								
								app/workers/concerns/exponential_backoff.rb
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								app/workers/concerns/exponential_backoff.rb
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,11 @@ | |||
| # frozen_string_literal: true | ||||
| 
 | ||||
| module ExponentialBackoff | ||||
|   extend ActiveSupport::Concern | ||||
| 
 | ||||
|   included do | ||||
|     sidekiq_retry_in do |count| | ||||
|       15 + 10 * (count**4) + rand(10 * (count**4)) | ||||
|     end | ||||
|   end | ||||
| end | ||||
							
								
								
									
										12
									
								
								app/workers/fetch_reply_worker.rb
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								app/workers/fetch_reply_worker.rb
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,12 @@ | |||
| # frozen_string_literal: true | ||||
| 
 | ||||
| class FetchReplyWorker | ||||
|   include Sidekiq::Worker | ||||
|   include ExponentialBackoff | ||||
| 
 | ||||
|   sidekiq_options queue: 'pull', retry: 3 | ||||
| 
 | ||||
|   def perform(child_url) | ||||
|     FetchRemoteStatusService.new.call(child_url) | ||||
|   end | ||||
| end | ||||
|  | @ -2,13 +2,10 @@ | |||
| 
 | ||||
| class ThreadResolveWorker | ||||
|   include Sidekiq::Worker | ||||
|   include ExponentialBackoff | ||||
| 
 | ||||
|   sidekiq_options queue: 'pull', retry: 3 | ||||
| 
 | ||||
|   sidekiq_retry_in do |count| | ||||
|     15 + 10 * (count**4) + rand(10 * (count**4)) | ||||
|   end | ||||
| 
 | ||||
|   def perform(child_status_id, parent_url) | ||||
|     child_status  = Status.find(child_status_id) | ||||
|     parent_status = FetchRemoteStatusService.new.call(parent_url) | ||||
|  |  | |||
							
								
								
									
										44
									
								
								spec/serializers/activitypub/note_spec.rb
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								spec/serializers/activitypub/note_spec.rb
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,44 @@ | |||
| # frozen_string_literal: true | ||||
| 
 | ||||
| require 'rails_helper' | ||||
| 
 | ||||
| describe ActivityPub::NoteSerializer do | ||||
|   let!(:account) { Fabricate(:account) } | ||||
|   let!(:other)   { Fabricate(:account) } | ||||
|   let!(:parent)  { Fabricate(:status, account: account, visibility: :public) } | ||||
|   let!(:reply1)  { Fabricate(:status, account: account, thread: parent, visibility: :public) } | ||||
|   let!(:reply2)  { Fabricate(:status, account: account, thread: parent, visibility: :public) } | ||||
|   let!(:reply3)  { Fabricate(:status, account: other, thread: parent, visibility: :public) } | ||||
|   let!(:reply4)  { Fabricate(:status, account: account, thread: parent, visibility: :public) } | ||||
|   let!(:reply5)  { Fabricate(:status, account: account, thread: parent, visibility: :direct) } | ||||
| 
 | ||||
|   before(:each) do | ||||
|     @serialization = ActiveModelSerializers::SerializableResource.new(parent, serializer: ActivityPub::NoteSerializer, adapter: ActivityPub::Adapter) | ||||
|   end | ||||
| 
 | ||||
|   subject { JSON.parse(@serialization.to_json) } | ||||
| 
 | ||||
|   it 'has a Note type' do | ||||
|     expect(subject['type']).to eql('Note') | ||||
|   end | ||||
| 
 | ||||
|   it 'has a replies collection' do | ||||
|     expect(subject['replies']['type']).to eql('Collection') | ||||
|   end | ||||
| 
 | ||||
|   it 'has a replies collection with a first Page' do | ||||
|     expect(subject['replies']['first']['type']).to eql('CollectionPage') | ||||
|   end | ||||
| 
 | ||||
|   it 'includes public self-replies in its replies collection' do | ||||
|     expect(subject['replies']['first']['items']).to include(reply1.uri, reply2.uri, reply4.uri) | ||||
|   end | ||||
| 
 | ||||
|   it 'does not include replies from others in its replies collection' do | ||||
|     expect(subject['replies']['first']['items']).to_not include(reply3.uri) | ||||
|   end | ||||
| 
 | ||||
|   it 'does not include replies with direct visibility in its replies collection' do | ||||
|     expect(subject['replies']['first']['items']).to_not include(reply5.uri) | ||||
|   end | ||||
| end | ||||
							
								
								
									
										122
									
								
								spec/services/activitypub/fetch_replies_service_spec.rb
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										122
									
								
								spec/services/activitypub/fetch_replies_service_spec.rb
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,122 @@ | |||
| require 'rails_helper' | ||||
| 
 | ||||
| RSpec.describe ActivityPub::FetchRepliesService, type: :service do | ||||
|   let(:actor)          { Fabricate(:account, domain: 'example.com', uri: 'http://example.com/account') } | ||||
|   let(:status)         { Fabricate(:status, account: actor) } | ||||
|   let(:collection_uri) { 'http://example.com/replies/1' } | ||||
| 
 | ||||
|   let(:items) do | ||||
|     [ | ||||
|       'http://example.com/self-reply-1', | ||||
|       'http://example.com/self-reply-2', | ||||
|       'http://example.com/self-reply-3', | ||||
|       'http://other.com/other-reply-1', | ||||
|       'http://other.com/other-reply-2', | ||||
|       'http://other.com/other-reply-3', | ||||
|       'http://example.com/self-reply-4', | ||||
|       'http://example.com/self-reply-5', | ||||
|       'http://example.com/self-reply-6', | ||||
|     ] | ||||
|   end | ||||
| 
 | ||||
|   let(:payload) do | ||||
|     { | ||||
|       '@context': 'https://www.w3.org/ns/activitystreams', | ||||
|       type: 'Collection', | ||||
|       id: collection_uri, | ||||
|       items: items, | ||||
|     }.with_indifferent_access | ||||
|   end | ||||
| 
 | ||||
|   subject { described_class.new } | ||||
| 
 | ||||
|   describe '#call' do | ||||
|     context 'when the payload is a Collection with inlined replies' do | ||||
|       context 'when passing the collection itself' do | ||||
|         it 'spawns workers for up to 5 replies on the same server' do | ||||
|           allow(FetchReplyWorker).to receive(:push_bulk) | ||||
|           subject.call(status, payload) | ||||
|           expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5']) | ||||
|         end | ||||
|       end | ||||
| 
 | ||||
|       context 'when passing the URL to the collection' do | ||||
|         before do | ||||
|           stub_request(:get, collection_uri).to_return(status: 200, body: Oj.dump(payload)) | ||||
|         end | ||||
| 
 | ||||
|         it 'spawns workers for up to 5 replies on the same server' do | ||||
|           allow(FetchReplyWorker).to receive(:push_bulk) | ||||
|           subject.call(status, collection_uri) | ||||
|           expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5']) | ||||
|         end | ||||
|       end | ||||
|     end | ||||
| 
 | ||||
|     context 'when the payload is an OrderedCollection with inlined replies' do | ||||
|       let(:payload) do | ||||
|         { | ||||
|           '@context': 'https://www.w3.org/ns/activitystreams', | ||||
|           type: 'OrderedCollection', | ||||
|           id: collection_uri, | ||||
|           orderedItems: items, | ||||
|         }.with_indifferent_access | ||||
|       end | ||||
| 
 | ||||
|       context 'when passing the collection itself' do | ||||
|         it 'spawns workers for up to 5 replies on the same server' do | ||||
|           allow(FetchReplyWorker).to receive(:push_bulk) | ||||
|           subject.call(status, payload) | ||||
|           expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5']) | ||||
|         end | ||||
|       end | ||||
| 
 | ||||
|       context 'when passing the URL to the collection' do | ||||
|         before do | ||||
|           stub_request(:get, collection_uri).to_return(status: 200, body: Oj.dump(payload)) | ||||
|         end | ||||
| 
 | ||||
|         it 'spawns workers for up to 5 replies on the same server' do | ||||
|           allow(FetchReplyWorker).to receive(:push_bulk) | ||||
|           subject.call(status, collection_uri) | ||||
|           expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5']) | ||||
|         end | ||||
|       end | ||||
|     end | ||||
| 
 | ||||
|     context 'when the payload is a paginated Collection with inlined replies' do | ||||
|       let(:payload) do | ||||
|         { | ||||
|           '@context': 'https://www.w3.org/ns/activitystreams', | ||||
|           type: 'Collection', | ||||
|           id: collection_uri, | ||||
|           first: { | ||||
|             type: 'CollectionPage', | ||||
|             partOf: collection_uri, | ||||
|             items: items, | ||||
|           } | ||||
|         }.with_indifferent_access | ||||
|       end | ||||
| 
 | ||||
|       context 'when passing the collection itself' do | ||||
|         it 'spawns workers for up to 5 replies on the same server' do | ||||
|           allow(FetchReplyWorker).to receive(:push_bulk) | ||||
|           subject.call(status, payload) | ||||
|           expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5']) | ||||
|         end | ||||
|       end | ||||
| 
 | ||||
|       context 'when passing the URL to the collection' do | ||||
|         before do | ||||
|           stub_request(:get, collection_uri).to_return(status: 200, body: Oj.dump(payload)) | ||||
|         end | ||||
| 
 | ||||
|         it 'spawns workers for up to 5 replies on the same server' do | ||||
|           allow(FetchReplyWorker).to receive(:push_bulk) | ||||
|           subject.call(status, collection_uri) | ||||
|           expect(FetchReplyWorker).to have_received(:push_bulk).with(['http://example.com/self-reply-1', 'http://example.com/self-reply-2', 'http://example.com/self-reply-3', 'http://example.com/self-reply-4', 'http://example.com/self-reply-5']) | ||||
|         end | ||||
|       end | ||||
|     end | ||||
|   end | ||||
| end | ||||
							
								
								
									
										40
									
								
								spec/workers/activitypub/fetch_replies_worker_spec.rb
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								spec/workers/activitypub/fetch_replies_worker_spec.rb
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,40 @@ | |||
| # frozen_string_literal: true | ||||
| 
 | ||||
| require 'rails_helper' | ||||
| 
 | ||||
| describe ActivityPub::FetchRepliesWorker do | ||||
|   subject { described_class.new } | ||||
| 
 | ||||
|   let(:account) { Fabricate(:account, uri: 'https://example.com/user/1') } | ||||
|   let(:status)  { Fabricate(:status, account: account) } | ||||
| 
 | ||||
|   let(:payload) do | ||||
|     { | ||||
|       '@context': 'https://www.w3.org/ns/activitystreams', | ||||
|       id: 'https://example.com/statuses_replies/1', | ||||
|       type: 'Collection', | ||||
|       items: [], | ||||
|     } | ||||
|   end | ||||
| 
 | ||||
|   let(:json) { Oj.dump(payload) } | ||||
| 
 | ||||
|   describe 'perform' do | ||||
|     it 'performs a request if the collection URI is from the same host' do | ||||
|       stub_request(:get, 'https://example.com/statuses_replies/1').to_return(status: 200, body: json) | ||||
|       subject.perform(status.id, 'https://example.com/statuses_replies/1') | ||||
|       expect(a_request(:get, 'https://example.com/statuses_replies/1')).to have_been_made.once | ||||
|     end | ||||
| 
 | ||||
|     it 'does not perform a request if the collection URI is from a different host' do | ||||
|       stub_request(:get, 'https://other.com/statuses_replies/1').to_return(status: 200) | ||||
|       subject.perform(status.id, 'https://other.com/statuses_replies/1') | ||||
|       expect(a_request(:get, 'https://other.com/statuses_replies/1')).to_not have_been_made | ||||
|     end | ||||
| 
 | ||||
|     it 'raises when request fails' do | ||||
|       stub_request(:get, 'https://example.com/statuses_replies/1').to_return(status: 500) | ||||
|       expect { subject.perform(status.id, 'https://example.com/statuses_replies/1') }.to raise_error Mastodon::UnexpectedResponseError | ||||
|     end | ||||
|   end | ||||
| end | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue