Refactor fetching of remote resources (#11251)

This commit is contained in:
Eugen Rochko 2019-07-10 18:59:28 +02:00 committed by GitHub
parent d04c584159
commit 5d3feed191
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 142 additions and 216 deletions

View file

@ -16,13 +16,15 @@ module JsonLdHelper
# The url attribute can be a string, an array of strings, or an array of objects. # The url attribute can be a string, an array of strings, or an array of objects.
# The objects could include a mimeType. Not-included mimeType means it's text/html. # The objects could include a mimeType. Not-included mimeType means it's text/html.
def url_to_href(value, preferred_type = nil) def url_to_href(value, preferred_type = nil)
single_value = if value.is_a?(Array) && !value.first.is_a?(String) single_value = begin
value.find { |link| preferred_type.nil? || ((link['mimeType'].presence || 'text/html') == preferred_type) } if value.is_a?(Array) && !value.first.is_a?(String)
elsif value.is_a?(Array) value.find { |link| preferred_type.nil? || ((link['mimeType'].presence || 'text/html') == preferred_type) }
value.first elsif value.is_a?(Array)
else value.first
value else
end value
end
end
if single_value.nil? || single_value.is_a?(String) if single_value.nil? || single_value.is_a?(String)
single_value single_value
@ -64,7 +66,9 @@ module JsonLdHelper
def fetch_resource(uri, id, on_behalf_of = nil) def fetch_resource(uri, id, on_behalf_of = nil)
unless id unless id
json = fetch_resource_without_id_validation(uri, on_behalf_of) json = fetch_resource_without_id_validation(uri, on_behalf_of)
return unless json return unless json
uri = json['id'] uri = json['id']
end end
@ -74,24 +78,26 @@ module JsonLdHelper
def fetch_resource_without_id_validation(uri, on_behalf_of = nil, raise_on_temporary_error = false) def fetch_resource_without_id_validation(uri, on_behalf_of = nil, raise_on_temporary_error = false)
build_request(uri, on_behalf_of).perform do |response| build_request(uri, on_behalf_of).perform do |response|
unless response_successful?(response) || response_error_unsalvageable?(response) || !raise_on_temporary_error raise Mastodon::UnexpectedResponseError, response unless response_successful?(response) || response_error_unsalvageable?(response) || !raise_on_temporary_error
raise Mastodon::UnexpectedResponseError, response
end
return body_to_json(response.body_with_limit) if response.code == 200 return body_to_json(response.body_with_limit) if response.code == 200
end end
# If request failed, retry without doing it on behalf of a user # If request failed, retry without doing it on behalf of a user
return if on_behalf_of.nil? return if on_behalf_of.nil?
build_request(uri).perform do |response| build_request(uri).perform do |response|
unless response_successful?(response) || response_error_unsalvageable?(response) || !raise_on_temporary_error raise Mastodon::UnexpectedResponseError, response unless response_successful?(response) || response_error_unsalvageable?(response) || !raise_on_temporary_error
raise Mastodon::UnexpectedResponseError, response
end
response.code == 200 ? body_to_json(response.body_with_limit) : nil response.code == 200 ? body_to_json(response.body_with_limit) : nil
end end
end end
def body_to_json(body, compare_id: nil) def body_to_json(body, compare_id: nil)
json = body.is_a?(String) ? Oj.load(body, mode: :strict) : body json = body.is_a?(String) ? Oj.load(body, mode: :strict) : body
return if compare_id.present? && json['id'] != compare_id return if compare_id.present? && json['id'] != compare_id
json json
rescue Oj::ParseError rescue Oj::ParseError
nil nil
@ -105,35 +111,34 @@ module JsonLdHelper
end end
end end
private
def response_successful?(response) def response_successful?(response)
(200...300).cover?(response.code) (200...300).cover?(response.code)
end end
def response_error_unsalvageable?(response) def response_error_unsalvageable?(response)
(400...500).cover?(response.code) && response.code != 429 response.code == 501 || ((400...500).cover?(response.code) && ![401, 408, 429].include?(response.code))
end end
def build_request(uri, on_behalf_of = nil) def build_request(uri, on_behalf_of = nil)
request = Request.new(:get, uri) Request.new(:get, uri).tap do |request|
request.on_behalf_of(on_behalf_of) if on_behalf_of request.on_behalf_of(on_behalf_of) if on_behalf_of
request.add_headers('Accept' => 'application/activity+json, application/ld+json') request.add_headers('Accept' => 'application/activity+json, application/ld+json')
request end
end end
def load_jsonld_context(url, _options = {}, &_block) def load_jsonld_context(url, _options = {}, &_block)
json = Rails.cache.fetch("jsonld:context:#{url}", expires_in: 30.days, raw: true) do json = Rails.cache.fetch("jsonld:context:#{url}", expires_in: 30.days, raw: true) do
request = Request.new(:get, url) request = Request.new(:get, url)
request.add_headers('Accept' => 'application/ld+json') request.add_headers('Accept' => 'application/ld+json')
request.perform do |res| request.perform do |res|
raise JSON::LD::JsonLdError::LoadingDocumentFailed unless res.code == 200 && res.mime_type == 'application/ld+json' raise JSON::LD::JsonLdError::LoadingDocumentFailed unless res.code == 200 && res.mime_type == 'application/ld+json'
res.body_with_limit res.body_with_limit
end end
end end
doc = JSON::LD::API::RemoteDocument.new(url, json) doc = JSON::LD::API::RemoteDocument.new(url, json)
block_given? ? yield(doc) : doc block_given? ? yield(doc) : doc
end end
end end

View file

@ -41,7 +41,7 @@ class Request
end end
def on_behalf_of(account, key_id_format = :acct, sign_with: nil) def on_behalf_of(account, key_id_format = :acct, sign_with: nil)
raise ArgumentError unless account.local? raise ArgumentError, 'account must be local' unless account&.local?
@account = account @account = account
@keypair = sign_with.present? ? OpenSSL::PKey::RSA.new(sign_with) : @account.keypair @keypair = sign_with.present? ? OpenSSL::PKey::RSA.new(sign_with) : @account.keypair

View file

@ -5,18 +5,18 @@ class ActivityPub::FetchRemoteStatusService < BaseService
# Should be called when uri has already been checked for locality # Should be called when uri has already been checked for locality
def call(uri, id: true, prefetched_body: nil, on_behalf_of: nil) def call(uri, id: true, prefetched_body: nil, on_behalf_of: nil)
@json = if prefetched_body.nil? @json = begin
fetch_resource(uri, id, on_behalf_of) if prefetched_body.nil?
else fetch_resource(uri, id, on_behalf_of)
body_to_json(prefetched_body, compare_id: id ? uri : nil) else
end body_to_json(prefetched_body, compare_id: id ? uri : nil)
end
end
return unless supported_context? && expected_type? return if !(supported_context? && expected_type?) || actor_id.nil? || !trustworthy_attribution?(@json['id'], actor_id)
return if actor_id.nil? || !trustworthy_attribution?(@json['id'], actor_id)
actor = ActivityPub::TagManager.instance.uri_to_resource(actor_id, Account) actor = ActivityPub::TagManager.instance.uri_to_resource(actor_id, Account)
actor = ActivityPub::FetchRemoteAccountService.new.call(actor_id, id: true) if actor.nil? || needs_update(actor) actor = ActivityPub::FetchRemoteAccountService.new.call(actor_id, id: true) if actor.nil? || needs_update?(actor)
return if actor.nil? || actor.suspended? return if actor.nil? || actor.suspended?
@ -46,7 +46,7 @@ class ActivityPub::FetchRemoteStatusService < BaseService
equals_or_includes_any?(@json['type'], ActivityPub::Activity::Create::SUPPORTED_TYPES + ActivityPub::Activity::Create::CONVERTED_TYPES) equals_or_includes_any?(@json['type'], ActivityPub::Activity::Create::SUPPORTED_TYPES + ActivityPub::Activity::Create::CONVERTED_TYPES)
end end
def needs_update(actor) def needs_update?(actor)
actor.possibly_stale? actor.possibly_stale?
end end
end end

View file

@ -1,93 +0,0 @@
# frozen_string_literal: true
# Fetches a remote URL and works out which representation it offers.
#
# On success +call+ returns a three-element array:
#   [resource_url, options_hash, protocol]
# where +protocol+ is +:ostatus+ (Atom) or +:activitypub+ and +options_hash+
# carries the already-downloaded body under +:prefetched_body+. Returns nil
# when the URL is blank, the response is not usable, or an SSL/connection
# error occurs.
class FetchAtomService < BaseService
  include JsonLdHelper

  # Response MIME types that are parsed as ActivityPub JSON.
  ACTIVITY_JSON_MIME_TYPES = ['application/activity+json', 'application/ld+json'].freeze

  # `type` attribute values of <link rel="alternate"> elements that point at
  # an ActivityPub representation of an HTML page.
  ACTIVITY_JSON_LINK_TYPES = ['application/activity+json', 'application/ld+json; profile="https://www.w3.org/ns/activitystreams"'].freeze

  # @param url [String] the URL to look up
  # @return [Array, nil] +[url, options, protocol]+ or nil
  def call(url)
    return if url.blank?

    result = process(url)

    # retry without ActivityPub
    result ||= process(url) if @unsupported_activity

    result
  rescue OpenSSL::SSL::SSLError => e
    Rails.logger.debug "SSL error: #{e}"
    nil
  rescue HTTP::ConnectionError => e
    Rails.logger.debug "HTTP ConnectionError: #{e}"
    nil
  end

  private

  # Requests +url+ and interprets the response.
  #
  # +terminal+ is a real keyword argument: callers already wrote
  # `terminal: true`, which previously ended up as a Hash in an optional
  # positional parameter and only behaved correctly because a Hash is truthy.
  # When +terminal+ is true, alternate links in the response are not followed.
  def process(url, terminal: false)
    @url = url
    perform_request { |response| process_response(response, terminal) }
  end

  # Performs the GET request. ActivityPub and Atom types are dropped from the
  # Accept header once an unsupported ActivityPub document has been seen.
  def perform_request(&block)
    accept = 'text/html'
    accept = 'application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams", application/atom+xml, ' + accept unless @unsupported_activity

    Request.new(:get, @url).add_headers('Accept' => accept).perform(&block)
  end

  # Dispatches on the response MIME type. Only 200 responses are considered.
  def process_response(response, terminal = false)
    return nil if response.code != 200

    if response.mime_type == 'application/atom+xml'
      [@url, { prefetched_body: response.body_with_limit }, :ostatus]
    elsif ACTIVITY_JSON_MIME_TYPES.include?(response.mime_type)
      process_activity_json(response.body_with_limit)
    elsif !terminal
      link_header = response['Link'] && parse_link_header(response)

      if link_header&.find_link(%w(rel alternate))
        process_link_headers(link_header)
      elsif response.mime_type == 'text/html'
        process_html(response)
      end
    end
  end

  # Parses an ActivityPub body. Anything that is not a supported actor (with
  # an inbox) or a supported object type flags the service so that +call+
  # retries without asking for ActivityPub.
  def process_activity_json(body)
    json = body_to_json(body)

    # body_to_json yields nil for unparseable bodies; guard here instead of
    # relying on how the helpers below treat nil.
    if json && supported_context?(json) && (actor_with_inbox?(json) || expected_type?(json))
      [json['id'], { prefetched_body: body, id: true }, :activitypub]
    else
      @unsupported_activity = true
      nil
    end
  end

  # True when the document's type is one of the supported actor types and it
  # declares an inbox.
  def actor_with_inbox?(json)
    equals_or_includes_any?(json['type'], ActivityPub::FetchRemoteAccountService::SUPPORTED_TYPES) && json['inbox'].present?
  end

  # True when the document's type is one that Create activities support or convert.
  def expected_type?(json)
    equals_or_includes_any?(json['type'], ActivityPub::Activity::Create::SUPPORTED_TYPES + ActivityPub::Activity::Create::CONVERTED_TYPES)
  end

  # Looks for <link rel="alternate"> elements in an HTML page and follows the
  # ActivityPub one first, falling back to the Atom one.
  def process_html(response)
    page = Nokogiri::HTML(response.body_with_limit)

    json_link = page.xpath('//link[@rel="alternate"]').find { |link| ACTIVITY_JSON_LINK_TYPES.include?(link['type']) }
    atom_link = page.xpath('//link[@rel="alternate"]').find { |link| link['type'] == 'application/atom+xml' }

    result = nil
    result = process(json_link['href'], terminal: true) unless json_link.nil? || @unsupported_activity
    result ||= process(atom_link['href'], terminal: true) unless atom_link.nil?

    result
  end

  # Same as process_html, but using alternates advertised in the Link header.
  def process_link_headers(link_header)
    json_link = link_header.find_link(%w(rel alternate), %w(type application/activity+json)) || link_header.find_link(%w(rel alternate), ['type', 'application/ld+json; profile="https://www.w3.org/ns/activitystreams"'])
    atom_link = link_header.find_link(%w(rel alternate), %w(type application/atom+xml))

    result = nil
    result = process(json_link.href, terminal: true) unless json_link.nil? || @unsupported_activity
    result ||= process(atom_link.href, terminal: true) unless atom_link.nil?

    result
  end

  # Parses the Link header; when the header is an Array only its first entry is used.
  def parse_link_header(response)
    LinkHeader.parse(response['Link'].is_a?(Array) ? response['Link'].first : response['Link'])
  end
end

View file

@ -29,7 +29,7 @@ class FetchLinkCardService < BaseService
end end
attach_card if @card&.persisted? attach_card if @card&.persisted?
rescue HTTP::Error, Addressable::URI::InvalidURIError, Mastodon::HostValidationError, Mastodon::LengthValidationError => e rescue HTTP::Error, OpenSSL::SSL::SSLError, Addressable::URI::InvalidURIError, Mastodon::HostValidationError, Mastodon::LengthValidationError => e
Rails.logger.debug "Error fetching link #{@url}: #{e}" Rails.logger.debug "Error fetching link #{@url}: #{e}"
nil nil
end end

View file

@ -3,7 +3,7 @@
class FetchRemoteAccountService < BaseService class FetchRemoteAccountService < BaseService
def call(url, prefetched_body = nil, protocol = :ostatus) def call(url, prefetched_body = nil, protocol = :ostatus)
if prefetched_body.nil? if prefetched_body.nil?
resource_url, resource_options, protocol = FetchAtomService.new.call(url) resource_url, resource_options, protocol = FetchResourceService.new.call(url)
else else
resource_url = url resource_url = url
resource_options = { prefetched_body: prefetched_body } resource_options = { prefetched_body: prefetched_body }

View file

@ -3,7 +3,7 @@
class FetchRemoteStatusService < BaseService class FetchRemoteStatusService < BaseService
def call(url, prefetched_body = nil, protocol = :ostatus) def call(url, prefetched_body = nil, protocol = :ostatus)
if prefetched_body.nil? if prefetched_body.nil?
resource_url, resource_options, protocol = FetchAtomService.new.call(url) resource_url, resource_options, protocol = FetchResourceService.new.call(url)
else else
resource_url = url resource_url = url
resource_options = { prefetched_body: prefetched_body } resource_options = { prefetched_body: prefetched_body }

View file

@ -0,0 +1,68 @@
# frozen_string_literal: true
# Fetches a remote URL and, when it resolves to a supported ActivityPub
# document, returns what is needed to process it without fetching again.
#
# On success +call+ returns a three-element array:
#   [resource_id, { prefetched_body: body, id: true }, :activitypub]
# It returns nil when the URL is blank, the response is not a usable
# ActivityPub document, or one of the rescued fetch errors occurs.
class FetchResourceService < BaseService
  include JsonLdHelper

  # ActivityPub representations are requested first; HTML is also accepted
  # so that alternate links can be discovered from a web page.
  ACCEPT_HEADER = 'application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams", text/html'

  # Response MIME types that are parsed as ActivityPub JSON.
  ACTIVITY_JSON_MIME_TYPES = ['application/activity+json', 'application/ld+json'].freeze

  # `type` attribute values of <link rel="alternate"> elements that point at
  # an ActivityPub representation of an HTML page.
  ACTIVITY_JSON_LINK_TYPES = ['application/activity+json', 'application/ld+json; profile="https://www.w3.org/ns/activitystreams"'].freeze

  # @param url [String] the URL to look up
  # @return [Array, nil] +[id, options, :activitypub]+ or nil
  def call(url)
    return if url.blank?

    process(url)
  rescue HTTP::Error, OpenSSL::SSL::SSLError, Addressable::URI::InvalidURIError, Mastodon::HostValidationError, Mastodon::LengthValidationError => e
    Rails.logger.debug "Error fetching resource #{@url}: #{e}"
    nil
  end

  private

  # Requests +url+ and interprets the response.
  #
  # +terminal+ is a real keyword argument: callers already wrote
  # `terminal: true`, which previously ended up as a Hash in an optional
  # positional parameter and only behaved correctly because a Hash is truthy.
  # When +terminal+ is true, alternate links in the response are not followed.
  def process(url, terminal: false)
    @url = url
    perform_request { |response| process_response(response, terminal) }
  end

  # Performs the GET request with the ActivityPub-first Accept header.
  def perform_request(&block)
    Request.new(:get, @url).add_headers('Accept' => ACCEPT_HEADER).perform(&block)
  end

  # Dispatches on the response MIME type. Only 200 responses are considered.
  def process_response(response, terminal = false)
    return nil if response.code != 200

    if ACTIVITY_JSON_MIME_TYPES.include?(response.mime_type)
      process_activity_json(response.body_with_limit)
    elsif !terminal
      link_header = response['Link'] && parse_link_header(response)

      if link_header&.find_link(%w(rel alternate))
        process_link_headers(link_header)
      elsif response.mime_type == 'text/html'
        process_html(response)
      end
    end
  end

  # Parses an ActivityPub body and returns the result triple when the
  # document has a supported context and is either a supported actor type or
  # a supported object type; nil otherwise.
  def process_activity_json(body)
    json = body_to_json(body)

    # body_to_json yields nil for unparseable bodies; guard here instead of
    # relying on how the helpers below treat nil.
    return if json.nil?
    return unless supported_context?(json) && (equals_or_includes_any?(json['type'], ActivityPub::FetchRemoteAccountService::SUPPORTED_TYPES) || expected_type?(json))

    [json['id'], { prefetched_body: body, id: true }, :activitypub]
  end

  # True when the document's type is one that Create activities support or convert.
  def expected_type?(json)
    equals_or_includes_any?(json['type'], ActivityPub::Activity::Create::SUPPORTED_TYPES + ActivityPub::Activity::Create::CONVERTED_TYPES)
  end

  # Follows the ActivityPub <link rel="alternate"> of an HTML page, if any.
  def process_html(response)
    page      = Nokogiri::HTML(response.body_with_limit)
    json_link = page.xpath('//link[@rel="alternate"]').find { |link| ACTIVITY_JSON_LINK_TYPES.include?(link['type']) }

    process(json_link['href'], terminal: true) unless json_link.nil?
  end

  # Follows the ActivityPub alternate advertised in the Link header, if any.
  def process_link_headers(link_header)
    json_link = link_header.find_link(%w(rel alternate), %w(type application/activity+json)) || link_header.find_link(%w(rel alternate), ['type', 'application/ld+json; profile="https://www.w3.org/ns/activitystreams"'])

    process(json_link.href, terminal: true) unless json_link.nil?
  end

  # Parses the Link header; when the header is an Array only its first entry is used.
  def parse_link_header(response)
    LinkHeader.parse(response['Link'].is_a?(Array) ? response['Link'].first : response['Link'])
  end
end

View file

@ -4,64 +4,49 @@ class ResolveURLService < BaseService
include JsonLdHelper include JsonLdHelper
include Authorization include Authorization
attr_reader :url
def call(url, on_behalf_of: nil) def call(url, on_behalf_of: nil)
@url = url @url = url
@on_behalf_of = on_behalf_of @on_behalf_of = on_behalf_of
return process_local_url if local_url? if local_url?
process_local_url
process_url unless fetched_atom_feed.nil? elsif !fetched_resource.nil?
process_url
end
end end
private private
def process_url def process_url
if equals_or_includes_any?(type, ActivityPub::FetchRemoteAccountService::SUPPORTED_TYPES) if equals_or_includes_any?(type, ActivityPub::FetchRemoteAccountService::SUPPORTED_TYPES)
FetchRemoteAccountService.new.call(atom_url, body, protocol) FetchRemoteAccountService.new.call(resource_url, body, protocol)
elsif equals_or_includes_any?(type, ActivityPub::Activity::Create::SUPPORTED_TYPES + ActivityPub::Activity::Create::CONVERTED_TYPES) elsif equals_or_includes_any?(type, ActivityPub::Activity::Create::SUPPORTED_TYPES + ActivityPub::Activity::Create::CONVERTED_TYPES)
FetchRemoteStatusService.new.call(atom_url, body, protocol) FetchRemoteStatusService.new.call(resource_url, body, protocol)
end end
end end
def fetched_atom_feed def fetched_resource
@_fetched_atom_feed ||= FetchAtomService.new.call(url) @fetched_resource ||= FetchResourceService.new.call(@url)
end end
def atom_url def resource_url
fetched_atom_feed.first fetched_resource.first
end end
def body def body
fetched_atom_feed.second[:prefetched_body] fetched_resource.second[:prefetched_body]
end end
def protocol def protocol
fetched_atom_feed.third fetched_resource.third
end end
def type def type
return json_data['type'] if protocol == :activitypub return json_data['type'] if protocol == :activitypub
case xml_root
when 'feed'
'Person'
when 'entry'
'Note'
end
end end
def json_data def json_data
@_json_data ||= body_to_json(body) @json_data ||= body_to_json(body)
end
def xml_root
xml_data.root.name
end
def xml_data
@_xml_data ||= Nokogiri::XML(body, nil, 'utf-8')
end end
def local_url? def local_url?
@ -83,10 +68,10 @@ class ResolveURLService < BaseService
def check_local_status(status) def check_local_status(status)
return if status.nil? return if status.nil?
authorize_with @on_behalf_of, status, :show? authorize_with @on_behalf_of, status, :show?
status status
rescue Mastodon::NotPermittedError rescue Mastodon::NotPermittedError
# Do not disclose the existence of status the user is not authorized to see
nil nil
end end
end end

View file

@ -2,6 +2,7 @@
class ActivityPub::DeliveryWorker class ActivityPub::DeliveryWorker
include Sidekiq::Worker include Sidekiq::Worker
include JsonLdHelper
STOPLIGHT_FAILURE_THRESHOLD = 10 STOPLIGHT_FAILURE_THRESHOLD = 10
STOPLIGHT_COOLDOWN = 60 STOPLIGHT_COOLDOWN = 60
@ -32,9 +33,10 @@ class ActivityPub::DeliveryWorker
private private
def build_request(http_client) def build_request(http_client)
request = Request.new(:post, @inbox_url, body: @json, http_client: http_client) Request.new(:post, @inbox_url, body: @json, http_client: http_client).tap do |request|
request.on_behalf_of(@source_account, :uri, sign_with: @options[:sign_with]) request.on_behalf_of(@source_account, :uri, sign_with: @options[:sign_with])
request.add_headers(HEADERS) request.add_headers(HEADERS)
end
end end
def perform_request def perform_request
@ -53,14 +55,6 @@ class ActivityPub::DeliveryWorker
.run .run
end end
def response_successful?(response)
(200...300).cover?(response.code)
end
def response_error_unsalvageable?(response)
response.code == 501 || ((400...500).cover?(response.code) && ![401, 408, 429].include?(response.code))
end
def failure_tracker def failure_tracker
@failure_tracker ||= DeliveryFailureTracker.new(@inbox_url) @failure_tracker ||= DeliveryFailureTracker.new(@inbox_url)
end end

View file

@ -4,6 +4,7 @@ RSpec.describe FetchRemoteAccountService, type: :service do
let(:url) { 'https://example.com/alice' } let(:url) { 'https://example.com/alice' }
let(:prefetched_body) { nil } let(:prefetched_body) { nil }
let(:protocol) { :ostatus } let(:protocol) { :ostatus }
subject { FetchRemoteAccountService.new.call(url, prefetched_body, protocol) } subject { FetchRemoteAccountService.new.call(url, prefetched_body, protocol) }
let(:actor) do let(:actor) do

View file

@ -1,9 +1,11 @@
require 'rails_helper' require 'rails_helper'
RSpec.describe FetchAtomService, type: :service do RSpec.describe FetchResourceService, type: :service do
let!(:representative) { Fabricate(:account) }
describe '#call' do describe '#call' do
let(:url) { 'http://example.com' } let(:url) { 'http://example.com' }
subject { FetchAtomService.new.call(url) } subject { described_class.new.call(url) }
context 'url is blank' do context 'url is blank' do
let(:url) { '' } let(:url) { '' }
@ -23,8 +25,7 @@ RSpec.describe FetchAtomService, type: :service do
allow(Request).to receive_message_chain(:new, :add_headers, :perform).and_raise(OpenSSL::SSL::SSLError) allow(Request).to receive_message_chain(:new, :add_headers, :perform).and_raise(OpenSSL::SSL::SSLError)
end end
it 'output log and return nil' do it 'return nil' do
expect_any_instance_of(ActiveSupport::Logger).to receive(:debug).with('SSL error: OpenSSL::SSL::SSLError')
is_expected.to be_nil is_expected.to be_nil
end end
end end
@ -34,8 +35,7 @@ RSpec.describe FetchAtomService, type: :service do
allow(Request).to receive_message_chain(:new, :add_headers, :perform).and_raise(HTTP::ConnectionError) allow(Request).to receive_message_chain(:new, :add_headers, :perform).and_raise(HTTP::ConnectionError)
end end
it 'output log and return nil' do it 'return nil' do
expect_any_instance_of(ActiveSupport::Logger).to receive(:debug).with('HTTP ConnectionError: HTTP::ConnectionError')
is_expected.to be_nil is_expected.to be_nil
end end
end end
@ -57,7 +57,7 @@ RSpec.describe FetchAtomService, type: :service do
context 'content type is application/atom+xml' do context 'content type is application/atom+xml' do
let(:content_type) { 'application/atom+xml' } let(:content_type) { 'application/atom+xml' }
it { is_expected.to eq [url, { :prefetched_body => "" }, :ostatus] } it { is_expected.to eq nil }
end end
context 'content_type is activity+json' do context 'content_type is activity+json' do

View file

@ -6,48 +6,14 @@ describe ResolveURLService, type: :service do
subject { described_class.new } subject { described_class.new }
describe '#call' do describe '#call' do
it 'returns nil when there is no atom url' do it 'returns nil when there is no resource url' do
url = 'http://example.com/missing-atom' url = 'http://example.com/missing-resource'
service = double service = double
allow(FetchAtomService).to receive(:new).and_return service
allow(FetchResourceService).to receive(:new).and_return service
allow(service).to receive(:call).with(url).and_return(nil) allow(service).to receive(:call).with(url).and_return(nil)
result = subject.call(url) expect(subject.call(url)).to be_nil
expect(result).to be_nil
end
it 'fetches remote accounts for feed types' do
url = 'http://example.com/atom-feed'
service = double
allow(FetchAtomService).to receive(:new).and_return service
feed_url = 'http://feed-url'
feed_content = '<feed>contents</feed>'
allow(service).to receive(:call).with(url).and_return([feed_url, { prefetched_body: feed_content }])
account_service = double
allow(FetchRemoteAccountService).to receive(:new).and_return(account_service)
allow(account_service).to receive(:call)
_result = subject.call(url)
expect(account_service).to have_received(:call).with(feed_url, feed_content, nil)
end
it 'fetches remote statuses for entry types' do
url = 'http://example.com/atom-entry'
service = double
allow(FetchAtomService).to receive(:new).and_return service
feed_url = 'http://feed-url'
feed_content = '<entry>contents</entry>'
allow(service).to receive(:call).with(url).and_return([feed_url, { prefetched_body: feed_content }])
account_service = double
allow(FetchRemoteStatusService).to receive(:new).and_return(account_service)
allow(account_service).to receive(:call)
_result = subject.call(url)
expect(account_service).to have_received(:call).with(feed_url, feed_content, nil)
end end
end end
end end