From 967505ee9bcacf0e5189aa06c654ff586c198a46 Mon Sep 17 00:00:00 2001 From: David Roetzel Date: Tue, 9 Jul 2024 15:11:34 +0200 Subject: [PATCH] Add size limit for all PreviewCard URLs (#30973) --- app/models/preview_card.rb | 7 +++++- app/services/fetch_link_card_service.rb | 7 ++---- spec/fixtures/requests/long_canonical_url.txt | 18 +++++++++++++++ spec/services/fetch_link_card_service_spec.rb | 22 ++++++++----------- 4 files changed, 35 insertions(+), 19 deletions(-) create mode 100644 spec/fixtures/requests/long_canonical_url.txt diff --git a/app/models/preview_card.rb b/app/models/preview_card.rb index eac02ac14..5a11351e5 100644 --- a/app/models/preview_card.rb +++ b/app/models/preview_card.rb @@ -46,6 +46,11 @@ class PreviewCard < ApplicationRecord y_comp: 4, }.freeze + # URL size limit to safely store in PostgreSQL's unique indexes + # Technically this is a byte-size limit but we use it as a + # character limit to work with length validation + URL_CHARACTER_LIMIT = 2692 + self.inheritance_column = false enum :type, { link: 0, photo: 1, video: 2, rich: 3 } @@ -63,7 +68,7 @@ class PreviewCard < ApplicationRecord convert_options: { all: '-quality 90 +profile "!icc,*" +set date:modify +set date:create +set date:timestamp' }, validate_media_type: false - validates :url, presence: true, uniqueness: true, url: true + validates :url, presence: true, uniqueness: true, url: true, length: { maximum: URL_CHARACTER_LIMIT } validates_attachment_content_type :image, content_type: IMAGE_MIME_TYPES validates_attachment_size :image, less_than: LIMIT remotable_attachment :image, LIMIT diff --git a/app/services/fetch_link_card_service.rb b/app/services/fetch_link_card_service.rb index 436e024c9..adabb1096 100644 --- a/app/services/fetch_link_card_service.rb +++ b/app/services/fetch_link_card_service.rb @@ -15,9 +15,6 @@ class FetchLinkCardService < BaseService ) }iox - # URL size limit to safely store in PosgreSQL's unique indexes - BYTESIZE_LIMIT = 2692 - def call(status) 
@status = status @original_url = parse_urls @@ -32,7 +29,7 @@ class FetchLinkCardService < BaseService end attach_card if @card&.persisted? - rescue HTTP::Error, OpenSSL::SSL::SSLError, Addressable::URI::InvalidURIError, Mastodon::HostValidationError, Mastodon::LengthValidationError, EncodingError => e + rescue HTTP::Error, OpenSSL::SSL::SSLError, Addressable::URI::InvalidURIError, Mastodon::HostValidationError, Mastodon::LengthValidationError, EncodingError, ActiveRecord::RecordInvalid => e Rails.logger.debug { "Error fetching link #{@original_url}: #{e}" } nil end @@ -88,7 +85,7 @@ class FetchLinkCardService < BaseService def bad_url?(uri) # Avoid local instance URLs and invalid URLs - uri.host.blank? || TagManager.instance.local_url?(uri.to_s) || !%w(http https).include?(uri.scheme) || uri.to_s.bytesize > BYTESIZE_LIMIT + uri.host.blank? || TagManager.instance.local_url?(uri.to_s) || !%w(http https).include?(uri.scheme) end def mention_link?(anchor) diff --git a/spec/fixtures/requests/long_canonical_url.txt b/spec/fixtures/requests/long_canonical_url.txt new file mode 100644 index 000000000..97d6c9396 --- /dev/null +++ b/spec/fixtures/requests/long_canonical_url.txt @@ -0,0 +1,18 @@ +HTTP/1.1 200 OK +server: nginx +date: Thu, 13 Jun 2024 14:33:13 GMT +content-type: text/html; charset=utf-8 +content-length: 3225 +accept-ranges: bytes + + + + + + + Very long canonical URL + + +

We have very long URLs

+ + diff --git a/spec/services/fetch_link_card_service_spec.rb b/spec/services/fetch_link_card_service_spec.rb index b2cd99cea..342902cdb 100644 --- a/spec/services/fetch_link_card_service_spec.rb +++ b/spec/services/fetch_link_card_service_spec.rb @@ -31,6 +31,7 @@ RSpec.describe FetchLinkCardService do stub_request(:get, 'http://example.com/latin1_posing_as_utf8_recoverable').to_return(request_fixture('latin1_posing_as_utf8_recoverable.txt')) stub_request(:get, 'http://example.com/aergerliche-umlaute').to_return(request_fixture('redirect_with_utf8_url.txt')) stub_request(:get, 'http://example.com/page_without_title').to_return(request_fixture('page_without_title.txt')) + stub_request(:get, 'http://example.com/long_canonical_url').to_return(request_fixture('long_canonical_url.txt')) Rails.cache.write('oembed_endpoint:example.com', oembed_cache) if oembed_cache @@ -233,19 +234,6 @@ RSpec.describe FetchLinkCardService do end end - context 'with an URL too long for PostgreSQL unique indexes' do - let(:url) { "http://example.com/#{'a' * 2674}" } - let(:status) { Fabricate(:status, text: url) } - - it 'does not fetch the URL' do - expect(a_request(:get, url)).to_not have_been_made - end - - it 'does not create a preview card' do - expect(status.preview_card).to be_nil - end - end - context 'with a URL of a page with oEmbed support' do let(:html) { 'Hello world' } let(:status) { Fabricate(:status, text: 'http://example.com/html') } @@ -296,6 +284,14 @@ RSpec.describe FetchLinkCardService do end end end + + context 'with a URL of a page that includes a canonical URL too long for PostgreSQL unique indexes' do + let(:status) { Fabricate(:status, text: 'test http://example.com/long_canonical_url') } + + it 'does not create a preview card' do + expect(status.preview_card).to be_nil + end + end end context 'with a remote status' do