5b07f4e90e
* Add tests * Fix some link previews being incorrectly generated from different prior links. PR #12403 added a cache to avoid redundant queries when the OEmbed endpoint can be guessed from the URL. This caching mechanism is not perfectly correct, as there is no guarantee that all pages from a given domain share the same OEmbed provider endpoint. This PR prevents the FetchOEmbedService from caching OEmbed endpoints that cannot be generalized by replacing a fully-qualified URL in the endpoint's parameters, greatly reducing the number of incorrect cached generalizations.
103 lines
2.6 KiB
Ruby
103 lines
2.6 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
# Fetches the oEmbed representation of a web page.
#
# The oEmbed endpoint is either rebuilt from a previously cached template
# (`options[:cached_endpoint]`) or discovered from the `<link>` tags of the
# page's HTML. The endpoint's response is then downloaded, parsed according to
# its format and checked before being returned.
class FetchOEmbedService
  # Expiry passed to Rails.cache when an endpoint template is stored.
  ENDPOINT_CACHE_EXPIRES_IN = 24.hours.freeze

  # Matches a query-string value that is an absolute http(s) URL (with the
  # `:` and `//` either literal or percent-encoded), up to the next `&`.
  URL_REGEX = %r{(=(https?(%3A|:)(//|%2F%2F)))([^&]*)}i.freeze

  attr_reader :url, :options, :format, :endpoint_url

  # Looks up and fetches the oEmbed payload for `url`.
  #
  # @param url [String] address of the page to look up
  # @param options [Hash] keys read by this class:
  #   - :cached_endpoint — hash with :endpoint (a URI template) and :format;
  #     when present, HTML discovery is skipped entirely
  #   - :format — restricts discovery to :json or :xml
  #   - :html — page HTML to use instead of requesting `url`
  # @return [Hash, nil] the parsed oEmbed payload, or nil when no endpoint is
  #   known, the endpoint does not answer with status 200, or the payload is
  #   unparsable or invalid
  def call(url, options = {})
    @url     = url
    @options = options

    # Start from a clean slate: discovery relies on `||=` and the HTML is
    # memoized, so a reused instance would otherwise carry the previous call's
    # endpoint, format and page body into this one.
    @endpoint_url = nil
    @format       = nil
    remove_instance_variable(:@html) if instance_variable_defined?(:@html)

    if @options[:cached_endpoint]
      parse_cached_endpoint!
    else
      discover_endpoint!
    end

    fetch!
  end

  private

  # Finds the oEmbed endpoint advertised by the page's `<link>` tags, makes it
  # absolute relative to the page URL and hands it to the endpoint cache.
  # JSON is tried before XML unless `options[:format]` pins one of them.
  # Leaves @endpoint_url nil when nothing usable is found.
  def discover_endpoint!
    return if html.nil?

    @format = @options[:format]
    page    = Nokogiri::HTML(html)

    if @format.nil? || @format == :json
      @endpoint_url ||= page.at_xpath('//link[@type="application/json+oembed"]')&.attribute('href')&.value
      @format       ||= :json if @endpoint_url
    end

    if @format.nil? || @format == :xml
      @endpoint_url ||= page.at_xpath('//link[@type="text/xml+oembed"]')&.attribute('href')&.value
      @format       ||= :xml if @endpoint_url
    end

    return if @endpoint_url.blank?

    # The href may be relative; resolve it against the page URL.
    @endpoint_url = (Addressable::URI.parse(@url) + @endpoint_url).to_s

    cache_endpoint!
  rescue Addressable::URI::InvalidURIError
    @endpoint_url = nil
  end

  # Rebuilds the endpoint URL from the cached template by expanding its
  # `{url}` variable with the current page URL. Does nothing when the cached
  # entry lacks an endpoint or a format.
  def parse_cached_endpoint!
    cached = @options[:cached_endpoint]

    return if cached[:endpoint].nil? || cached[:format].nil?

    @endpoint_url = Addressable::Template.new(cached[:endpoint]).expand(url: @url).to_s
    @format       = cached[:format]
  end

  # Stores a template of the discovered endpoint under the page's host.
  #
  # Only endpoints that carry a fully-qualified URL as a parameter value are
  # cached: that value is swapped for `{url}` so the template can be expanded
  # for other pages. Endpoints without such a parameter are not cached.
  def cache_endpoint!
    return unless URL_REGEX.match?(@endpoint_url)

    url_domain = Addressable::URI.parse(@url).normalized_host

    endpoint_hash = {
      endpoint: @endpoint_url.gsub(URL_REGEX, '={url}'),
      format: @format,
    }

    Rails.cache.write("oembed_endpoint:#{url_domain}", endpoint_hash, expires_in: ENDPOINT_CACHE_EXPIRES_IN)
  end

  # Requests the endpoint and returns the validated payload.
  #
  # @return [Hash, nil] nil when there is no endpoint, the status is not 200,
  #   the body is blank, or the body cannot be parsed
  def fetch!
    return if @endpoint_url.blank?

    body = Request.new(:get, @endpoint_url).perform do |res|
      res.code == 200 ? res.body_with_limit : nil
    end

    validate(parse_for_format(body)) if body.present?
  rescue Oj::ParseError, Ox::ParseError
    nil
  end

  # Parses the response body according to @format.
  #
  # A syntactically valid document whose top level is not a hash (a JSON
  # array, string, number or `null`) is treated as "no payload" instead of
  # raising NoMethodError on `with_indifferent_access`.
  #
  # @param body [String] raw response body
  # @return [Object, nil] for :json the top-level hash; for :xml whatever sits
  #   under the `oembed` key; nil for any other format or a non-hash document
  def parse_for_format(body)
    parsed = case @format
             when :json
               Oj.load(body, mode: :strict)
             when :xml
               Ox.load(body, mode: :hash_no_attrs)
             end

    return unless parsed.is_a?(Hash)

    parsed = parsed.with_indifferent_access
    @format == :xml ? parsed.dig(:oembed) : parsed
  end

  # Accepts a payload only if it is a hash declaring version 1.0 and a type.
  #
  # The version is compared through `to_s` so that a JSON number `1.0` is
  # accepted alongside the string "1.0". Nil or non-hash payloads (for
  # instance an XML document without an `<oembed>` root) yield nil rather
  # than raising.
  #
  # @param oembed [Object, nil] parsed payload
  # @return [Hash, nil] the payload itself when valid, nil otherwise
  def validate(oembed)
    return unless oembed.is_a?(Hash)

    oembed if oembed[:version].to_s == '1.0' && oembed[:type].present?
  end

  # Page HTML used for discovery: `options[:html]` when given, otherwise the
  # body of a GET on the page URL, kept only for a 200 `text/html` response.
  # The result (including nil) is memoized for the duration of a call.
  def html
    return @html if defined?(@html)

    @html = @options[:html] || Request.new(:get, @url).add_headers('Accept' => 'text/html').perform do |res|
      res.code == 200 && res.mime_type == 'text/html' ? res.body_with_limit : nil
    end
  end
end
|