2018-05-03 02:58:48 +10:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
# Locates the oEmbed endpoint for a URL and fetches the oEmbed document from it.
#
# The endpoint is taken either from a previously cached endpoint template
# (`options[:cached_endpoint]`) or discovered from the `<link>` tags of the
# HTML page at `url`.
class FetchOEmbedService
  # Lifetime of a discovered endpoint template in Rails.cache.
  ENDPOINT_CACHE_EXPIRES_IN = 24.hours.freeze

  # Matches a query-string value that is an http(s) URL (plain or
  # percent-encoded), i.e. the part of an endpoint URL that carries the
  # embedded page URL.
  URL_REGEX = %r{(=(https?(%3A|:)(//|%2F%2F)))([^&]*)}i

  attr_reader :url, :options, :format, :endpoint_url

  # Fetches the oEmbed document for +url+.
  #
  # @param url [String] address of the page to look up
  # @param options [Hash] recognised keys, as read by this class:
  #   :cached_endpoint - Hash with :endpoint (URI template) and :format;
  #                      when truthy, endpoint discovery is skipped
  #   :format          - restricts discovery to :json or :xml
  #   :html            - page markup to use instead of requesting +url+
  # @return [Hash, nil] the parsed document when it passes #validate,
  #   nil otherwise
  def call(url, options = {})
    @url     = url
    @options = options

    if @options[:cached_endpoint]
      parse_cached_endpoint!
    else
      discover_endpoint!
    end

    fetch!
  end

  private

  # Looks for oEmbed <link> tags in the page, resolves the advertised endpoint
  # against the page URL and stores the result in @endpoint_url / @format.
  # An endpoint that cannot be parsed as a URI is discarded.
  def discover_endpoint!
    return if html.nil?

    @format = @options[:format]
    page    = Nokogiri::HTML5(html)

    # JSON is tried first; XML is only consulted if no JSON endpoint was found
    # (or if the caller explicitly asked for XML).
    if @format.nil? || @format == :json
      @endpoint_url ||= page.at_xpath('//link[@type="application/json+oembed"]|//link[@type="text/json+oembed"]')&.attribute('href')&.value
      @format       ||= :json if @endpoint_url
    end

    if @format.nil? || @format == :xml
      @endpoint_url ||= page.at_xpath('//link[@type="text/xml+oembed"]')&.attribute('href')&.value
      @format       ||= :xml if @endpoint_url
    end

    return if @endpoint_url.blank?

    base_url = Addressable::URI.parse(@url)

    # If the OEmbed endpoint is given as http but the URL we opened
    # was served over https, we can assume OEmbed will be available
    # through https as well
    @endpoint_url = (base_url + @endpoint_url).tap do |absolute_url|
      absolute_url.scheme = base_url.scheme if base_url.scheme == 'https'
    end.to_s

    cache_endpoint!
  rescue Addressable::URI::InvalidURIError
    @endpoint_url = nil
  end

  # Builds @endpoint_url / @format from the cached endpoint template by
  # expanding its `url` variable with the requested URL. Does nothing when the
  # cached entry lacks an endpoint or a format.
  def parse_cached_endpoint!
    cached = @options[:cached_endpoint]

    return if cached[:endpoint].nil? || cached[:format].nil?

    @endpoint_url = Addressable::Template.new(cached[:endpoint]).expand(url: @url).to_s
    @format       = cached[:format]
  end

  # Stores the discovered endpoint in Rails.cache, keyed by the normalized host
  # of the page URL, with the embedded URL replaced by a `{url}` placeholder.
  # Endpoints that carry no URL-valued query parameter are not cached.
  def cache_endpoint!
    return unless URL_REGEX.match?(@endpoint_url)

    url_domain = Addressable::URI.parse(@url).normalized_host

    endpoint_hash = {
      endpoint: @endpoint_url.gsub(URL_REGEX, '={url}'),
      format: @format,
    }

    Rails.cache.write("oembed_endpoint:#{url_domain}", endpoint_hash, expires_in: ENDPOINT_CACHE_EXPIRES_IN)
  end

  # Requests the endpoint and returns the validated document.
  #
  # @return [Hash, nil] nil when there is no endpoint, the response is not a
  #   200, the body is blank or unparseable, or the document fails validation
  def fetch!
    return if @endpoint_url.blank?

    body = Request.new(:get, @endpoint_url).perform do |res|
      res.code == 200 ? res.body_with_limit : nil
    end

    validate(parse_for_format(body)) if body.present?
  rescue Oj::ParseError, Ox::ParseError
    nil
  end

  # Parses the response body according to @format.
  #
  # The body comes from a remote server, so a syntactically valid payload may
  # still be something other than an object (e.g. a JSON array or string).
  # Such payloads yield nil instead of raising on `with_indifferent_access`.
  #
  # @param body [String] raw response body
  # @return [Object, nil] for :json the document itself; for :xml whatever is
  #   stored under the `oembed` key; nil for unknown formats or non-Hash payloads
  def parse_for_format(body)
    parsed = case @format
             when :json
               Oj.load(body, mode: :strict)
             when :xml
               Ox.load(body, mode: :hash_no_attrs)
             end

    return unless parsed.is_a?(Hash)

    document = parsed.with_indifferent_access
    @format == :xml ? document.dig(:oembed) : document
  end

  # Accepts only non-empty Hash documents that declare version 1.0 and a type.
  #
  # @param oembed [Object, nil] result of #parse_for_format
  # @return [Hash, nil] the document itself when acceptable, nil otherwise
  def validate(oembed)
    # The XML path can hand over a non-Hash value (e.g. plain text inside
    # <oembed>); indexing that with a Symbol would raise.
    return unless oembed.is_a?(Hash)

    oembed if oembed.present? && oembed[:version].to_s == '1.0' && oembed[:type].present?
  end

  # Memoized page markup: the caller-supplied `:html` option if given, else the
  # body of a GET request to the URL. The request yields nil unless the
  # response is a 200 with a text/html MIME type; a nil result is memoized too.
  def html
    return @html if defined?(@html)

    @html = @options[:html] || Request.new(:get, @url).add_headers('Accept' => 'text/html').perform do |res|
      res.code != 200 || res.mime_type != 'text/html' ? nil : res.body_with_limit
    end
  end
end
|