Merge tag 'v4.3.0-rc.1'

This commit is contained in:
Mike Barnes 2024-10-02 10:34:27 +10:00
commit 26c9b9ba39
3459 changed files with 130932 additions and 69993 deletions

View file

@ -62,7 +62,8 @@ class LinkDetailsExtractor
end
# Returns the `name` entry of the `author` hash.
#
# When that entry is an Array, its elements are joined with ", " into a
# single string; any other value (including nil) is returned unchanged.
def author_name
  name = author['name']
  name.is_a?(Array) ? name.join(', ') : name
end
def author_url
@ -100,7 +101,7 @@ class LinkDetailsExtractor
end
# Parses @data and memoizes the first entry whose '@type' is listed in
# SUPPORTED_TYPES, falling back to an empty hash when nothing matches.
#
# nil entries are removed before the search: calling obj['@type'] on a nil
# element would otherwise raise NoMethodError.
def json
  @json ||= begin
    candidates = root_array(Oj.load(@data)).compact
    candidates.find { |obj| SUPPORTED_TYPES.include?(obj['@type']) } || {}
  end
end
end
@ -156,7 +157,7 @@ class LinkDetailsExtractor
end
# Returns the page title with HTML entities decoded and surrounding
# whitespace stripped.
#
# The raw title is the first available of: the structured data headline,
# the 'og:title' tag, or the content of the first <title> element found by
# XPath. The safe-navigation before `strip` keeps a nil decode result as nil.
def title
  raw_title = structured_data&.headline ||
              opengraph_tag('og:title') ||
              document.xpath('//title').map(&:content).first

  html_entities.decode(raw_title)&.strip
end
def description
@ -195,6 +196,10 @@ class LinkDetailsExtractor
structured_data&.author_url
end
# Returns whatever opengraph_tag yields for the 'fediverse:creator' tag.
# NOTE(review): presumably the author's fediverse account handle — confirm
# against the callers and against opengraph_tag's return contract.
def author_account
opengraph_tag('fediverse:creator')
end
# Looks up the 'twitter:player:stream' tag and passes its value through
# valid_url_or_nil, returning that helper's result.
def embed_url
  stream_location = opengraph_tag('twitter:player:stream')
  valid_url_or_nil(stream_location)
end
@ -220,7 +225,7 @@ class LinkDetailsExtractor
end
def valid_url_or_nil(str, same_origin_only: false)
return if str.blank? || str == 'null'
return if str.blank? || str == 'null' || str == 'undefined'
url = @original_url + Addressable::URI.parse(str)
@ -269,10 +274,32 @@ class LinkDetailsExtractor
end
# Parses @html with Nokogiri's HTML5 parser after choosing an encoding.
#
# Candidates are tried in order: the result of detect_encoding, then the
# result of header_encoding (nil candidates are skipped). The first one under
# which a copy of @html is a valid byte sequence wins, and the re-tagged copy
# is what gets parsed. If no candidate validates, @html itself is parsed with
# a nil encoding argument. @html is never mutated; only duplicates are
# re-tagged.
def detect_encoding_and_parse_document
  encoding = [detect_encoding, header_encoding].compact.find do |candidate|
    @html.dup.force_encoding(candidate).valid_encoding?
  end

  html = encoding ? @html.dup.force_encoding(encoding) : @html

  Nokogiri::HTML5(html, nil, encoding)
end
# Asks `detector` to guess the encoding of @html, passing @html_charset as
# the second argument, and returns the guessed :encoding only when the
# reported :confidence (coerced with to_i, defaulting to 0) is above 60.
# Returns nil when there is no guess or the confidence is too low.
def detect_encoding
  guess = detector.detect(@html, @html_charset)
  return if guess.nil?
  return unless guess.fetch(:confidence, 0).to_i > 60

  guess.fetch(:encoding, nil)
end
# Resolves @html_charset to the canonical name of the matching Ruby Encoding.
# Returns nil when @html_charset is unset or Ruby has no encoding by that name.
def header_encoding
  return unless @html_charset

  begin
    Encoding.find(@html_charset).name
  rescue ArgumentError
    # Encoding from HTTP header is not recognized by ruby
    nil
  end
end
def detect_encoding
@ -287,6 +314,6 @@ class LinkDetailsExtractor
end
# Lazily builds and memoizes the HTMLEntities coder used for decoding.
#
# The coder is constructed with the :expanded flavor. It must be the only
# assignment to @html_entities: an earlier `||=` with a default-flavor
# instance would win the memoization and the :expanded one would never be used.
def html_entities
  @html_entities ||= HTMLEntities.new(:expanded)
end
end