Merge tag 'v4.3.0-rc.1'
This commit is contained in:
commit
26c9b9ba39
3459 changed files with 130932 additions and 69993 deletions
|
|
@ -62,7 +62,8 @@ class LinkDetailsExtractor
|
|||
end
|
||||
|
||||
# Returns the author's display name taken from the `name` entry of `author`.
#
# When `name` is an Array, its elements are joined with ", "; any other
# value (including nil) is returned unchanged.
def author_name
  name = author['name']
  name.is_a?(Array) ? name.join(', ') : name
end
|
||||
|
||||
def author_url
|
||||
|
|
@ -100,7 +101,7 @@ class LinkDetailsExtractor
|
|||
end
|
||||
|
||||
# Memoized structured-data object.
#
# Parses @data with Oj, normalises it through `root_array`, drops nil
# entries (so `obj['@type']` is never called on nil) and keeps the first
# entry whose '@type' is listed in SUPPORTED_TYPES. Falls back to an empty
# Hash when nothing matches.
def json
  @json ||= root_array(Oj.load(@data)).compact.find { |obj| SUPPORTED_TYPES.include?(obj['@type']) } || {}
end
|
||||
end
|
||||
|
||||
|
|
@ -156,7 +157,7 @@ class LinkDetailsExtractor
|
|||
end
|
||||
|
||||
# Page title with HTML entities decoded and surrounding whitespace removed.
#
# Sources are tried in order: the structured data headline, the `og:title`
# tag, then the content of the first <title> element in the document.
def title
  raw_title = structured_data&.headline ||
              opengraph_tag('og:title') ||
              document.xpath('//title').map(&:content).first

  # `&.` keeps a nil result from the decoder from raising on #strip.
  html_entities.decode(raw_title)&.strip
end
|
||||
|
||||
def description
|
||||
|
|
@ -195,6 +196,10 @@ class LinkDetailsExtractor
|
|||
structured_data&.author_url
|
||||
end
|
||||
|
||||
# Returns whatever `opengraph_tag` yields for the `fediverse:creator` tag.
def author_account
  opengraph_tag('fediverse:creator')
end
|
||||
|
||||
# Returns the `twitter:player:stream` tag value after passing it through
# `valid_url_or_nil`, so an unusable value yields nil.
def embed_url
  valid_url_or_nil(opengraph_tag('twitter:player:stream'))
end
|
||||
|
|
@ -220,7 +225,7 @@ class LinkDetailsExtractor
|
|||
end
|
||||
|
||||
def valid_url_or_nil(str, same_origin_only: false)
|
||||
return if str.blank? || str == 'null'
|
||||
return if str.blank? || str == 'null' || str == 'undefined'
|
||||
|
||||
url = @original_url + Addressable::URI.parse(str)
|
||||
|
||||
|
|
@ -269,10 +274,32 @@ class LinkDetailsExtractor
|
|||
end
|
||||
|
||||
# Parses @html with Nokogiri::HTML5 using the first candidate encoding under
# which the raw bytes are valid.
#
# Candidates are tried in order: the result of `detect_encoding`, then the
# result of `header_encoding`; nil candidates are dropped. When no candidate
# fits, @html is handed to the parser unchanged with a nil encoding.
def detect_encoding_and_parse_document
  html = nil
  encoding = nil

  [detect_encoding, header_encoding].compact.each do |enc|
    begin
      # Work on a copy so @html keeps its original encoding tag.
      candidate = @html.dup.force_encoding(enc)
    rescue ArgumentError
      # The encoding name is not known to Ruby; try the next candidate
      # instead of aborting the whole parse.
      next
    end

    next unless candidate.valid_encoding?

    html = candidate
    encoding = enc
    break
  end

  html = @html unless encoding

  Nokogiri::HTML5(html, nil, encoding)
end
|
||||
|
||||
# Asks `detector` to guess the encoding of @html, passing @html_charset
# along as the second argument.
#
# Returns the guess's :encoding only when its :confidence (converted with
# #to_i, defaulting to 0) is strictly greater than 60; otherwise nil.
def detect_encoding
  guess = detector.detect(@html, @html_charset)
  return if guess.nil?
  return unless guess.fetch(:confidence, 0).to_i > 60

  guess.fetch(:encoding, nil)
end
|
||||
|
||||
def header_encoding
|
||||
Encoding.find(@html_charset).name if @html_charset
|
||||
rescue ArgumentError
|
||||
# Encoding from HTTP header is not recognized by ruby
|
||||
nil
|
||||
end
|
||||
|
||||
def detect_encoding
|
||||
|
|
@ -287,6 +314,6 @@ class LinkDetailsExtractor
|
|||
end
|
||||
|
||||
# Memoized HTMLEntities coder, constructed with the :expanded argument.
# NOTE(review): :expanded presumably selects the gem's larger entity set;
# confirm against the htmlentities documentation.
def html_entities
  @html_entities ||= HTMLEntities.new(:expanded)
end
|
||||
end
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue