From 10143d053a99d69f5770f6a5478ab0f88a95ae5b Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sun, 8 Sep 2024 14:41:37 -0400 Subject: [PATCH] Change some instances of Nokogiri HTML4 parsing to HTML5 (#31812) --- app/helpers/admin/trends/statuses_helper.rb | 2 +- app/lib/emoji_formatter.rb | 8 ++++---- app/lib/plain_text_formatter.rb | 2 +- app/services/fetch_oembed_service.rb | 2 +- app/services/fetch_resource_service.rb | 2 +- app/services/translate_status_service.rb | 2 +- lib/sanitize_ext/sanitize_config.rb | 2 +- lib/tasks/emojis.rake | 2 +- 8 files changed, 11 insertions(+), 11 deletions(-) diff --git a/app/helpers/admin/trends/statuses_helper.rb b/app/helpers/admin/trends/statuses_helper.rb index 79fee44dc..c7a59660c 100644 --- a/app/helpers/admin/trends/statuses_helper.rb +++ b/app/helpers/admin/trends/statuses_helper.rb @@ -5,7 +5,7 @@ module Admin::Trends::StatusesHelper text = if status.local? status.text.split("\n").first else - Nokogiri::HTML(status.text).css('html > body > *').first&.text + Nokogiri::HTML5(status.text).css('html > body > *').first&.text end return '' if text.blank? diff --git a/app/lib/emoji_formatter.rb b/app/lib/emoji_formatter.rb index 2a3683c49..5f1a4651f 100644 --- a/app/lib/emoji_formatter.rb +++ b/app/lib/emoji_formatter.rb @@ -24,7 +24,7 @@ class EmojiFormatter def to_s return html if custom_emojis.empty? || html.blank? - tree = Nokogiri::HTML.fragment(html) + tree = Nokogiri::HTML5.fragment(html) tree.xpath('./text()|.//text()[not(ancestor[@class="invisible"])]').to_a.each do |node| i = -1 inside_shortname = false @@ -43,8 +43,8 @@ class EmojiFormatter next unless (char_after.nil? || !DISALLOWED_BOUNDING_REGEX.match?(char_after)) && (emoji = emoji_map[shortcode]) - result << Nokogiri::XML::Text.new(text[last_index..shortname_start_index - 1], tree.document) if shortname_start_index.positive? - result << Nokogiri::HTML.fragment(tag_for_emoji(shortcode, emoji)) + result << tree.document.create_text_node(text[last_index..shortname_start_index - 1]) if shortname_start_index.positive? + result << tree.document.fragment(tag_for_emoji(shortcode, emoji)) last_index = i + 1 elsif text[i] == ':' && (i.zero? || !DISALLOWED_BOUNDING_REGEX.match?(text[i - 1])) @@ -53,7 +53,7 @@ class EmojiFormatter end end - result << Nokogiri::XML::Text.new(text[last_index..], tree.document) + result << tree.document.create_text_node(text[last_index..]) node.replace(result) end diff --git a/app/lib/plain_text_formatter.rb b/app/lib/plain_text_formatter.rb index d1ff6808b..f960ba7ac 100644 --- a/app/lib/plain_text_formatter.rb +++ b/app/lib/plain_text_formatter.rb @@ -16,7 +16,7 @@ class PlainTextFormatter if local? text else - node = Nokogiri::HTML.fragment(insert_newlines) + node = Nokogiri::HTML5.fragment(insert_newlines) # Elements that are entirely removed with our Sanitize config node.xpath('.//iframe|.//math|.//noembed|.//noframes|.//noscript|.//plaintext|.//script|.//style|.//svg|.//xmp').remove node.text.chomp diff --git a/app/services/fetch_oembed_service.rb b/app/services/fetch_oembed_service.rb index dc84b16b6..c7d4f7e29 100644 --- a/app/services/fetch_oembed_service.rb +++ b/app/services/fetch_oembed_service.rb @@ -25,7 +25,7 @@ class FetchOEmbedService return if html.nil? @format = @options[:format] - page = Nokogiri::HTML(html) + page = Nokogiri::HTML5(html) if @format.nil? || @format == :json @endpoint_url ||= page.at_xpath('//link[@type="application/json+oembed"]|//link[@type="text/json+oembed"]')&.attribute('href')&.value diff --git a/app/services/fetch_resource_service.rb b/app/services/fetch_resource_service.rb index 84c36f6a1..b69015a5e 100644 --- a/app/services/fetch_resource_service.rb +++ b/app/services/fetch_resource_service.rb @@ -73,7 +73,7 @@ class FetchResourceService < BaseService end def process_html(response) - page = Nokogiri::HTML(response.body_with_limit) + page = Nokogiri::HTML5(response.body_with_limit) json_link = page.xpath('//link[@rel="alternate"]').find { |link| ACTIVITY_STREAM_LINK_TYPES.include?(link['type']) } process(json_link['href'], terminal: true) unless json_link.nil? diff --git a/app/services/translate_status_service.rb b/app/services/translate_status_service.rb index 9ad146ae7..e2e076e21 100644 --- a/app/services/translate_status_service.rb +++ b/app/services/translate_status_service.rb @@ -100,7 +100,7 @@ class TranslateStatusService < BaseService end def unwrap_emoji_shortcodes(html) - fragment = Nokogiri::HTML.fragment(html) + fragment = Nokogiri::HTML5.fragment(html) fragment.css('span[translate="no"]').each do |element| element.remove_attribute('translate') element.replace(element.children) if element.attributes.empty? diff --git a/lib/sanitize_ext/sanitize_config.rb b/lib/sanitize_ext/sanitize_config.rb index ad310b393..f0a7b6578 100644 --- a/lib/sanitize_ext/sanitize_config.rb +++ b/lib/sanitize_ext/sanitize_config.rb @@ -52,7 +52,7 @@ class Sanitize :relative end - current_node.replace(Nokogiri::XML::Text.new(current_node.text, current_node.document)) unless LINK_PROTOCOLS.include?(scheme) + current_node.replace(current_node.document.create_text_node(current_node.text)) unless LINK_PROTOCOLS.include?(scheme) end UNSUPPORTED_ELEMENTS_TRANSFORMER = lambda do |env| diff --git a/lib/tasks/emojis.rake b/lib/tasks/emojis.rake index e9fea2dee..fb18f21cf 100644 --- a/lib/tasks/emojis.rake +++ b/lib/tasks/emojis.rake @@ -13,7 +13,7 @@ def gen_border(codepoint, color) view_box[3] += 4 svg['viewBox'] = view_box.join(' ') end - g = Nokogiri::XML::Node.new 'g', doc + g = doc.create_element('g') doc.css('svg > *').each do |elem| border_elem = elem.dup