Add support for magnet: URIs (#12905)

This commit is contained in:
ThibG 2020-01-23 21:27:26 +01:00 committed by Eugen Rochko
parent c0006a004d
commit a8e46cf7a1
4 changed files with 32 additions and 17 deletions

View file

@ -245,9 +245,9 @@ class Formatter
end end
standard = Extractor.extract_entities_with_indices(text, options) standard = Extractor.extract_entities_with_indices(text, options)
xmpp = Extractor.extract_xmpp_uris_with_indices(text, options) extra = Extractor.extract_extra_uris_with_indices(text, options)
Extractor.remove_overlapping_entities(special + standard + xmpp) Extractor.remove_overlapping_entities(special + standard + extra)
end end
def link_to_url(entity, options = {}) def link_to_url(entity, options = {})

View file

@ -2,7 +2,7 @@
class Sanitize class Sanitize
module Config module Config
HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', 'xmpp', :relative].freeze HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', 'xmpp', 'magnet', :relative].freeze
CLASS_WHITELIST_TRANSFORMER = lambda do |env| CLASS_WHITELIST_TRANSFORMER = lambda do |env|
node = env[:node] node = env[:node]

View file

@ -47,32 +47,39 @@ module Twitter
#{REGEXEN[:validate_url_pct_encoded]}| #{REGEXEN[:validate_url_pct_encoded]}|
#{REGEXEN[:validate_url_sub_delims]} #{REGEXEN[:validate_url_sub_delims]}
)/iox )/iox
REGEXEN[:valid_xmpp_uri] = %r{ REGEXEN[:xmpp_uri] = %r{
( # $1 total match (xmpp:) # Protocol
(#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character (//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)? # Authority (optional)
( # $3 URL (#{REGEXEN[:validate_nodeid]}+@)? # Username in path (optional)
((?:xmpp):) # $4 Protocol (#{REGEXEN[:valid_domain]}) # Domain in path
(//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)? # $5 Authority (optional) (/#{REGEXEN[:validate_resid]}+)? # Resource in path (optional)
(#{REGEXEN[:validate_nodeid]}+@)? # $6 Username in path (optional) (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # Query String
(#{REGEXEN[:valid_domain]}) # $7 Domain in path }iox
(/#{REGEXEN[:validate_resid]}+)? # $8 Resource in path (optional) REGEXEN[:magnet_uri] = %r{
(\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $9 Query String (magnet:) # Protocol
(\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]}) # Query String
}iox
REGEXEN[:valid_extended_uri] = %r{
( # $1 total match
(#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
( # $3 URL
(#{REGEXEN[:xmpp_uri]}) | (#{REGEXEN[:magnet_uri]})
) )
) )
}iox }iox
end end
module Extractor module Extractor
# Extracts a list of all XMPP URIs included in the Tweet <tt>text</tt> along # Extracts a list of all XMPP and magnet URIs included in the Toot <tt>text</tt> along
# with the indices. If the <tt>text</tt> is <tt>nil</tt> or contains no # with the indices. If the <tt>text</tt> is <tt>nil</tt> or contains no
# XMPP URIs an empty array will be returned. # XMPP or magnet URIs an empty array will be returned.
# #
# If a block is given then it will be called for each XMPP URI. # If a block is given then it will be called for each XMPP or magnet URI.
def extract_xmpp_uris_with_indices(text, options = {}) # :yields: uri, start, end def extract_extra_uris_with_indices(text, options = {}) # :yields: uri, start, end
return [] unless text && text.index(":") return [] unless text && text.index(":")
urls = [] urls = []
text.to_s.scan(Twitter::Regex[:valid_xmpp_uri]) do text.to_s.scan(Twitter::Regex[:valid_extended_uri]) do
valid_uri_match_data = $~ valid_uri_match_data = $~
start_position = valid_uri_match_data.char_begin(3) start_position = valid_uri_match_data.char_begin(3)

View file

@ -258,6 +258,14 @@ RSpec.describe Formatter do
is_expected.to include 'href="xmpp:muc@instance.com?join"' is_expected.to include 'href="xmpp:muc@instance.com?join"'
end end
end end
context 'given text containing a magnet: URI' do
  # Sample status text: plain prose followed by a magnet link whose only
  # component is a query string (there is no authority or path part).
  let(:text) do
    'wikipedia gives this example of a magnet uri: magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a'
  end

  # The rendered output must contain the whole URI, query string included,
  # as the href value rather than a truncated prefix.
  it 'matches the full URI' do
    expect(subject).to include('href="magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a"')
  end
end
end end
describe '#format_spoiler' do describe '#format_spoiler' do