Add support for magnet: URIs (#12905)

This commit is contained in:
ThibG 2020-01-23 21:27:26 +01:00 committed by Eugen Rochko
parent c0006a004d
commit a8e46cf7a1
4 changed files with 32 additions and 17 deletions

View file

@ -245,9 +245,9 @@ class Formatter
end
standard = Extractor.extract_entities_with_indices(text, options)
xmpp = Extractor.extract_xmpp_uris_with_indices(text, options)
extra = Extractor.extract_extra_uris_with_indices(text, options)
Extractor.remove_overlapping_entities(special + standard + xmpp)
Extractor.remove_overlapping_entities(special + standard + extra)
end
def link_to_url(entity, options = {})

View file

@ -2,7 +2,7 @@
class Sanitize
module Config
HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', 'xmpp', :relative].freeze
HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', 'xmpp', 'magnet', :relative].freeze
CLASS_WHITELIST_TRANSFORMER = lambda do |env|
node = env[:node]

View file

@ -47,32 +47,39 @@ module Twitter
#{REGEXEN[:validate_url_pct_encoded]}|
#{REGEXEN[:validate_url_sub_delims]}
)/iox
REGEXEN[:valid_xmpp_uri] = %r{
REGEXEN[:xmpp_uri] = %r{
(xmpp:) # Protocol
(//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)? # Authority (optional)
(#{REGEXEN[:validate_nodeid]}+@)? # Username in path (optional)
(#{REGEXEN[:valid_domain]}) # Domain in path
(/#{REGEXEN[:validate_resid]}+)? # Resource in path (optional)
(\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # Query String
}iox
REGEXEN[:magnet_uri] = %r{
(magnet:) # Protocol
(\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]}) # Query String
}iox
REGEXEN[:valid_extended_uri] = %r{
( # $1 total match
(#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
( # $3 URL
((?:xmpp):) # $4 Protocol
(//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)? # $5 Authority (optional)
(#{REGEXEN[:validate_nodeid]}+@)? # $6 Username in path (optional)
(#{REGEXEN[:valid_domain]}) # $7 Domain in path
(/#{REGEXEN[:validate_resid]}+)? # $8 Resource in path (optional)
(\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $9 Query String
(#{REGEXEN[:xmpp_uri]}) | (#{REGEXEN[:magnet_uri]})
)
)
}iox
end
module Extractor
# Extracts a list of all XMPP URIs included in the Tweet <tt>text</tt> along
# Extracts a list of all XMPP and magnet URIs included in the Toot <tt>text</tt> along
# with the indices. If the <tt>text</tt> is <tt>nil</tt> or contains no
# XMPP URIs an empty array will be returned.
# XMPP or magnet URIs an empty array will be returned.
#
# If a block is given then it will be called for each XMPP URI.
def extract_xmpp_uris_with_indices(text, options = {}) # :yields: uri, start, end
def extract_extra_uris_with_indices(text, options = {}) # :yields: uri, start, end
return [] unless text && text.index(":")
urls = []
text.to_s.scan(Twitter::Regex[:valid_xmpp_uri]) do
text.to_s.scan(Twitter::Regex[:valid_extended_uri]) do
valid_uri_match_data = $~
start_position = valid_uri_match_data.char_begin(3)

View file

@ -258,6 +258,14 @@ RSpec.describe Formatter do
is_expected.to include 'href="xmpp:muc@instance.com?join"'
end
end
context 'given text containing a magnet: URI' do
let(:text) { 'wikipedia gives this example of a magnet uri: magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a' }
it 'matches the full URI' do
is_expected.to include 'href="magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a"'
end
end
end
describe '#format_spoiler' do