Add support for linking XMPP URIs in toots (#12709)

* Fix wrong grouping in Twitter valid_url regex

* Add support for xmpp URIs

Fixes #9776

The difficult part is autolinking, because Twitter-text's extractor does
some pretty ad-hoc stuff to find things that “look like” URLs, and XMPP
URIs do not really match the assumptions of that lib, so it doesn't sound
wise to try to shoehorn it into the existing regex.

This is why I used a specific regex (very close, although slightly more
permissive than the RFC), and a specific scan function (a simplified version
of the generalized one from Twitter).

* Remove leading “xmpp:” from auto-linked text
This commit is contained in:
ThibG 2020-01-11 02:15:25 +01:00 committed by Eugen Rochko
parent e9ea09d173
commit ea436b355b
4 changed files with 70 additions and 4 deletions

View file

@ -245,8 +245,9 @@ class Formatter
end end
standard = Extractor.extract_entities_with_indices(text, options) standard = Extractor.extract_entities_with_indices(text, options)
xmpp = Extractor.extract_xmpp_uris_with_indices(text, options)
Extractor.remove_overlapping_entities(special + standard) Extractor.remove_overlapping_entities(special + standard + xmpp)
end end
def link_to_url(entity, options = {}) def link_to_url(entity, options = {})
@ -284,7 +285,7 @@ class Formatter
def link_html(url) def link_html(url)
url = Addressable::URI.parse(url).to_s url = Addressable::URI.parse(url).to_s
prefix = url.match(/\Ahttps?:\/\/(www\.)?/).to_s prefix = url.match(/\A(https?:\/\/(www\.)?|xmpp:)/).to_s
text = url[prefix.length, 30] text = url[prefix.length, 30]
suffix = url[prefix.length + 30..-1] suffix = url[prefix.length + 30..-1]
cutoff = url[prefix.length..-1].length > 30 cutoff = url[prefix.length..-1].length > 30

View file

@ -2,7 +2,7 @@
class Sanitize class Sanitize
module Config module Config
HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', :relative].freeze HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', 'xmpp', :relative].freeze
CLASS_WHITELIST_TRANSFORMER = lambda do |env| CLASS_WHITELIST_TRANSFORMER = lambda do |env|
node = env[:node] node = env[:node]

View file

@ -29,7 +29,7 @@ module Twitter
( # $1 total match ( # $1 total match
(#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character (#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
( # $3 URL ( # $3 URL
((https?|dat|dweb|ipfs|ipns|ssb|gopher):\/\/)? # $4 Protocol (optional) ((?:https?|dat|dweb|ipfs|ipns|ssb|gopher):\/\/)? # $4 Protocol (optional)
(#{REGEXEN[:valid_domain]}) # $5 Domain(s) (#{REGEXEN[:valid_domain]}) # $5 Domain(s)
(?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional) (?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional)
(/#{REGEXEN[:valid_url_path]}*)? # $7 URL Path and anchor (/#{REGEXEN[:valid_url_path]}*)? # $7 URL Path and anchor
@ -37,5 +37,54 @@ module Twitter
) )
) )
}iox }iox
REGEXEN[:validate_nodeid] = /(?:
#{REGEXEN[:validate_url_unreserved]}|
#{REGEXEN[:validate_url_pct_encoded]}|
[!$()*+,;=]
)/iox
REGEXEN[:validate_resid] = /(?:
#{REGEXEN[:validate_url_unreserved]}|
#{REGEXEN[:validate_url_pct_encoded]}|
#{REGEXEN[:validate_url_sub_delims]}
)/iox
REGEXEN[:valid_xmpp_uri] = %r{
( # $1 total match
(#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
( # $3 URL
((?:xmpp):) # $4 Protocol
(//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)? # $5 Authority (optional)
(#{REGEXEN[:validate_nodeid]}+@)? # $6 Username in path (optional)
(#{REGEXEN[:valid_domain]}) # $7 Domain in path
(/#{REGEXEN[:validate_resid]}+)? # $8 Resource in path (optional)
(\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $9 Query String
)
)
}iox
end
module Extractor
# Extracts a list of all XMPP URIs included in the Tweet <tt>text</tt> along
# with the indices. If the <tt>text</tt> is <tt>nil</tt> or contains no
# XMPP URIs an empty array will be returned.
#
# If a block is given then it will be called for each XMPP URI.
def extract_xmpp_uris_with_indices(text, options = {}) # :yields: uri, start, end
return [] unless text && text.index(":")
urls = []
text.to_s.scan(Twitter::Regex[:valid_xmpp_uri]) do
valid_uri_match_data = $~
start_position = valid_uri_match_data.char_begin(3)
end_position = valid_uri_match_data.char_end(3)
urls << {
:url => valid_uri_match_data[3],
:indices => [start_position, end_position]
}
end
urls.each{|url| yield url[:url], url[:indices].first, url[:indices].last} if block_given?
urls
end
end end
end end

View file

@ -242,6 +242,22 @@ RSpec.describe Formatter do
is_expected.to include '/tags/hashtag%E3%82%BF%E3%82%B0" class="mention hashtag" rel="tag">#<span>hashtagタグ</span></a>' is_expected.to include '/tags/hashtag%E3%82%BF%E3%82%B0" class="mention hashtag" rel="tag">#<span>hashtagタグ</span></a>'
end end
end end
context 'given a stand-alone xmpp: URI' do
let(:text) { 'xmpp:user@instance.com' }
it 'matches the full URI' do
is_expected.to include 'href="xmpp:user@instance.com"'
end
end
context 'given a an xmpp: URI with a query-string' do
let(:text) { 'please join xmpp:muc@instance.com?join right now' }
it 'matches the full URI' do
is_expected.to include 'href="xmpp:muc@instance.com?join"'
end
end
end end
describe '#format_spoiler' do describe '#format_spoiler' do