Fix ・ detection in hashtag regex to construct hashtag correctly (#22888)
* Fix ・ detection in hashtag regex to construct hashtag correctly * Fixed rubocop liniting issues * More rubocop linting fix
This commit is contained in:
parent
546e301bcd
commit
115ab2869b
2 changed files with 50 additions and 31 deletions
|
@ -26,8 +26,12 @@ class Tag < ApplicationRecord
|
||||||
has_many :featured_tags, dependent: :destroy, inverse_of: :tag
|
has_many :featured_tags, dependent: :destroy, inverse_of: :tag
|
||||||
has_many :followers, through: :passive_relationships, source: :account
|
has_many :followers, through: :passive_relationships, source: :account
|
||||||
|
|
||||||
HASHTAG_SEPARATORS = "_\u00B7\u200c"
|
HASHTAG_SEPARATORS = "_\u00B7\u30FB\u200c"
|
||||||
HASHTAG_NAME_PAT = "([[:word:]_][[:word:]#{HASHTAG_SEPARATORS}]*[[:alpha:]#{HASHTAG_SEPARATORS}][[:word:]#{HASHTAG_SEPARATORS}]*[[:word:]_])|([[:word:]_]*[[:alpha:]][[:word:]_]*)"
|
HASHTAG_FIRST_SEQUENCE_CHUNK_ONE = "[[:word:]_][[:word:]#{HASHTAG_SEPARATORS}]*[[:alpha:]#{HASHTAG_SEPARATORS}]"
|
||||||
|
HASHTAG_FIRST_SEQUENCE_CHUNK_TWO = "[[:word:]#{HASHTAG_SEPARATORS}]*[[:word:]_]"
|
||||||
|
HASHTAG_FIRST_SEQUENCE = "(#{HASHTAG_FIRST_SEQUENCE_CHUNK_ONE}#{HASHTAG_FIRST_SEQUENCE_CHUNK_TWO})"
|
||||||
|
HASTAG_LAST_SEQUENCE = '([[:word:]_]*[[:alpha:]][[:word:]_]*)'
|
||||||
|
HASHTAG_NAME_PAT = "#{HASHTAG_FIRST_SEQUENCE}|#{HASTAG_LAST_SEQUENCE}"
|
||||||
|
|
||||||
HASHTAG_RE = /(?:^|[^\/\)\w])#(#{HASHTAG_NAME_PAT})/i
|
HASHTAG_RE = /(?:^|[^\/\)\w])#(#{HASHTAG_NAME_PAT})/i
|
||||||
HASHTAG_NAME_RE = /\A(#{HASHTAG_NAME_PAT})\z/i
|
HASHTAG_NAME_RE = /\A(#{HASHTAG_NAME_PAT})\z/i
|
||||||
|
@ -45,7 +49,11 @@ class Tag < ApplicationRecord
|
||||||
scope :listable, -> { where(listable: [true, nil]) }
|
scope :listable, -> { where(listable: [true, nil]) }
|
||||||
scope :trendable, -> { Setting.trendable_by_default ? where(trendable: [true, nil]) : where(trendable: true) }
|
scope :trendable, -> { Setting.trendable_by_default ? where(trendable: [true, nil]) : where(trendable: true) }
|
||||||
scope :not_trendable, -> { where(trendable: false) }
|
scope :not_trendable, -> { where(trendable: false) }
|
||||||
scope :recently_used, ->(account) { joins(:statuses).where(statuses: { id: account.statuses.select(:id).limit(1000) }).group(:id).order(Arel.sql('count(*) desc')) }
|
scope :recently_used, ->(account) {
|
||||||
|
joins(:statuses)
|
||||||
|
.where(statuses: { id: account.statuses.select(:id).limit(1000) })
|
||||||
|
.group(:id).order(Arel.sql('count(*) desc'))
|
||||||
|
}
|
||||||
scope :matches_name, ->(term) { where(arel_table[:name].lower.matches(arel_table.lower("#{sanitize_sql_like(Tag.normalize(term))}%"), nil, true)) } # Search with case-sensitive to use B-tree index
|
scope :matches_name, ->(term) { where(arel_table[:name].lower.matches(arel_table.lower("#{sanitize_sql_like(Tag.normalize(term))}%"), nil, true)) } # Search with case-sensitive to use B-tree index
|
||||||
|
|
||||||
update_index('tags', :self)
|
update_index('tags', :self)
|
||||||
|
@ -105,7 +113,8 @@ class Tag < ApplicationRecord
|
||||||
names = Array(name_or_names).map { |str| [normalize(str), str] }.uniq(&:first)
|
names = Array(name_or_names).map { |str| [normalize(str), str] }.uniq(&:first)
|
||||||
|
|
||||||
names.map do |(normalized_name, display_name)|
|
names.map do |(normalized_name, display_name)|
|
||||||
tag = matching_name(normalized_name).first || create(name: normalized_name, display_name: display_name.gsub(HASHTAG_INVALID_CHARS_RE, ''))
|
tag = matching_name(normalized_name).first || create(name: normalized_name,
|
||||||
|
display_name: display_name.gsub(HASHTAG_INVALID_CHARS_RE, ''))
|
||||||
|
|
||||||
yield tag if block_given?
|
yield tag if block_given?
|
||||||
|
|
||||||
|
@ -154,6 +163,9 @@ class Tag < ApplicationRecord
|
||||||
end
|
end
|
||||||
|
|
||||||
def validate_display_name_change
|
def validate_display_name_change
|
||||||
errors.add(:display_name, I18n.t('tags.does_not_match_previous_name')) unless HashtagNormalizer.new.normalize(display_name).casecmp(name.mb_chars).zero?
|
unless HashtagNormalizer.new.normalize(display_name).casecmp(name.mb_chars).zero?
|
||||||
|
errors.add(:display_name,
|
||||||
|
I18n.t('tags.does_not_match_previous_name'))
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -1,21 +1,22 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
require 'rails_helper'
|
require 'rails_helper'
|
||||||
|
|
||||||
RSpec.describe Tag, type: :model do
|
RSpec.describe Tag do
|
||||||
describe 'validations' do
|
describe 'validations' do
|
||||||
it 'invalid with #' do
|
it 'invalid with #' do
|
||||||
expect(Tag.new(name: '#hello_world')).to_not be_valid
|
expect(described_class.new(name: '#hello_world')).not_to be_valid
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'invalid with .' do
|
it 'invalid with .' do
|
||||||
expect(Tag.new(name: '.abcdef123')).to_not be_valid
|
expect(described_class.new(name: '.abcdef123')).not_to be_valid
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'invalid with spaces' do
|
it 'invalid with spaces' do
|
||||||
expect(Tag.new(name: 'hello world')).to_not be_valid
|
expect(described_class.new(name: 'hello world')).not_to be_valid
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'valid with aesthetic' do
|
it 'valid with aesthetic' do
|
||||||
expect(Tag.new(name: 'aesthetic')).to be_valid
|
expect(described_class.new(name: 'aesthetic')).to be_valid
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -62,6 +63,10 @@ RSpec.describe Tag, type: :model do
|
||||||
expect(subject.match('hello #one·two·three').to_s).to eq ' #one·two·three'
|
expect(subject.match('hello #one·two·three').to_s).to eq ' #one·two·three'
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it 'matches ・unicode in ぼっち・ざ・ろっく correctly' do
|
||||||
|
expect(subject.match('testing #ぼっち・ざ・ろっく').to_s).to eq ' #ぼっち・ざ・ろっく'
|
||||||
|
end
|
||||||
|
|
||||||
it 'matches ZWNJ' do
|
it 'matches ZWNJ' do
|
||||||
expect(subject.match('just add #نرمافزار and').to_s).to eq ' #نرمافزار'
|
expect(subject.match('just add #نرمافزار and').to_s).to eq ' #نرمافزار'
|
||||||
end
|
end
|
||||||
|
@ -89,44 +94,46 @@ RSpec.describe Tag, type: :model do
|
||||||
describe '.find_normalized' do
|
describe '.find_normalized' do
|
||||||
it 'returns tag for a multibyte case-insensitive name' do
|
it 'returns tag for a multibyte case-insensitive name' do
|
||||||
upcase_string = 'abcABCabcABCやゆよ'
|
upcase_string = 'abcABCabcABCやゆよ'
|
||||||
downcase_string = 'abcabcabcabcやゆよ';
|
downcase_string = 'abcabcabcabcやゆよ'
|
||||||
|
|
||||||
tag = Fabricate(:tag, name: HashtagNormalizer.new.normalize(downcase_string))
|
tag = Fabricate(:tag, name: HashtagNormalizer.new.normalize(downcase_string))
|
||||||
expect(Tag.find_normalized(upcase_string)).to eq tag
|
expect(described_class.find_normalized(upcase_string)).to eq tag
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
describe '.matches_name' do
|
describe '.matches_name' do
|
||||||
it 'returns tags for multibyte case-insensitive names' do
|
it 'returns tags for multibyte case-insensitive names' do
|
||||||
upcase_string = 'abcABCabcABCやゆよ'
|
upcase_string = 'abcABCabcABCやゆよ'
|
||||||
downcase_string = 'abcabcabcabcやゆよ';
|
downcase_string = 'abcabcabcabcやゆよ'
|
||||||
|
|
||||||
tag = Fabricate(:tag, name: HashtagNormalizer.new.normalize(downcase_string))
|
tag = Fabricate(:tag, name: HashtagNormalizer.new.normalize(downcase_string))
|
||||||
expect(Tag.matches_name(upcase_string)).to eq [tag]
|
expect(described_class.matches_name(upcase_string)).to eq [tag]
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'uses the LIKE operator' do
|
it 'uses the LIKE operator' do
|
||||||
expect(Tag.matches_name('100%abc').to_sql).to eq %q[SELECT "tags".* FROM "tags" WHERE LOWER("tags"."name") LIKE LOWER('100abc%')]
|
result = %q[SELECT "tags".* FROM "tags" WHERE LOWER("tags"."name") LIKE LOWER('100abc%')]
|
||||||
|
expect(described_class.matches_name('100%abc').to_sql).to eq result
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
describe '.matching_name' do
|
describe '.matching_name' do
|
||||||
it 'returns tags for multibyte case-insensitive names' do
|
it 'returns tags for multibyte case-insensitive names' do
|
||||||
upcase_string = 'abcABCabcABCやゆよ'
|
upcase_string = 'abcABCabcABCやゆよ'
|
||||||
downcase_string = 'abcabcabcabcやゆよ';
|
downcase_string = 'abcabcabcabcやゆよ'
|
||||||
|
|
||||||
tag = Fabricate(:tag, name: HashtagNormalizer.new.normalize(downcase_string))
|
tag = Fabricate(:tag, name: HashtagNormalizer.new.normalize(downcase_string))
|
||||||
expect(Tag.matching_name(upcase_string)).to eq [tag]
|
expect(described_class.matching_name(upcase_string)).to eq [tag]
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
describe '.find_or_create_by_names' do
|
describe '.find_or_create_by_names' do
|
||||||
it 'runs a passed block once per tag regardless of duplicates' do
|
let(:upcase_string) { 'abcABCabcABCやゆよ' }
|
||||||
upcase_string = 'abcABCabcABCやゆよ'
|
let(:downcase_string) { 'abcabcabcabcやゆよ' }
|
||||||
downcase_string = 'abcabcabcabcやゆよ';
|
|
||||||
count = 0
|
|
||||||
|
|
||||||
Tag.find_or_create_by_names([upcase_string, downcase_string]) do |tag|
|
it 'runs a passed block once per tag regardless of duplicates' do
|
||||||
|
count = 0
|
||||||
|
|
||||||
|
described_class.find_or_create_by_names([upcase_string, downcase_string]) do |_tag|
|
||||||
count += 1
|
count += 1
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -136,28 +143,28 @@ RSpec.describe Tag, type: :model do
|
||||||
|
|
||||||
describe '.search_for' do
|
describe '.search_for' do
|
||||||
it 'finds tag records with matching names' do
|
it 'finds tag records with matching names' do
|
||||||
tag = Fabricate(:tag, name: "match")
|
tag = Fabricate(:tag, name: 'match')
|
||||||
_miss_tag = Fabricate(:tag, name: "miss")
|
_miss_tag = Fabricate(:tag, name: 'miss')
|
||||||
|
|
||||||
results = Tag.search_for("match")
|
results = described_class.search_for('match')
|
||||||
|
|
||||||
expect(results).to eq [tag]
|
expect(results).to eq [tag]
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'finds tag records in case insensitive' do
|
it 'finds tag records in case insensitive' do
|
||||||
tag = Fabricate(:tag, name: "MATCH")
|
tag = Fabricate(:tag, name: 'MATCH')
|
||||||
_miss_tag = Fabricate(:tag, name: "miss")
|
_miss_tag = Fabricate(:tag, name: 'miss')
|
||||||
|
|
||||||
results = Tag.search_for("match")
|
results = described_class.search_for('match')
|
||||||
|
|
||||||
expect(results).to eq [tag]
|
expect(results).to eq [tag]
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'finds the exact matching tag as the first item' do
|
it 'finds the exact matching tag as the first item' do
|
||||||
similar_tag = Fabricate(:tag, name: "matchlater", reviewed_at: Time.now.utc)
|
similar_tag = Fabricate(:tag, name: 'matchlater', reviewed_at: Time.now.utc)
|
||||||
tag = Fabricate(:tag, name: "match", reviewed_at: Time.now.utc)
|
tag = Fabricate(:tag, name: 'match', reviewed_at: Time.now.utc)
|
||||||
|
|
||||||
results = Tag.search_for("match")
|
results = described_class.search_for('match')
|
||||||
|
|
||||||
expect(results).to eq [tag, similar_tag]
|
expect(results).to eq [tag, similar_tag]
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue