Change account search tokenizer and queries (#26378)
This commit is contained in:
parent
60fbb0fe91
commit
72423bc8f6
2 changed files with 144 additions and 89 deletions
|
@ -33,7 +33,7 @@ class AccountsIndex < Chewy::Index
|
||||||
},
|
},
|
||||||
|
|
||||||
verbatim: {
|
verbatim: {
|
||||||
tokenizer: 'whitespace',
|
tokenizer: 'standard',
|
||||||
filter: %w(lowercase asciifolding cjk_width),
|
filter: %w(lowercase asciifolding cjk_width),
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
|
@ -8,6 +8,143 @@ class AccountSearchService < BaseService
|
||||||
# Min. number of characters to look for non-exact matches
|
# Min. number of characters to look for non-exact matches
|
||||||
MIN_QUERY_LENGTH = 5
|
MIN_QUERY_LENGTH = 5
|
||||||
|
|
||||||
|
# Base class that assembles the Elasticsearch query for account search.
#
# Subclasses must define a private +core_query+ method returning the main
# text-matching clause; this class wraps that clause in scoring functions
# and in the optional "following" filter/boost.
class QueryBuilder
  # @param query [String] the search terms handed to +core_query+
  # @param account [Object, nil] the searching account, or nil when there
  #   is none; it must respond to +id+ and +active_relationships+
  # @param options [Hash] only the +:following+ key is read by this class
  def initialize(query, account, options = {})
    @query = query
    @account = account
    @options = options
  end

  # Builds the complete query against AccountsIndex.
  #
  # The must clauses are wrapped in a function_score query so the three
  # scoring functions adjust the relevance of every match; the should
  # clauses are kept outside of it.
  #
  # @return whatever +AccountsIndex.query+ returns
  def build
    AccountsIndex.query(
      bool: {
        must: {
          function_score: {
            query: {
              bool: {
                must: must_clauses,
              },
            },

            functions: [
              reputation_score_function,
              followers_score_function,
              time_distance_function,
            ],
          },
        },

        should: should_clauses,
      }
    )
  end

  private

  # Clauses every result has to satisfy: the core text query, plus the
  # following filter when an account is present and :following is set.
  def must_clauses
    if @account && @options[:following]
      [core_query, only_following_query]
    else
      [core_query]
    end
  end

  # Optional clauses that only affect scoring: followed accounts are
  # boosted unless results are already restricted to them.
  def should_clauses
    if @account && !@options[:following]
      [boost_following_query]
    else
      []
    end
  end

  # This function limits results to only the accounts the user is following
  def only_following_query
    {
      terms: {
        id: following_ids,
      },
    }
  end

  # This function promotes accounts the user is following
  def boost_following_query
    {
      terms: {
        id: following_ids,
        boost: 100,
      },
    }
  end

  # This function deranks accounts that follow more people than follow them
  def reputation_score_function
    {
      script_score: {
        script: {
          source: "(Math.max(doc['followers_count'].value, 0) + 0.0) / (Math.max(doc['followers_count'].value, 0) + Math.max(doc['following_count'].value, 0) + 1)",
        },
      },
    }
  end

  # This function promotes accounts that have more followers
  #
  # The `+ 0.0` forces floating-point division, exactly as in
  # reputation_score_function. Without it, an integer-typed
  # followers_count would make n / (n + 1) truncate to 0 for every account.
  # NOTE(review): the field mapping is not visible here; confirm the type.
  def followers_score_function
    {
      script_score: {
        script: {
          source: "(Math.max(doc['followers_count'].value, 0) + 0.0) / (Math.max(doc['followers_count'].value, 0) + 1)",
        },
      },
    }
  end

  # This function deranks accounts that haven't posted in a long time
  def time_distance_function
    {
      gauss: {
        last_status_at: {
          scale: '30d',
          offset: '30d',
          decay: 0.3,
        },
      },
    }
  end

  # IDs of the accounts targeted by the account's active relationships,
  # followed by the account's own ID. Memoized per builder instance.
  # Only called when @account is present.
  def following_ids
    @following_ids ||= @account.active_relationships.pluck(:target_account_id) + [@account.id]
  end
end
|
||||||
|
|
||||||
|
# Query builder whose core clause matches only on the username and
# display_name fields (and their sub-fields).
class AutocompleteQueryBuilder < QueryBuilder
  private

  # Core text clause: a multi_match of type `bool_prefix` over the name fields.
  def core_query
    name_match = {
      query: @query,
      type: 'bool_prefix',
      fields: %w(username username.* display_name display_name.*),
    }

    { multi_match: name_match }
  end
end
|
||||||
|
|
||||||
|
# Query builder whose core clause also searches the `text` field, with
# username and display_name weighted double.
class FullQueryBuilder < QueryBuilder
  private

  # Core text clause: a multi_match of type `most_fields` using the
  # `and` operator.
  def core_query
    text_match = {
      query: @query,
      type: 'most_fields',
      fields: %w(username^2 display_name^2 text text.*),
      operator: 'and',
    }

    { multi_match: text_match }
  end
end
|
||||||
|
|
||||||
def call(query, account = nil, options = {})
|
def call(query, account = nil, options = {})
|
||||||
@query = query&.strip&.gsub(/\A@/, '')
|
@query = query&.strip&.gsub(/\A@/, '')
|
||||||
@limit = options[:limit].to_i
|
@limit = options[:limit].to_i
|
||||||
|
@ -71,27 +208,15 @@ class AccountSearchService < BaseService
|
||||||
end
|
end
|
||||||
|
|
||||||
def from_elasticsearch
|
def from_elasticsearch
|
||||||
must_clauses = must_clause
|
query_builder = begin
|
||||||
should_clauses = should_clause
|
if options[:use_searchable_text]
|
||||||
|
FullQueryBuilder.new(terms_for_query, account, options.slice(:following))
|
||||||
if account
|
else
|
||||||
return [] if options[:following] && following_ids.empty?
|
AutocompleteQueryBuilder.new(terms_for_query, account, options.slice(:following))
|
||||||
|
|
||||||
if options[:following]
|
|
||||||
must_clauses << { terms: { id: following_ids } }
|
|
||||||
elsif following_ids.any?
|
|
||||||
should_clauses << { terms: { id: following_ids, boost: 100 } }
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
query = { bool: { must: must_clauses, should: should_clauses } }
|
records = query_builder.build.limit(limit_for_non_exact_results).offset(offset).objects.compact
|
||||||
functions = [reputation_score_function, followers_score_function, time_distance_function]
|
|
||||||
|
|
||||||
records = AccountsIndex.query(function_score: { query: query, functions: functions })
|
|
||||||
.limit(limit_for_non_exact_results)
|
|
||||||
.offset(offset)
|
|
||||||
.objects
|
|
||||||
.compact
|
|
||||||
|
|
||||||
ActiveRecord::Associations::Preloader.new(records: records, associations: :account_stat)
|
ActiveRecord::Associations::Preloader.new(records: records, associations: :account_stat)
|
||||||
|
|
||||||
|
@ -100,76 +225,6 @@ class AccountSearchService < BaseService
|
||||||
nil
|
nil
|
||||||
end
|
end
|
||||||
|
|
||||||
def reputation_score_function
|
|
||||||
{
|
|
||||||
script_score: {
|
|
||||||
script: {
|
|
||||||
source: "(Math.max(doc['followers_count'].value, 0) + 0.0) / (Math.max(doc['followers_count'].value, 0) + Math.max(doc['following_count'].value, 0) + 1)",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
end
|
|
||||||
|
|
||||||
def followers_score_function
|
|
||||||
{
|
|
||||||
script_score: {
|
|
||||||
script: {
|
|
||||||
source: "Math.log10(Math.max(doc['followers_count'].value, 0) + 2)",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
end
|
|
||||||
|
|
||||||
def time_distance_function
|
|
||||||
{
|
|
||||||
gauss: {
|
|
||||||
last_status_at: {
|
|
||||||
scale: '30d',
|
|
||||||
offset: '30d',
|
|
||||||
decay: 0.3,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
end
|
|
||||||
|
|
||||||
def must_clause
|
|
||||||
if options[:start_with_hashtag]
|
|
||||||
fields = %w(text text.*)
|
|
||||||
else
|
|
||||||
fields = %w(username username.* display_name display_name.*)
|
|
||||||
fields << 'text' << 'text.*' if options[:use_searchable_text]
|
|
||||||
end
|
|
||||||
|
|
||||||
[
|
|
||||||
{
|
|
||||||
multi_match: {
|
|
||||||
query: terms_for_query,
|
|
||||||
fields: fields,
|
|
||||||
type: 'best_fields',
|
|
||||||
operator: 'or',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
]
|
|
||||||
end
|
|
||||||
|
|
||||||
def should_clause
|
|
||||||
[
|
|
||||||
{
|
|
||||||
multi_match: {
|
|
||||||
query: terms_for_query,
|
|
||||||
fields: %w(username username.* display_name display_name.*),
|
|
||||||
type: 'best_fields',
|
|
||||||
operator: 'and',
|
|
||||||
boost: 10,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
]
|
|
||||||
end
|
|
||||||
|
|
||||||
def following_ids
|
|
||||||
@following_ids ||= account.active_relationships.pluck(:target_account_id) + [account.id]
|
|
||||||
end
|
|
||||||
|
|
||||||
def limit_for_non_exact_results
|
def limit_for_non_exact_results
|
||||||
return 0 if @account.nil? && query.size < MIN_QUERY_LENGTH
|
return 0 if @account.nil? && query.size < MIN_QUERY_LENGTH
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue