From 7bd5ebb0c50bb481c85ef81816089a04746f762d Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Mon, 28 Aug 2023 11:36:17 +0200 Subject: [PATCH] Fix multiple issues with status index mappings (#26686) --- app/chewy/accounts_index.rb | 9 +++++---- app/chewy/public_statuses_index.rb | 14 ++++++++++---- app/chewy/statuses_index.rb | 14 ++++++++++---- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/app/chewy/accounts_index.rb b/app/chewy/accounts_index.rb index 61e3399aa..8881b08f6 100644 --- a/app/chewy/accounts_index.rb +++ b/app/chewy/accounts_index.rb @@ -21,19 +21,20 @@ class AccountsIndex < Chewy::Index analyzer: { natural: { - tokenizer: 'uax_url_email', + tokenizer: 'standard', filter: %w( - english_possessive_stemmer lowercase asciifolding cjk_width + elision + english_possessive_stemmer english_stop english_stemmer ), }, verbatim: { - tokenizer: 'standard', + tokenizer: 'uax_url_email', filter: %w(lowercase asciifolding cjk_width), }, @@ -62,6 +63,6 @@ class AccountsIndex < Chewy::Index field(:last_status_at, type: 'date', value: ->(account) { account.last_status_at || account.created_at }) field(:display_name, type: 'text', analyzer: 'verbatim') { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' } field(:username, type: 'text', analyzer: 'verbatim', value: ->(account) { [account.username, account.domain].compact.join('@') }) { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' } - field(:text, type: 'text', analyzer: 'whitespace', value: ->(account) { account.searchable_text }) { field :stemmed, type: 'text', analyzer: 'natural' } + field(:text, type: 'text', analyzer: 'verbatim', value: ->(account) { account.searchable_text }) { field :stemmed, type: 'text', analyzer: 'natural' } end end diff --git a/app/chewy/public_statuses_index.rb b/app/chewy/public_statuses_index.rb index 1fad5de3a..5c68a1365 100644 --- a/app/chewy/public_statuses_index.rb +++ b/app/chewy/public_statuses_index.rb @@ -20,13 +20,19 @@ class PublicStatusesIndex < Chewy::Index }, analyzer: { - content: { + verbatim: { tokenizer: 'uax_url_email', + filter: %w(lowercase), + }, + + content: { + tokenizer: 'standard', filter: %w( - english_possessive_stemmer lowercase asciifolding cjk_width + elision + english_possessive_stemmer english_stop english_stemmer ), @@ -40,9 +46,9 @@ class PublicStatusesIndex < Chewy::Index .includes(:media_attachments, :preloadable_poll, :preview_cards) root date_detection: false do - field(:id, type: 'keyword') + field(:id, type: 'long') field(:account_id, type: 'long') - field(:text, type: 'text', analyzer: 'whitespace', value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: 'content') } + field(:text, type: 'text', analyzer: 'verbatim', value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: 'content') } field(:language, type: 'keyword') field(:properties, type: 'keyword', value: ->(status) { status.searchable_properties }) field(:created_at, type: 'date') diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb index 130f8801d..6d3352105 100644 --- a/app/chewy/statuses_index.rb +++ b/app/chewy/statuses_index.rb @@ -20,13 +20,19 @@ class StatusesIndex < Chewy::Index }, analyzer: { - content: { + verbatim: { tokenizer: 'uax_url_email', + filter: %w(lowercase), + }, + + content: { + tokenizer: 'standard', filter: %w( - english_possessive_stemmer lowercase asciifolding cjk_width + elision + english_possessive_stemmer english_stop english_stemmer ), @@ -64,9 +70,9 @@ class StatusesIndex < Chewy::Index end root date_detection: false do - field(:id, type: 'keyword') + field(:id, type: 'long') field(:account_id, type: 'long') - field(:text, type: 'text', analyzer: 'whitespace', value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: 'content') } + field(:text, type: 'text', analyzer: 'verbatim', value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: 'content') } field(:searchable_by, type: 'long', value: ->(status, crutches) { status.searchable_by(crutches) }) field(:language, type: 'keyword') field(:properties, type: 'keyword', value: ->(status) { status.searchable_properties })