Refactor streaming's filtering logic & improve documentation (#26213)
This commit is contained in:
parent
f52763d926
commit
663f801337
1 changed files with 110 additions and 52 deletions
|
@ -634,29 +634,39 @@ const startWorker = async (workerId) => {
|
||||||
|
|
||||||
log.verbose(req.requestId, `Starting stream from ${ids.join(', ')} for ${accountId}`);
|
log.verbose(req.requestId, `Starting stream from ${ids.join(', ')} for ${accountId}`);
|
||||||
|
|
||||||
// Currently message is of type string, soon it'll be Record<string, any>
|
const transmit = (event, payload) => {
|
||||||
|
// TODO: Replace "string"-based delete payloads with object payloads:
|
||||||
|
const encodedPayload = typeof payload === 'object' ? JSON.stringify(payload) : payload;
|
||||||
|
|
||||||
|
log.silly(req.requestId, `Transmitting for ${accountId}: ${event} ${encodedPayload}`);
|
||||||
|
output(event, encodedPayload);
|
||||||
|
};
|
||||||
|
|
||||||
|
// The listener used to process each message off the redis subscription,
|
||||||
|
// message here is an object with an `event` and `payload` property. Some
|
||||||
|
// events also include a queued_at value, but this is being removed shortly.
|
||||||
const listener = message => {
|
const listener = message => {
|
||||||
const { event, payload, queued_at } = message;
|
const { event, payload } = message;
|
||||||
|
|
||||||
const transmit = (payload) => {
|
// Streaming only needs to apply filtering to some channels and only to
|
||||||
const now = new Date().getTime();
|
// some events. This is because majority of the filtering happens on the
|
||||||
const delta = now - queued_at;
|
// Ruby on Rails side when producing the event for streaming.
|
||||||
const encodedPayload = typeof payload === 'object' ? JSON.stringify(payload) : payload;
|
//
|
||||||
|
// The only events that require filtering from the streaming server are
|
||||||
log.silly(req.requestId, `Transmitting for ${accountId}: ${event} ${encodedPayload} Delay: ${delta}ms`);
|
// `update` and `status.update`, all other events are transmitted to the
|
||||||
output(event, encodedPayload);
|
// client as soon as they're received (pass-through).
|
||||||
};
|
//
|
||||||
|
// The channels that need filtering are determined in the function
|
||||||
// Only messages that may require filtering are statuses, since notifications
|
// `channelNameToIds` defined below:
|
||||||
// are already personalized and deletes do not matter
|
if (!needsFiltering || (event !== 'update' && event !== 'status.update')) {
|
||||||
if (!needsFiltering || event !== 'update') {
|
transmit(event, payload);
|
||||||
transmit(payload);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const targetAccountIds = [payload.account.id].concat(payload.mentions.map(item => item.id));
|
// The rest of the logic from here on in this function is to handle
|
||||||
const accountDomain = payload.account.acct.split('@')[1];
|
// filtering of statuses:
|
||||||
|
|
||||||
|
// Filter based on language:
|
||||||
if (Array.isArray(req.chosenLanguages) && payload.language !== null && req.chosenLanguages.indexOf(payload.language) === -1) {
|
if (Array.isArray(req.chosenLanguages) && payload.language !== null && req.chosenLanguages.indexOf(payload.language) === -1) {
|
||||||
log.silly(req.requestId, `Message ${payload.id} filtered by language (${payload.language})`);
|
log.silly(req.requestId, `Message ${payload.id} filtered by language (${payload.language})`);
|
||||||
return;
|
return;
|
||||||
|
@ -664,11 +674,16 @@ const startWorker = async (workerId) => {
|
||||||
|
|
||||||
// When the account is not logged in, it is not necessary to confirm the block or mute
|
// When the account is not logged in, it is not necessary to confirm the block or mute
|
||||||
if (!req.accountId) {
|
if (!req.accountId) {
|
||||||
transmit(payload);
|
transmit(event, payload);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
pgPool.connect((err, client, done) => {
|
// Filter based on domain blocks, blocks, mutes, or custom filters:
|
||||||
|
const targetAccountIds = [payload.account.id].concat(payload.mentions.map(item => item.id));
|
||||||
|
const accountDomain = payload.account.acct.split('@')[1];
|
||||||
|
|
||||||
|
// TODO: Move this logic out of the message handling loop
|
||||||
|
pgPool.connect((err, client, releasePgConnection) => {
|
||||||
if (err) {
|
if (err) {
|
||||||
log.error(err);
|
log.error(err);
|
||||||
return;
|
return;
|
||||||
|
@ -695,28 +710,45 @@ const startWorker = async (workerId) => {
|
||||||
}
|
}
|
||||||
|
|
||||||
Promise.all(queries).then(values => {
|
Promise.all(queries).then(values => {
|
||||||
done();
|
releasePgConnection();
|
||||||
|
|
||||||
|
// Handling blocks & mutes and domain blocks: If one of those applies,
|
||||||
|
// then we don't transmit the payload of the event to the client
|
||||||
if (values[0].rows.length > 0 || (accountDomain && values[1].rows.length > 0)) {
|
if (values[0].rows.length > 0 || (accountDomain && values[1].rows.length > 0)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!payload.filtered && !req.cachedFilters) {
|
// If the payload already contains the `filtered` property, it means
|
||||||
|
// that filtering has been applied on the ruby on rails side, as
|
||||||
|
// such, we don't need to construct or apply the filters in streaming:
|
||||||
|
if (Object.prototype.hasOwnProperty.call(payload, "filtered")) {
|
||||||
|
transmit(event, payload);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handling for constructing the custom filters and caching them on the request
|
||||||
|
// TODO: Move this logic out of the message handling lifecycle
|
||||||
|
if (!req.cachedFilters) {
|
||||||
const filterRows = values[accountDomain ? 2 : 1].rows;
|
const filterRows = values[accountDomain ? 2 : 1].rows;
|
||||||
|
|
||||||
req.cachedFilters = filterRows.reduce((cache, row) => {
|
req.cachedFilters = filterRows.reduce((cache, filter) => {
|
||||||
if (cache[row.id]) {
|
if (cache[filter.id]) {
|
||||||
cache[row.id].keywords.push([row.keyword, row.whole_word]);
|
cache[filter.id].keywords.push([filter.keyword, filter.whole_word]);
|
||||||
} else {
|
} else {
|
||||||
cache[row.id] = {
|
cache[filter.id] = {
|
||||||
keywords: [[row.keyword, row.whole_word]],
|
keywords: [[filter.keyword, filter.whole_word]],
|
||||||
expires_at: row.expires_at,
|
expires_at: filter.expires_at,
|
||||||
repr: {
|
filter: {
|
||||||
id: row.id,
|
id: filter.id,
|
||||||
title: row.title,
|
title: filter.title,
|
||||||
context: row.context,
|
context: filter.context,
|
||||||
expires_at: row.expires_at,
|
expires_at: filter.expires_at,
|
||||||
filter_action: ['warn', 'hide'][row.filter_action],
|
// filter.filter_action is the value from the
|
||||||
|
// custom_filters.action database column, it is an integer
|
||||||
|
// representing a value in an enum defined by Ruby on Rails:
|
||||||
|
//
|
||||||
|
// enum { warn: 0, hide: 1 }
|
||||||
|
filter_action: ['warn', 'hide'][filter.filter_action],
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -724,6 +756,10 @@ const startWorker = async (workerId) => {
|
||||||
return cache;
|
return cache;
|
||||||
}, {});
|
}, {});
|
||||||
|
|
||||||
|
// Construct the regular expressions for the custom filters: This
|
||||||
|
// needs to be done in a separate loop as the database returns one
|
||||||
|
// filterRow per keyword, so we need all the keywords before
|
||||||
|
// constructing the regular expression
|
||||||
Object.keys(req.cachedFilters).forEach((key) => {
|
Object.keys(req.cachedFilters).forEach((key) => {
|
||||||
req.cachedFilters[key].regexp = new RegExp(req.cachedFilters[key].keywords.map(([keyword, whole_word]) => {
|
req.cachedFilters[key].regexp = new RegExp(req.cachedFilters[key].keywords.map(([keyword, whole_word]) => {
|
||||||
let expr = keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');;
|
let expr = keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');;
|
||||||
|
@ -743,34 +779,56 @@ const startWorker = async (workerId) => {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check filters
|
// Apply cachedFilters against the payload, constructing a
|
||||||
if (req.cachedFilters && !payload.filtered) {
|
// `filter_results` array of FilterResult entities
|
||||||
const mutatedPayload = { ...payload };
|
if (req.cachedFilters) {
|
||||||
const status = payload;
|
const status = payload;
|
||||||
const searchContent = ([status.spoiler_text || '', status.content].concat((status.poll && status.poll.options) ? status.poll.options.map(option => option.title) : [])).concat(status.media_attachments.map(att => att.description)).join('\n\n').replace(/<br\s*\/?>/g, '\n').replace(/<\/p><p>/g, '\n\n');
|
// TODO: Calculate searchableContent in Ruby on Rails:
|
||||||
const searchIndex = JSDOM.fragment(searchContent).textContent;
|
const searchableContent = ([status.spoiler_text || '', status.content].concat((status.poll && status.poll.options) ? status.poll.options.map(option => option.title) : [])).concat(status.media_attachments.map(att => att.description)).join('\n\n').replace(/<br\s*\/?>/g, '\n').replace(/<\/p><p>/g, '\n\n');
|
||||||
|
const searchableTextContent = JSDOM.fragment(searchableContent).textContent;
|
||||||
|
|
||||||
const now = new Date();
|
const now = new Date();
|
||||||
mutatedPayload.filtered = [];
|
const filter_results = Object.values(req.cachedFilters).reduce((results, cachedFilter) => {
|
||||||
Object.values(req.cachedFilters).forEach((cachedFilter) => {
|
// Check the filter hasn't expired before applying:
|
||||||
if ((cachedFilter.expires_at === null || cachedFilter.expires_at > now)) {
|
if (cachedFilter.expires_at !== null && cachedFilter.expires_at < now) {
|
||||||
const keyword_matches = searchIndex.match(cachedFilter.regexp);
|
return;
|
||||||
if (keyword_matches) {
|
|
||||||
mutatedPayload.filtered.push({
|
|
||||||
filter: cachedFilter.repr,
|
|
||||||
keyword_matches,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
});
|
|
||||||
|
|
||||||
transmit(mutatedPayload);
|
// Just in-case JSDOM fails to find textContent in searchableContent
|
||||||
|
if (!searchableTextContent) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const keyword_matches = searchableTextContent.match(cachedFilter.regexp);
|
||||||
|
if (keyword_matches) {
|
||||||
|
// results is an Array of FilterResult; status_matches is always
|
||||||
|
// null as we only are only applying the keyword-based custom
|
||||||
|
// filters, not the status-based custom filters.
|
||||||
|
// https://docs.joinmastodon.org/entities/FilterResult/
|
||||||
|
results.push({
|
||||||
|
filter: cachedFilter.filter,
|
||||||
|
keyword_matches,
|
||||||
|
status_matches: null
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
// Send the payload + the FilterResults as the `filtered` property
|
||||||
|
// to the streaming connection. To reach this code, the `event` must
|
||||||
|
// have been either `update` or `status.update`, meaning the
|
||||||
|
// `payload` is a Status entity, which has a `filtered` property:
|
||||||
|
//
|
||||||
|
// filtered: https://docs.joinmastodon.org/entities/Status/#filtered
|
||||||
|
transmit(event, {
|
||||||
|
...payload,
|
||||||
|
filtered: filter_results
|
||||||
|
});
|
||||||
} else {
|
} else {
|
||||||
transmit(payload);
|
transmit(event, payload);
|
||||||
}
|
}
|
||||||
}).catch(err => {
|
}).catch(err => {
|
||||||
|
releasePgConnection();
|
||||||
log.error(err);
|
log.error(err);
|
||||||
done();
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in a new issue