Files
fluxer/fluxer_api/src/search/MessageSearchService.ts
Hampus Kraft 2f557eda8c initial commit
2026-01-01 21:05:54 +00:00

566 lines
17 KiB
TypeScript

/*
* Copyright (C) 2026 Fluxer Contributors
*
* This file is part of Fluxer.
*
* Fluxer is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Fluxer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Fluxer. If not, see <https://www.gnu.org/licenses/>.
*/
import type {Index, MeiliSearch, SearchResponse} from 'meilisearch';
import type {GuildID, MessageID, UserID} from '~/BrandedTypes';
import {Logger} from '~/Logger';
import type {Message} from '~/Models';
import {SEARCH_MAX_TOTAL_HITS} from '~/search/constants';
import {extractTimestamp} from '~/utils/SnowflakeUtils';
// Name passed to `meilisearch.index()` to select the index that holds message documents.
const MESSAGE_INDEX_NAME = 'messages';
/**
 * Flat document shape stored in the message search index.
 *
 * All identifiers are stringified; timestamps are whole seconds.
 */
interface SearchableMessage {
  // --- identity -----------------------------------------------------------
  /** Message ID as a string; used as the document primary key. */
  id: string;
  /** ID of the channel the message belongs to, as a string. */
  channelId: string;
  /** Author's user ID as a string, or `null` when the message has no author ID. */
  authorId: string | null;
  /** `'webhook'` for webhook messages, `'bot'` when the author was flagged as a bot, otherwise `'user'`. */
  authorType: 'user' | 'bot' | 'webhook';

  // --- text and time ------------------------------------------------------
  /** Raw message text; the only attribute configured as searchable. */
  content: string | null;
  /** Timestamp extracted from the message ID, divided by 1000 and floored (presumably Unix seconds). */
  createdAt: number;
  /** Last edit time in Unix seconds, or `null` if the message has no edit timestamp. */
  editedAt: number | null;

  // --- mentions and pin state --------------------------------------------
  /** True when the message has a pinned timestamp. */
  isPinned: boolean;
  /** Stringified IDs of the users mentioned by the message. */
  mentionedUserIds: Array<string>;
  /** Copied from the message's `mentionEveryone` flag. */
  mentionEveryone: boolean;

  // --- "has" flags --------------------------------------------------------
  /** True when the content contains an `http://` or `https://` URL. */
  hasLink: boolean;
  /** True when the message has at least one embed. */
  hasEmbed: boolean;
  /** Currently always written as `false` (poll support is not implemented). */
  hasPoll: boolean;
  /** True when the message has at least one attachment. */
  hasFile: boolean;
  /** True when some attachment's content type starts with `video/`. */
  hasVideo: boolean;
  /** True when some attachment's content type starts with `image/`. */
  hasImage: boolean;
  /** True when some attachment's content type starts with `audio/`. */
  hasSound: boolean;
  /** True when the message has at least one sticker. */
  hasSticker: boolean;
  /** True when the message's reference has `type === 1`. */
  hasForward: boolean;

  // --- facets derived from embeds, links and attachments ------------------
  /** Distinct `type` values of the message's embeds. */
  embedTypes: Array<string>;
  /** Distinct provider names of the message's embeds. */
  embedProviders: Array<string>;
  /** Distinct hostnames taken from URLs in the content and from embed URLs. */
  linkHostnames: Array<string>;
  /** Distinct attachment filenames. */
  attachmentFilenames: Array<string>;
  /** Distinct lower-cased attachment extensions (text after the last dot, 1-10 characters). */
  attachmentExtensions: Array<string>;
}
/**
 * Optional constraints accepted by `MessageSearchService.searchMessages`.
 *
 * Unless noted otherwise, an "include" list matches documents equal to ANY listed value,
 * and an "exclude" list requires the document to differ from EVERY listed value.
 * Empty lists are ignored.
 */
export interface MessageSearchFilters {
  // --- free text ----------------------------------------------------------
  /** Extra search text appended to the query; ignored when `contents` is non-empty. */
  content?: string;
  /** Extra search terms, space-joined and appended to the query; takes precedence over `content`. */
  contents?: Array<string>;

  // --- ID window ----------------------------------------------------------
  /** Message ID upper bound: keeps documents whose `createdAt` is below this ID's timestamp (in seconds). */
  maxId?: string;
  /** Message ID lower bound: keeps documents whose `createdAt` is above this ID's timestamp (in seconds). */
  minId?: string;

  // --- channel ------------------------------------------------------------
  /** Exact channel to search. */
  channelId?: string;
  /** Channels to include. */
  channelIds?: Array<string>;
  /** Channels to exclude. */
  excludeChannelIds?: Array<string>;

  // --- author -------------------------------------------------------------
  /** Author IDs to include. */
  authorId?: Array<string>;
  /** Author IDs to exclude. */
  excludeAuthorIds?: Array<string>;
  /** Author types to include (matched against the document's `authorType`). */
  authorType?: Array<string>;
  /** Author types to exclude. */
  excludeAuthorType?: Array<string>;

  // --- mentions and pins --------------------------------------------------
  /** Mentioned user IDs to include. */
  mentions?: Array<string>;
  /** Mentioned user IDs to exclude. */
  excludeMentions?: Array<string>;
  /** When set, requires the document's `mentionEveryone` flag to equal this value. */
  mentionEveryone?: boolean;
  /** When set, requires the document's `isPinned` flag to equal this value. */
  pinned?: boolean;

  // --- content kinds ------------------------------------------------------
  /**
   * Kinds the message must have. Recognised keywords: `image`, `sound`, `video`, `file`,
   * `sticker`, `embed`, `link`, `poll`, `snapshot`. Each keyword adds its own clause;
   * unrecognised keywords are skipped.
   */
  has?: Array<string>;
  /** Kinds the message must not have; same keywords as `has`. */
  excludeHas?: Array<string>;

  // --- embeds -------------------------------------------------------------
  /** Embed types to include. */
  embedType?: Array<'image' | 'video' | 'sound' | 'article'>;
  /** Embed types to exclude. */
  excludeEmbedTypes?: Array<'image' | 'video' | 'sound' | 'article'>;
  /** Embed provider names to include. */
  embedProvider?: Array<string>;
  /** Embed provider names to exclude. */
  excludeEmbedProviders?: Array<string>;

  // --- links --------------------------------------------------------------
  /** Link hostnames to include. */
  linkHostname?: Array<string>;
  /** Link hostnames to exclude. */
  excludeLinkHostnames?: Array<string>;

  // --- attachments --------------------------------------------------------
  /** Attachment filenames to include. */
  attachmentFilename?: Array<string>;
  /** Attachment filenames to exclude. */
  excludeAttachmentFilenames?: Array<string>;
  /** Attachment extensions to include. */
  attachmentExtension?: Array<string>;
  /** Attachment extensions to exclude. */
  excludeAttachmentExtensions?: Array<string>;

  // --- ordering -----------------------------------------------------------
  /** Defaults to `'timestamp'` (sort by `createdAt`); `'relevance'` sends no explicit sort. */
  sortBy?: 'timestamp' | 'relevance';
  /** Direction used for timestamp sorting; defaults to `'desc'`. */
  sortOrder?: 'asc' | 'desc';

  // --- other --------------------------------------------------------------
  /** NOTE(review): not read by the search service in this file — presumably applied by callers; confirm. */
  includeNsfw?: boolean;
}
/**
 * Indexes chat messages into Meilisearch and runs filtered searches over them.
 *
 * `initialize()` must be called first: every other public method throws
 * `Error('Message search index not initialized')` until the index handle exists.
 */
export class MessageSearchService {
  /** `has` / `excludeHas` keywords and the boolean document attribute each one tests. */
  private static readonly HAS_FIELDS: ReadonlyMap<string, string> = new Map([
    ['image', 'hasImage'],
    ['sound', 'hasSound'],
    ['video', 'hasVideo'],
    ['file', 'hasFile'],
    ['sticker', 'hasSticker'],
    ['embed', 'hasEmbed'],
    ['link', 'hasLink'],
    ['poll', 'hasPoll'],
    ['snapshot', 'hasForward'],
  ]);

  private meilisearch: MeiliSearch;
  private index: Index<SearchableMessage> | null = null;

  /** @param meilisearch Client used to obtain the message index. */
  constructor(meilisearch: MeiliSearch) {
    this.meilisearch = meilisearch;
  }

  /**
   * Obtains the message index handle and submits its settings (searchable, filterable and
   * sortable attributes, ranking rules, pagination cap).
   *
   * @throws Re-throws any error from the settings update after logging it.
   */
  async initialize(): Promise<void> {
    try {
      this.index = this.meilisearch.index<SearchableMessage>(MESSAGE_INDEX_NAME);
      await this.index.updateSettings({
        searchableAttributes: ['content'],
        filterableAttributes: [
          'channelId',
          'authorId',
          'authorType',
          'isPinned',
          'mentionedUserIds',
          'mentionEveryone',
          'hasLink',
          'hasEmbed',
          'hasPoll',
          'hasFile',
          'hasVideo',
          'hasImage',
          'hasSound',
          'hasSticker',
          'hasForward',
          'createdAt',
          'embedTypes',
          'embedProviders',
          'linkHostnames',
          'attachmentFilenames',
          'attachmentExtensions',
        ],
        sortableAttributes: ['createdAt', 'editedAt'],
        rankingRules: ['words', 'typo', 'proximity', 'attribute', 'sort', 'exactness'],
        pagination: {
          maxTotalHits: SEARCH_MAX_TOTAL_HITS,
        },
      });
      Logger.debug('Message search index initialized successfully');
    } catch (error) {
      Logger.error({error}, 'Failed to initialize message search index');
      throw error;
    }
  }

  /**
   * Adds a single message to the index.
   *
   * @param message Message to index.
   * @param authorIsBot Whether the author is a bot; used to derive `authorType`.
   * @throws If the index is not initialized, or re-throws the client error after logging it.
   */
  async indexMessage(message: Message, authorIsBot?: boolean): Promise<void> {
    const index = this.requireIndex();
    const searchableMessage = this.convertToSearchableMessage(message, authorIsBot);
    try {
      await index.addDocuments([searchableMessage], {primaryKey: 'id'});
    } catch (error) {
      Logger.error({messageId: message.id, error}, 'Failed to index message');
      throw error;
    }
  }

  /**
   * Adds a batch of messages to the index. Does nothing for an empty batch.
   *
   * @param messages Messages to index.
   * @param authorBotMap Optional lookup from author ID to "is bot"; authors missing from the
   *   map (or messages without an author ID) are treated as non-bots.
   * @throws If the index is not initialized, or re-throws the client error after logging it.
   */
  async indexMessages(messages: Array<Message>, authorBotMap?: Map<UserID, boolean>): Promise<void> {
    const index = this.requireIndex();
    if (messages.length === 0) return;
    const searchableMessages = messages.map((msg) => {
      const isBot = msg.authorId ? (authorBotMap?.get(msg.authorId) ?? false) : false;
      return this.convertToSearchableMessage(msg, isBot);
    });
    try {
      await index.addDocuments(searchableMessages, {primaryKey: 'id'});
    } catch (error) {
      Logger.error({count: messages.length, error}, 'Failed to index messages');
      throw error;
    }
  }

  /**
   * Sends the current state of a message through the client's `updateDocuments` call.
   *
   * @param message Message whose document should be updated.
   * @param authorIsBot Whether the author is a bot; used to derive `authorType`.
   * @throws If the index is not initialized, or re-throws the client error after logging it.
   */
  async updateMessage(message: Message, authorIsBot?: boolean): Promise<void> {
    const index = this.requireIndex();
    const searchableMessage = this.convertToSearchableMessage(message, authorIsBot);
    try {
      await index.updateDocuments([searchableMessage], {primaryKey: 'id'});
    } catch (error) {
      Logger.error({messageId: message.id, error}, 'Failed to update message in search index');
      throw error;
    }
  }

  /**
   * Removes one message document by ID.
   *
   * @throws If the index is not initialized, or re-throws the client error after logging it.
   */
  async deleteMessage(messageId: MessageID): Promise<void> {
    const index = this.requireIndex();
    try {
      await index.deleteDocument(messageId.toString());
    } catch (error) {
      Logger.error({messageId, error}, 'Failed to delete message from search index');
      throw error;
    }
  }

  /**
   * Removes several message documents by ID. Does nothing for an empty list.
   *
   * @throws If the index is not initialized, or re-throws the client error after logging it.
   */
  async deleteMessages(messageIds: Array<MessageID>): Promise<void> {
    const index = this.requireIndex();
    if (messageIds.length === 0) return;
    try {
      await index.deleteDocuments(messageIds.map((id) => id.toString()));
    } catch (error) {
      Logger.error({count: messageIds.length, error}, 'Failed to delete messages from search index');
      throw error;
    }
  }

  /**
   * Issues a delete-by-filter request intended to remove a guild's messages.
   *
   * NOTE(review): the filter compares `channelId` against the guild ID, and the indexed
   * document has no guild field. Meilisearch's `IN` operator is also documented as taking
   * a bracketed list (`IN [a, b]`). Confirm this request matches what callers expect;
   * behaviour is left unchanged here.
   *
   * @throws If the index is not initialized, or re-throws the client error after logging it.
   */
  async deleteGuildMessages(guildId: GuildID): Promise<void> {
    const index = this.requireIndex();
    try {
      await index.deleteDocuments({
        filter: [`channelId IN ${guildId.toString()}`],
      });
    } catch (error) {
      Logger.error({guildId, error}, 'Failed to delete guild messages from search index');
      throw error;
    }
  }

  /**
   * Runs a paginated search.
   *
   * @param query Free-text query; `filters.contents` / `filters.content` are appended to it.
   * @param filters Structured constraints translated into filter expressions and a sort order.
   * @param options Pagination; defaults to page 1 with 25 hits per page.
   * @returns The page of hits and the total reported by the client (`totalHits`, else
   *   `estimatedTotalHits`, else the number of hits on this page).
   * @throws If the index is not initialized, or re-throws the client error after logging it.
   */
  async searchMessages(
    query: string,
    filters: MessageSearchFilters,
    options?: {
      hitsPerPage?: number;
      page?: number;
    },
  ): Promise<{hits: Array<SearchableMessage>; total: number}> {
    const index = this.requireIndex();
    const searchQuery = this.buildSearchQuery(query, filters);
    const filterStrings = this.buildFilterStrings(filters);
    const sortField = this.buildSortField(filters);
    try {
      const rawResult = (await index.search<SearchableMessage>(searchQuery, {
        filter: filterStrings.length > 0 ? filterStrings : undefined,
        hitsPerPage: options?.hitsPerPage ?? 25,
        page: options?.page ?? 1,
        sort: sortField,
      })) as SearchResponse<SearchableMessage>;
      const totalHits = rawResult.totalHits ?? rawResult.estimatedTotalHits ?? rawResult.hits.length;
      return {
        hits: rawResult.hits,
        total: totalHits,
      };
    } catch (error) {
      Logger.error({query: searchQuery, filters, error}, 'Failed to search messages');
      throw error;
    }
  }

  /** Returns the index handle, or throws when `initialize()` has not created it yet. */
  private requireIndex(): Index<SearchableMessage> {
    if (!this.index) {
      throw new Error('Message search index not initialized');
    }
    return this.index;
  }

  /** Appends `filters.contents` (space-joined) or, failing that, `filters.content` to the query. */
  private buildSearchQuery(query: string, filters: MessageSearchFilters): string {
    if (filters.contents && filters.contents.length > 0) {
      const contentTerms = filters.contents.join(' ');
      return query ? `${query} ${contentTerms}` : contentTerms;
    }
    if (filters.content) {
      return query ? `${query} ${filters.content}` : filters.content;
    }
    return query;
  }

  /**
   * Translates the structured filters into filter expressions, one array entry per constraint.
   *
   * @throws SyntaxError (from `BigInt`) when `maxId` / `minId` is not a valid integer string.
   */
  private buildFilterStrings(filters: MessageSearchFilters): Array<string> {
    const filterStrings: Array<string> = [];
    const includeAny = (field: string, values: Array<string> | undefined): void => {
      if (values && values.length > 0) this.addOrFilter(filterStrings, field, values);
    };
    const excludeAll = (field: string, values: Array<string> | undefined): void => {
      if (values && values.length > 0) this.addExcludeFilter(filterStrings, field, values);
    };

    if (filters.channelId) {
      filterStrings.push(`channelId = ${this.quoteFilterValue(filters.channelId)}`);
    }
    includeAny('channelId', filters.channelIds);
    excludeAll('channelId', filters.excludeChannelIds);

    includeAny('authorId', filters.authorId);
    excludeAll('authorId', filters.excludeAuthorIds);
    if (filters.authorType && filters.authorType.length > 0) {
      this.addAuthorTypeFilter(filterStrings, filters.authorType);
    }
    excludeAll('authorType', filters.excludeAuthorType);

    includeAny('mentionedUserIds', filters.mentions);
    excludeAll('mentionedUserIds', filters.excludeMentions);
    if (filters.mentionEveryone !== undefined) {
      filterStrings.push(`mentionEveryone = ${filters.mentionEveryone}`);
    }
    if (filters.pinned !== undefined) {
      filterStrings.push(`isPinned = ${filters.pinned}`);
    }

    if (filters.has && filters.has.length > 0) {
      this.addHasFilter(filterStrings, filters.has);
    }
    if (filters.excludeHas && filters.excludeHas.length > 0) {
      this.addHasExcludeFilter(filterStrings, filters.excludeHas);
    }

    includeAny('embedTypes', filters.embedType);
    excludeAll('embedTypes', filters.excludeEmbedTypes);
    includeAny('embedProviders', filters.embedProvider);
    excludeAll('embedProviders', filters.excludeEmbedProviders);
    includeAny('linkHostnames', filters.linkHostname);
    excludeAll('linkHostnames', filters.excludeLinkHostnames);
    includeAny('attachmentFilenames', filters.attachmentFilename);
    excludeAll('attachmentFilenames', filters.excludeAttachmentFilenames);
    includeAny('attachmentExtensions', filters.attachmentExtension);
    excludeAll('attachmentExtensions', filters.excludeAttachmentExtensions);

    // `createdAt` is stored in whole seconds, so the ID bounds are reduced to seconds too.
    if (filters.maxId) {
      const timestamp = Math.floor(extractTimestamp(BigInt(filters.maxId)) / 1000);
      filterStrings.push(`createdAt < ${timestamp}`);
    }
    if (filters.minId) {
      const timestamp = Math.floor(extractTimestamp(BigInt(filters.minId)) / 1000);
      filterStrings.push(`createdAt > ${timestamp}`);
    }
    return filterStrings;
  }

  /** Returns `[]` for relevance ordering, otherwise a single `createdAt:<order>` rule (default `desc`). */
  private buildSortField(filters: MessageSearchFilters): Array<string> {
    const sortBy = filters.sortBy ?? 'timestamp';
    const sortOrder = filters.sortOrder ?? 'desc';
    return sortBy === 'relevance' ? [] : [`createdAt:${sortOrder}`];
  }

  /**
   * Wraps a value in double quotes for use inside a filter expression.
   *
   * Backslashes and double quotes are backslash-escaped so a caller-supplied value can
   * neither terminate the literal early nor swallow the closing quote.
   */
  private quoteFilterValue(value: string): string {
    const escaped = String(value).replace(/\\/g, '\\\\').replace(/"/g, '\\"');
    return `"${escaped}"`;
  }

  /** Pushes `(field = "a" OR field = "b" ...)`. */
  private addOrFilter(filterStrings: Array<string>, field: string, values: Array<string>): void {
    const orFilters = values.map((value) => `${field} = ${this.quoteFilterValue(value)}`).join(' OR ');
    filterStrings.push(`(${orFilters})`);
  }

  /** Pushes `(field != "a" AND field != "b" ...)`. */
  private addExcludeFilter(filterStrings: Array<string>, field: string, values: Array<string>): void {
    const notFilters = values.map((value) => `${field} != ${this.quoteFilterValue(value)}`).join(' AND ');
    filterStrings.push(`(${notFilters})`);
  }

  /** Pushes an OR group over `authorType`. */
  private addAuthorTypeFilter(filterStrings: Array<string>, authorTypes: Array<string>): void {
    this.addOrFilter(filterStrings, 'authorType', authorTypes);
  }

  /** Pushes `<flag> = true` for each recognised keyword; unrecognised keywords are skipped. */
  private addHasFilter(filterStrings: Array<string>, hasTypes: Array<string>): void {
    for (const hasType of hasTypes) {
      const field = this.getHasField(hasType);
      if (field) {
        filterStrings.push(`${field} = true`);
      }
    }
  }

  /** Pushes `<flag> = false` for each recognised keyword; unrecognised keywords are skipped. */
  private addHasExcludeFilter(filterStrings: Array<string>, hasTypes: Array<string>): void {
    for (const hasType of hasTypes) {
      const field = this.getHasField(hasType);
      if (field) {
        filterStrings.push(`${field} = false`);
      }
    }
  }

  /**
   * Resolves a `has` keyword to its document attribute, or `null` when unknown.
   *
   * Backed by a `Map` so that inherited object keys such as `constructor` or `__proto__`
   * are treated as unknown rather than resolving to prototype members.
   */
  private getHasField(hasType: string): string | null {
    return MessageSearchService.HAS_FIELDS.get(hasType) ?? null;
  }

  /** Webhook messages win over the bot flag; everything else is a regular user. */
  private getAuthorType(message: Message, authorIsBot?: boolean): 'user' | 'bot' | 'webhook' {
    if (message.webhookId) return 'webhook';
    if (authorIsBot) return 'bot';
    return 'user';
  }

  /** Classifies attachments by the prefix of their trimmed, lower-cased content type. */
  private extractAttachmentTypes(message: Message): {hasVideo: boolean; hasImage: boolean; hasSound: boolean} {
    const hasType = (prefix: string) =>
      message.attachments.some((att) => att.contentType.trim().toLowerCase().startsWith(prefix));
    return {
      hasVideo: hasType('video/'),
      hasImage: hasType('image/'),
      hasSound: hasType('audio/'),
    };
  }

  /** Distinct non-empty embed types, in first-seen order. */
  private extractEmbedTypes(message: Message): Array<string> {
    const types = new Set<string>();
    for (const embed of message.embeds) {
      if (embed.type) types.add(embed.type);
    }
    return Array.from(types);
  }

  /** Distinct non-empty embed provider names, in first-seen order. */
  private extractEmbedProviders(message: Message): Array<string> {
    const providers = new Set<string>();
    for (const embed of message.embeds) {
      const name = embed.provider?.name;
      if (name) providers.add(name);
    }
    return Array.from(providers);
  }

  /** Parses a URL and returns its hostname, or `null` when the string is not a valid URL. */
  private parseHostname(rawUrl: string): string | null {
    try {
      return new URL(rawUrl).hostname;
    } catch {
      return null;
    }
  }

  /**
   * Distinct hostnames from URLs in the content and from embed URLs, in first-seen order.
   *
   * Content links are run through `URL` just like embed URLs, so ports, credentials,
   * query strings and letter case do not produce different entries for the same host.
   * A content link that `URL` rejects falls back to its raw authority text.
   */
  private extractLinkHostnames(message: Message): Array<string> {
    const hostnames = new Set<string>();
    if (message.content) {
      for (const match of message.content.matchAll(/https?:\/\/([^/\s]+)/g)) {
        const authority = match[1];
        if (!authority) continue;
        // match[0] is "scheme://authority" — enough for URL to isolate the hostname.
        hostnames.add(this.parseHostname(match[0]) ?? authority);
      }
    }
    for (const embed of message.embeds) {
      if (!embed.url) continue;
      // Unparsable embed URLs are skipped on purpose (best effort).
      const hostname = this.parseHostname(embed.url);
      if (hostname !== null) hostnames.add(hostname);
    }
    return Array.from(hostnames);
  }

  /**
   * Distinct attachment filenames and extensions, in first-seen order.
   *
   * The extension is the lower-cased text after the last dot, kept only when it is
   * 1-10 characters long.
   */
  private extractAttachmentInfo(message: Message): {
    attachmentFilenames: Array<string>;
    attachmentExtensions: Array<string>;
  } {
    const filenames = new Set<string>();
    const extensions = new Set<string>();
    for (const att of message.attachments) {
      filenames.add(att.filename);
      const lastDot = att.filename.lastIndexOf('.');
      if (lastDot === -1) continue;
      const ext = att.filename.slice(lastDot + 1).toLowerCase();
      if (ext.length > 0 && ext.length <= 10) {
        extensions.add(ext);
      }
    }
    return {attachmentFilenames: Array.from(filenames), attachmentExtensions: Array.from(extensions)};
  }

  /** Flattens a message into the document shape stored in the index. */
  private convertToSearchableMessage(message: Message, authorIsBot?: boolean): SearchableMessage {
    // Timestamps are stored in whole seconds.
    const createdAt = Math.floor(extractTimestamp(BigInt(message.id)) / 1000);
    const editedAt = message.editedTimestamp ? Math.floor(message.editedTimestamp.getTime() / 1000) : null;
    const authorType = this.getAuthorType(message, authorIsBot);
    const {hasVideo, hasImage, hasSound} = this.extractAttachmentTypes(message);
    const hasLink = message.content !== null && /(https?:\/\/[^\s]+)/.test(message.content);
    const embedTypes = this.extractEmbedTypes(message);
    const embedProviders = this.extractEmbedProviders(message);
    const linkHostnames = this.extractLinkHostnames(message);
    const {attachmentFilenames, attachmentExtensions} = this.extractAttachmentInfo(message);
    return {
      id: message.id.toString(),
      channelId: message.channelId.toString(),
      authorId: message.authorId?.toString() ?? null,
      authorType,
      content: message.content,
      createdAt,
      editedAt,
      isPinned: message.pinnedTimestamp !== null,
      mentionedUserIds: Array.from(message.mentionedUserIds).map((id) => id.toString()),
      mentionEveryone: message.mentionEveryone,
      hasLink,
      hasEmbed: message.embeds.length > 0,
      hasPoll: false, // TODO: Implement when poll support is added
      hasFile: message.attachments.length > 0,
      hasVideo,
      hasImage,
      hasSound,
      hasSticker: message.stickers.length > 0,
      hasForward: message.reference?.type === 1,
      embedTypes,
      embedProviders,
      linkHostnames,
      attachmentFilenames,
      attachmentExtensions,
    };
  }
}