Files
fluxer/fluxer_api/src/unfurler/resolvers/WikipediaResolver.ts
Hampus Kraft 2f557eda8c initial commit
2026-01-01 21:05:54 +00:00

167 lines
5.1 KiB
TypeScript

/*
* Copyright (C) 2026 Fluxer Contributors
*
* This file is part of Fluxer.
*
* Fluxer is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Fluxer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Fluxer. If not, see <https://www.gnu.org/licenses/>.
*/
import type {MessageEmbedResponse} from '~/channel/EmbedTypes';
import {Logger} from '~/Logger';
import {BaseResolver} from '~/unfurler/resolvers/BaseResolver';
import {buildEmbedMediaPayload} from '~/unfurler/resolvers/media/MediaMetadataHelpers';
import * as FetchUtils from '~/utils/FetchUtils';
import {parseString} from '~/utils/StringUtils';
/** Image descriptor as it appears in a Wikipedia page-summary payload. */
interface WikiSummaryImage {
	/** Absolute URL of the image file. */
	source: string;
	width: number;
	height: number;
}

/**
 * Shape this file expects from the JSON body of
 * `/api/rest_v1/page/summary/{title}`.
 */
interface WikiSummaryResponse {
	type: string;
	title: string;
	/** Summary text of the article; used as the embed description. */
	extract: string;
	thumbnail?: WikiSummaryImage;
	originalimage?: WikiSummaryImage;
	description?: string;
	pageid: number;
}
/** The embed `image` payload type with `null`/`undefined` excluded; the value `processThumbnail` yields on success. */
type ProcessedThumbnail = NonNullable<MessageEmbedResponse['image']>;
/**
 * Unfurls Wikipedia article links into embeds.
 *
 * The article title is taken from the `/wiki/<title>` path, the summary is
 * fetched from the matching language edition's `/api/rest_v1/page/summary/`
 * endpoint, and the summary's images are attached after a metadata lookup
 * through `mediaService`.
 */
export class WikipediaResolver extends BaseResolver {
	/** Path prefix under which article pages live. */
	private readonly ARTICLE_PATH_PREFIX = '/wiki/';

	/** Language editions (subdomains) this resolver handles. */
	private readonly SUPPORTED_LANGUAGES = ['en', 'de', 'fr', 'es', 'it', 'ja', 'ru', 'zh'];

	/** Every hostname accepted by {@link match}: the bare/`www` hosts plus one host per language. */
	private readonly SUPPORTED_DOMAINS = [
		'wikipedia.org',
		'www.wikipedia.org',
		...this.SUPPORTED_LANGUAGES.map((lang) => `${lang}.wikipedia.org`),
	];

	/**
	 * Reports whether this resolver should handle the given URL.
	 *
	 * @param url - The link being unfurled.
	 * @param mimeType - Content type of the fetched page.
	 * @param _content - Fetched page body (unused).
	 * @returns `true` for HTML article pages on a supported Wikipedia host.
	 */
	match(url: URL, mimeType: string, _content: Uint8Array): boolean {
		return (
			this.SUPPORTED_DOMAINS.includes(url.hostname) &&
			url.pathname.startsWith(this.ARTICLE_PATH_PREFIX) &&
			mimeType.startsWith('text/html')
		);
	}

	/**
	 * Picks the language edition to query for a URL.
	 *
	 * The first hostname label is compared against the language list only, so
	 * non-language hosts such as `www.wikipedia.org` and `wikipedia.org` fall
	 * back to `en` instead of being treated as a language.
	 */
	private getLanguageFromURL(url: URL): string {
		const subdomain = url.hostname.split('.')[0];
		return subdomain !== undefined && this.SUPPORTED_LANGUAGES.includes(subdomain) ? subdomain : 'en';
	}

	/**
	 * Extracts the decoded article title from a `/wiki/<title>` path.
	 *
	 * Everything after the leading prefix is kept, so titles that themselves
	 * contain `/wiki/` are not truncated.
	 *
	 * @returns The decoded title, or `null` when the path has no title.
	 * @throws URIError when the path contains malformed percent-encoding.
	 */
	private extractTitle(url: URL): string | null {
		if (!url.pathname.startsWith(this.ARTICLE_PATH_PREFIX)) return null;
		const rawTitle = url.pathname.slice(this.ARTICLE_PATH_PREFIX.length);
		if (!rawTitle) return null;
		return decodeURIComponent(rawTitle) || null;
	}

	/**
	 * Runtime check that a parsed summary body carries the string fields the
	 * embed is built from (`title` and `extract`). Image fields are optional
	 * and are not inspected here.
	 */
	private isSummaryResponse(value: unknown): value is WikiSummaryResponse {
		if (typeof value !== 'object' || value === null) return false;
		const candidate = value as Partial<Record<keyof WikiSummaryResponse, unknown>>;
		return typeof candidate.title === 'string' && typeof candidate.extract === 'string';
	}

	/**
	 * Fetches the page summary for an article.
	 *
	 * @param title - Decoded article title; it is URI-encoded before being placed in the path.
	 * @param baseUrl - Origin of the language edition, e.g. `https://en.wikipedia.org`.
	 * @returns The parsed summary, or `null` on a non-200 status, a request or
	 *   parse failure, or a body without the expected fields.
	 */
	private async fetchArticleSummary(title: string, baseUrl: string): Promise<WikiSummaryResponse | null> {
		const apiUrl = `${baseUrl}/api/rest_v1/page/summary/${encodeURIComponent(title)}`;
		try {
			const response = await FetchUtils.sendRequest({
				url: apiUrl,
			});
			if (response.status !== 200) {
				Logger.debug({title, status: response.status}, 'Failed to fetch Wikipedia article summary');
				return null;
			}
			const responseText = await FetchUtils.streamToString(response.stream);
			const parsed: unknown = JSON.parse(responseText);
			if (!this.isSummaryResponse(parsed)) {
				Logger.debug({title}, 'Wikipedia summary response has an unexpected shape');
				return null;
			}
			return parsed;
		} catch (error) {
			Logger.error({error, title}, 'Failed to fetch or parse Wikipedia response');
			return null;
		}
	}

	/**
	 * Turns a summary image descriptor into an embed media payload.
	 *
	 * @param thumbnailData - Image descriptor from the summary, if any.
	 * @param isNSFWAllowed - Forwarded to the media metadata lookup.
	 * @returns The media payload, or `null` when no descriptor was given.
	 */
	private async processThumbnail(
		thumbnailData: WikiSummaryResponse['thumbnail'],
		isNSFWAllowed: boolean,
	): Promise<ProcessedThumbnail | null> {
		if (!thumbnailData) return null;
		const thumbnailMetadata = await this.mediaService.getMetadata({
			type: 'external',
			url: thumbnailData.source,
			isNSFWAllowed,
		});
		// The dimensions reported by the summary are passed along with the looked-up metadata.
		return buildEmbedMediaPayload(thumbnailData.source, thumbnailMetadata, {
			width: thumbnailData.width,
			height: thumbnailData.height,
		}) as ProcessedThumbnail;
	}

	/**
	 * Builds the embeds for a Wikipedia article URL.
	 *
	 * @param url - The article link.
	 * @param _content - Fetched page body (unused; the summary API is queried instead).
	 * @param isNSFWAllowed - Forwarded to the media metadata lookup.
	 * @returns An `article` embed, followed by one `rich` image embed per
	 *   additional distinct image; an empty array when the title is missing,
	 *   the summary is unavailable, or any step throws.
	 */
	async resolve(url: URL, _content: Uint8Array, isNSFWAllowed: boolean = false): Promise<Array<MessageEmbedResponse>> {
		try {
			const title = this.extractTitle(url);
			if (!title) return [];
			const language = this.getLanguageFromURL(url);
			const baseUrl = `https://${language}.wikipedia.org`;
			const article = await this.fetchArticleSummary(title, baseUrl);
			if (!article) return [];
			// The two image lookups are independent, so run them concurrently.
			const [thumbnail, originalImage] = await Promise.all([
				this.processThumbnail(article.thumbnail, isNSFWAllowed),
				this.processThumbnail(article.originalimage, isNSFWAllowed),
			]);
			const uniqueImages = this.deduplicateThumbnails([thumbnail, originalImage]);
			const primaryThumbnail = uniqueImages[0];
			const embed: MessageEmbedResponse = {
				type: 'article',
				url: url.href,
				title: parseString(article.title, 256),
				description: parseString(article.extract, 350),
				thumbnail: primaryThumbnail ?? undefined,
			};
			const extraImageEmbeds = uniqueImages.slice(1).map((image) => ({
				type: 'rich' as const,
				url: url.href,
				image,
			}));
			return [embed, ...extraImageEmbeds];
		} catch (error) {
			Logger.error({error, url: url.toString()}, 'Failed to resolve Wikipedia article');
			return [];
		}
	}

	/**
	 * Drops `null` entries and images whose normalized URL was already seen,
	 * preserving input order. Images whose URL cannot be normalized are kept
	 * as-is because they cannot be compared.
	 */
	private deduplicateThumbnails(images: Array<ProcessedThumbnail | null>): Array<ProcessedThumbnail> {
		const seen = new Set<string>();
		const unique: Array<ProcessedThumbnail> = [];
		for (const image of images) {
			if (!image) continue;
			const normalized = this.normalizeUrl(image.url);
			if (!normalized) {
				unique.push(image);
				continue;
			}
			if (seen.has(normalized)) continue;
			seen.add(normalized);
			unique.push(image);
		}
		return unique;
	}

	/**
	 * Canonicalizes a URL for comparison: parses it and strips one trailing slash.
	 *
	 * @returns The normalized href, or `null` when the input is empty or not a valid URL.
	 */
	private normalizeUrl(url?: string): string | null {
		if (!url) return null;
		try {
			return new URL(url).href.replace(/\/$/, '');
		} catch (error) {
			Logger.debug({error, url}, 'Failed to normalize Wikipedia image URL');
			return null;
		}
	}
}