initial commit
This commit is contained in:
166
fluxer_api/src/unfurler/resolvers/WikipediaResolver.ts
Normal file
166
fluxer_api/src/unfurler/resolvers/WikipediaResolver.ts
Normal file
@@ -0,0 +1,166 @@
|
||||
/*
|
||||
* Copyright (C) 2026 Fluxer Contributors
|
||||
*
|
||||
* This file is part of Fluxer.
|
||||
*
|
||||
* Fluxer is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Fluxer is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with Fluxer. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import type {MessageEmbedResponse} from '~/channel/EmbedTypes';
|
||||
import {Logger} from '~/Logger';
|
||||
import {BaseResolver} from '~/unfurler/resolvers/BaseResolver';
|
||||
import {buildEmbedMediaPayload} from '~/unfurler/resolvers/media/MediaMetadataHelpers';
|
||||
import * as FetchUtils from '~/utils/FetchUtils';
|
||||
import {parseString} from '~/utils/StringUtils';
|
||||
|
||||
/**
 * Image descriptor shared by the `thumbnail` and `originalimage` fields of
 * the summary payload: a source URL plus pixel dimensions.
 */
interface WikiSummaryImage {
	source: string;
	width: number;
	height: number;
}

/**
 * Shape that the JSON body of the `/api/rest_v1/page/summary/{title}` response
 * is cast to by this resolver. The cast is unchecked; nothing in this file
 * validates the payload at runtime.
 */
interface WikiSummaryResponse {
	/** Not read by the resolver in this file. */
	type: string;
	/** Used as the embed title. */
	title: string;
	/** Used as the embed description. */
	extract: string;
	/** Optional image; processed into embed media when present. */
	thumbnail?: WikiSummaryImage;
	/** Optional image; processed into embed media when present. */
	originalimage?: WikiSummaryImage;
	/** Not read by the resolver in this file. */
	description?: string;
	/** Not read by the resolver in this file. */
	pageid: number;
}
|
||||
|
||||
/** Non-nullable form of the embed `image` payload; the result type for Wikipedia images processed by this resolver. */
type ProcessedThumbnail = NonNullable<MessageEmbedResponse['image']>;
|
||||
|
||||
/**
 * Resolver that turns Wikipedia article URLs into message embeds by querying
 * the `/api/rest_v1/page/summary/{title}` endpoint of the matching language
 * edition.
 */
export class WikipediaResolver extends BaseResolver {
	/** Path prefix that precedes the article title in an article URL. */
	private readonly ARTICLE_PATH_PREFIX = '/wiki/';

	/** Language editions (subdomains) this resolver queries directly. */
	private readonly SUPPORTED_LANGUAGES = ['en', 'de', 'fr', 'es', 'it', 'ja', 'ru', 'zh'];

	/** Hostnames accepted by {@link match}. */
	private readonly SUPPORTED_DOMAINS = [
		'wikipedia.org',
		'www.wikipedia.org',
		...this.SUPPORTED_LANGUAGES.map((lang) => `${lang}.wikipedia.org`),
	];

	/**
	 * Reports whether this resolver handles the given URL.
	 *
	 * @param url - URL being unfurled.
	 * @param mimeType - Content type of the fetched resource.
	 * @param _content - Unused.
	 * @returns `true` when the hostname is a supported Wikipedia domain, the
	 *   path starts with `/wiki/`, and the MIME type starts with `text/html`.
	 */
	match(url: URL, mimeType: string, _content: Uint8Array): boolean {
		return (
			this.SUPPORTED_DOMAINS.includes(url.hostname) &&
			url.pathname.startsWith(this.ARTICLE_PATH_PREFIX) &&
			mimeType.startsWith('text/html')
		);
	}

	/**
	 * Picks the language edition to query for a URL.
	 *
	 * Only real language subdomains are returned. `www.wikipedia.org` and the
	 * bare `wikipedia.org` are accepted hosts but not language editions, so
	 * they fall back to `'en'` instead of producing a `www` API host.
	 */
	private getLanguageFromURL(url: URL): string {
		const [subdomain = ''] = url.hostname.split('.');
		return this.SUPPORTED_LANGUAGES.includes(subdomain) ? subdomain : 'en';
	}

	/**
	 * Fetches and parses the summary for an article.
	 *
	 * @param title - Decoded article title; it is URI-encoded before being
	 *   placed in the request path.
	 * @param baseUrl - Origin of the language edition, e.g. `https://en.wikipedia.org`.
	 * @returns The parsed response body, or `null` when the status is not 200
	 *   or when fetching/parsing throws. The body is cast without validation.
	 */
	private async fetchArticleSummary(title: string, baseUrl: string): Promise<WikiSummaryResponse | null> {
		const apiUrl = `${baseUrl}/api/rest_v1/page/summary/${encodeURIComponent(title)}`;
		try {
			const response = await FetchUtils.sendRequest({
				url: apiUrl,
			});
			if (response.status !== 200) {
				Logger.debug({title, status: response.status}, 'Failed to fetch Wikipedia article summary');
				return null;
			}
			const responseText = await FetchUtils.streamToString(response.stream);
			return JSON.parse(responseText) as WikiSummaryResponse;
		} catch (error) {
			Logger.error({error, title}, 'Failed to fetch or parse Wikipedia response');
			return null;
		}
	}

	/**
	 * Converts one image entry of the summary into an embed media payload.
	 *
	 * @param thumbnailData - `thumbnail` or `originalimage` from the summary.
	 * @param isNSFWAllowed - Forwarded to the media service metadata lookup.
	 * @returns The payload built from the media metadata and the dimensions
	 *   reported in the summary, or `null` when no image data was supplied.
	 */
	private async processThumbnail(
		thumbnailData: WikiSummaryResponse['thumbnail'],
		isNSFWAllowed: boolean,
	): Promise<ProcessedThumbnail | null> {
		if (!thumbnailData) return null;
		const thumbnailMetadata = await this.mediaService.getMetadata({
			type: 'external',
			url: thumbnailData.source,
			isNSFWAllowed,
		});
		return buildEmbedMediaPayload(thumbnailData.source, thumbnailMetadata, {
			width: thumbnailData.width,
			height: thumbnailData.height,
		}) as ProcessedThumbnail;
	}

	/**
	 * Builds the embeds for a Wikipedia article URL.
	 *
	 * @param url - Article URL; the title is everything after the leading `/wiki/`.
	 * @param _content - Unused.
	 * @param isNSFWAllowed - Forwarded to image processing.
	 * @returns An `article` embed carrying the title, extract and first unique
	 *   image, followed by one `rich` embed per additional unique image. An
	 *   empty array is returned when the title is missing, the summary cannot
	 *   be fetched, or any step throws (the error is logged).
	 */
	async resolve(url: URL, _content: Uint8Array, isNSFWAllowed: boolean = false): Promise<Array<MessageEmbedResponse>> {
		try {
			if (!url.pathname.startsWith(this.ARTICLE_PATH_PREFIX)) return [];

			// Take the whole remainder of the path rather than splitting on
			// '/wiki/', so titles that themselves contain '/wiki/' are not cut short.
			const title = decodeURIComponent(url.pathname.slice(this.ARTICLE_PATH_PREFIX.length));
			if (!title) return [];

			const language = this.getLanguageFromURL(url);
			const baseUrl = `https://${language}.wikipedia.org`;
			const article = await this.fetchArticleSummary(title, baseUrl);
			if (!article) return [];

			// The two lookups are independent, so run them concurrently.
			const [thumbnail, originalImage] = await Promise.all([
				this.processThumbnail(article.thumbnail, isNSFWAllowed),
				this.processThumbnail(article.originalimage, isNSFWAllowed),
			]);
			const uniqueImages = this.deduplicateThumbnails([thumbnail, originalImage]);
			const primaryThumbnail = uniqueImages[0];

			const embed: MessageEmbedResponse = {
				type: 'article',
				url: url.href,
				title: parseString(article.title, 256),
				description: parseString(article.extract, 350),
				thumbnail: primaryThumbnail ?? undefined,
			};

			const extraImageEmbeds = uniqueImages.slice(1).map((image) => ({
				type: 'rich' as const,
				url: url.href,
				image,
			}));

			return [embed, ...extraImageEmbeds];
		} catch (error) {
			Logger.error({error, url: url.toString()}, 'Failed to resolve Wikipedia article');
			return [];
		}
	}

	/**
	 * Drops `null` entries and images whose normalized URL was already seen,
	 * preserving input order. Images whose URL cannot be normalized are kept
	 * as-is because they cannot be compared.
	 */
	private deduplicateThumbnails(images: Array<ProcessedThumbnail | null>): Array<ProcessedThumbnail> {
		const seen = new Set<string>();
		const unique: Array<ProcessedThumbnail> = [];

		for (const image of images) {
			if (!image) continue;

			const normalized = this.normalizeUrl(image.url);
			if (!normalized) {
				unique.push(image);
				continue;
			}

			if (seen.has(normalized)) continue;
			seen.add(normalized);
			unique.push(image);
		}

		return unique;
	}

	/**
	 * Returns the parsed `href` of a URL with one trailing slash removed, or
	 * `null` when the input is empty or is not a parseable URL.
	 */
	private normalizeUrl(url?: string): string | null {
		if (!url) return null;

		try {
			return new URL(url).href.replace(/\/$/, '');
		} catch (error) {
			Logger.debug({error, url}, 'Failed to normalize Wikipedia image URL');
			return null;
		}
	}
}
|
||||
Reference in New Issue
Block a user