initial commit

2026-01-01 20:42:59 +00:00
commit 2f557eda8c
9029 changed files with 1490197 additions and 0 deletions
--- a/fluxer_app/src/lib/markdown/parser/parsers/link-parsers.ts
+++ b/fluxer_app/src/lib/markdown/parser/parsers/link-parsers.ts
@@ -0,0 +1,540 @@
+/*
+ * Copyright (C) 2026 Fluxer Contributors
+ *
+ * This file is part of Fluxer.
+ *
+ * Fluxer is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Fluxer is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with Fluxer. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import {APP_PROTOCOL_PREFIX} from '~/utils/appProtocol';
+import {MAX_LINK_URL_LENGTH} from '../types/constants';
+import {NodeType, ParserFlags} from '../types/enums';
+import type {Node, ParserResult} from '../types/nodes';
+import * as StringUtils from '../utils/string-utils';
+import * as URLUtils from '../utils/url-utils';
+
+/** Matches an entire `[http(s)://…](http(s)://…)` construct, i.e. a masked link whose label is itself a URL. */
+const SPOOFED_LINK_PATTERN = /^\[https?:\/\/[^\s[\]]+\]\(https?:\/\/[^\s[\]]+\)$/;
+/** Matches whitespace-free text shaped like `local@domain.tld`. */
+const EMAIL_PATTERN = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
+/**
+ * Matches an optional `http(s)://` scheme, one or more dot-separated host labels, an alphabetic
+ * final label of at least two letters, and an optional path free of whitespace and square brackets.
+ */
+const URL_DOMAIN_PATTERN =
+	/^(?:https?:\/\/)?(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,}(?:\/[^\s[\]]*)?$/;
+
+// UTF-16 code units compared against `String.prototype.charCodeAt` results.
+const OPEN_BRACKET = 0x5b; // '['
+const CLOSE_BRACKET = 0x5d; // ']'
+const OPEN_PAREN = 0x28; // '('
+const CLOSE_PAREN = 0x29; // ')'
+const BACKSLASH = 0x5c; // backslash
+const LESS_THAN = 0x3c; // '<'
+const GREATER_THAN = 0x3e; // '>'
+const DOUBLE_QUOTE = 0x22; // '"'
+const SINGLE_QUOTE = 0x27; // "'"
+const PLUS_SIGN = 0x2b; // '+'
+
+/**
+ * Reports whether `text` contains a `[` whose next `]` is immediately followed by `(`,
+ * i.e. something shaped like the start of a `[label](url)` link.
+ *
+ * The scan is a single forward pass: after each `[`…`]` pair that is not followed by `(`,
+ * searching resumes right after that `]`. Iterating (rather than recursing on substrings)
+ * keeps the work linear and avoids exhausting the call stack on inputs with many bracket pairs.
+ *
+ * @param text - Text to inspect.
+ * @returns `true` when a `](` sequence closing some `[` is found, otherwise `false`.
+ */
+function containsLinkSyntax(text: string): boolean {
+	let searchFrom = 0;
+
+	while (searchFrom < text.length) {
+		const openIndex = text.indexOf('[', searchFrom);
+		if (openIndex === -1) return false;
+
+		const closeIndex = text.indexOf(']', openIndex);
+		if (closeIndex === -1) return false;
+
+		// Out-of-range indexing yields `undefined`, which never equals '('.
+		if (text[closeIndex + 1] === '(') return true;
+
+		searchFrom = closeIndex + 1;
+	}
+
+	return false;
+}
+
+/**
+ * Parses a masked link of the form `[label](url)` located at the start of `text`.
+ *
+ * Outcomes:
+ * - a `Link` node when the link parts can be extracted and the normalized URL validates;
+ * - a `Text` node covering the consumed source when the construct is deliberately rejected
+ *   (spoofed pattern, nested link syntax in the label, a bare `[label]`, a single-slash
+ *   relative target, or an exception thrown while building the link);
+ * - `null` when `text` does not start with `[`, no closing bracket is found, or the URL is invalid.
+ *
+ * @param text - Source text beginning at the candidate link.
+ * @param _parserFlags - Unused by this parser.
+ * @param parseInline - Callback used to parse the link label into inline nodes.
+ */
+export function parseLink(
+	text: string,
+	_parserFlags: number,
+	parseInline: (text: string) => Array<Node>,
+): ParserResult | null {
+	if (text.charCodeAt(0) !== OPEN_BRACKET) return null;
+
+	// Emits the first `length` characters of the source verbatim as a text node.
+	const literal = (length: number): ParserResult => ({
+		node: {type: NodeType.Text, content: text.slice(0, length)},
+		advance: length,
+	});
+
+	const parts = extractLinkParts(text);
+
+	if (!parts) {
+		if (SPOOFED_LINK_PATTERN.test(text)) return literal(text.length);
+
+		const closing = findClosingBracket(text);
+		if (!closing) return null;
+
+		// A label that itself contains link syntax swallows the whole input;
+		// otherwise only the bracketed label is consumed.
+		return literal(containsLinkSyntax(closing.linkText) ? text.length : closing.bracketPosition + 1);
+	}
+
+	const {url: rawUrl, linkText, isEscaped, advanceBy} = parts;
+
+	try {
+		const normalized = URLUtils.normalizeUrl(rawUrl);
+		if (!URLUtils.isValidUrl(normalized)) return null;
+
+		// Targets starting with a single slash are emitted as plain text.
+		if (rawUrl.startsWith('/') && !rawUrl.startsWith('//')) return literal(advanceBy);
+
+		let target = normalized;
+
+		if (normalized.startsWith('tel:') || normalized.startsWith('sms:')) {
+			const schemeEnd = normalized.indexOf(':') + 1;
+			const scheme = normalized.substring(0, schemeEnd);
+			const subscriber = normalized.substring(schemeEnd);
+
+			// Only numbers with a leading `+` are passed through phone normalization.
+			if (subscriber.startsWith('+')) {
+				target = scheme + URLUtils.normalizePhoneNumber(subscriber);
+			}
+		} else {
+			target = URLUtils.convertToAsciiUrl(normalized);
+		}
+
+		const children = parseInline(linkText);
+
+		return {
+			node: {
+				type: NodeType.Link,
+				text: children.length === 1 ? children[0] : {type: NodeType.Sequence, children},
+				url: target,
+				escaped: isEscaped,
+			},
+			advance: advanceBy,
+		};
+	} catch {
+		// Any failure while normalizing, validating or parsing degrades to plain text.
+		return literal(advanceBy);
+	}
+}
+
+/**
+ * Splits `[label](url)` source into its label and URL, applying anti-spoofing checks.
+ *
+ * Returns `null` when:
+ * - there is no closing `]` or it is not directly followed by `(`;
+ * - the trimmed label contains link syntax, looks like an email address, or the whole text
+ *   matches the spoofed-link pattern;
+ * - no URL can be extracted, or the URL contains a single or double quote;
+ * - the label is itself URL-like with a protocol and either its domain cannot be determined,
+ *   differs from the target's domain, or the two URLs are not the same location.
+ *
+ * @param text - Source text starting at the opening `[`.
+ * @returns The untrimmed label plus the extracted URL info, or `null`.
+ */
+function extractLinkParts(text: string): {linkText: string; url: string; isEscaped: boolean; advanceBy: number} | null {
+	const closing = findClosingBracket(text);
+	if (!closing) return null;
+
+	const {bracketPosition, linkText} = closing;
+	const parenPosition = bracketPosition + 1;
+
+	if (parenPosition >= text.length || text.charCodeAt(parenPosition) !== OPEN_PAREN) return null;
+
+	const label = linkText.trim();
+
+	if (containsLinkSyntax(label)) return null;
+	if (EMAIL_PATTERN.test(label) || SPOOFED_LINK_PATTERN.test(text)) return null;
+
+	const urlInfo = extractUrl(text, parenPosition + 1);
+	if (!urlInfo) return null;
+
+	const {url} = urlInfo;
+	if (url.includes('"') || url.includes("'")) return null;
+
+	// When the label is itself a URL, it must point at the same place as the target.
+	if (StringUtils.startsWithUrl(label) && URL_DOMAIN_PATTERN.test(label)) {
+		try {
+			const labelDomain = extractDomainFromString(label);
+			const targetDomain = extractDomainFromString(url);
+
+			if (!labelDomain) return null;
+			if (targetDomain && labelDomain !== targetDomain) return null;
+			if (shouldTreatAsMaskedLink(label, url)) return null;
+		} catch {
+			return null;
+		}
+	}
+
+	return {
+		linkText,
+		...urlInfo,
+	};
+}
+
+/**
+ * Locates the `]` that closes the `[` assumed to sit at index 0 of `text`.
+ *
+ * Nested `[`…`]` pairs are balanced, and a backslash skips the character that follows it.
+ * Scanning gives up once the cursor moves beyond `MAX_LINK_URL_LENGTH`.
+ *
+ * @param text - Source text starting at the opening `[`.
+ * @returns The index of the closing bracket and the text between the brackets, or `null`
+ * when no closing bracket is found within the scanned range.
+ */
+function findClosingBracket(text: string): {bracketPosition: number; linkText: string} | null {
+	const length = text.length;
+	let depth = 0;
+	let cursor = 1;
+
+	while (cursor < length) {
+		const code = text.charCodeAt(cursor);
+
+		if (code === BACKSLASH && cursor + 1 < length) {
+			// Escaped character: step over both the backslash and its target.
+			cursor += 2;
+		} else {
+			if (code === CLOSE_BRACKET) {
+				if (depth === 0) {
+					return {bracketPosition: cursor, linkText: text.slice(1, cursor)};
+				}
+				depth--;
+			} else if (code === OPEN_BRACKET) {
+				depth++;
+			}
+			cursor++;
+		}
+
+		if (cursor > MAX_LINK_URL_LENGTH) return null;
+	}
+
+	return null;
+}
+
+/**
+ * Extracts the link target that begins at `startPos`, dispatching on whether it is
+ * wrapped in angle brackets (`<url>`) or written bare.
+ *
+ * @param text - Full link source.
+ * @param startPos - Index of the first character after the opening `(`.
+ * @returns The extracted URL info, or `null` when `startPos` is past the end of `text`
+ * or the delegated extractor finds no target.
+ */
+function extractUrl(text: string, startPos: number): {url: string; isEscaped: boolean; advanceBy: number} | null {
+	if (startPos >= text.length) return null;
+
+	if (text.charCodeAt(startPos) === LESS_THAN) {
+		// Skip the `<` itself; the escaped extractor starts on the URL's first character.
+		return extractEscapedUrl(text, startPos + 1);
+	}
+
+	return extractUnescapedUrl(text, startPos);
+}
+
+/**
+ * Reads an angle-bracketed link target (`<url>`) together with the `)` that closes the link.
+ *
+ * @param text - Full link source, e.g. `[label](<url>)`.
+ * @param urlStart - Index of the first character after the opening `<`.
+ * @returns The text between `<` and `>` flagged as escaped, with `advanceBy` pointing just past
+ * the closing `)`. Returns `null` when either the `>` or the closing `)` is missing, so an
+ * unterminated link is never reported with an `advanceBy` beyond the end of `text`.
+ */
+function extractEscapedUrl(
+	text: string,
+	urlStart: number,
+): {url: string; isEscaped: boolean; advanceBy: number} | null {
+	const closeAngle = text.indexOf('>', urlStart);
+	if (closeAngle === -1) return null;
+
+	// Anything between `>` and the next `)` is skipped and is not part of the URL.
+	const closeParen = text.indexOf(')', closeAngle + 1);
+	if (closeParen === -1) return null;
+
+	return {
+		url: text.slice(urlStart, closeAngle),
+		isEscaped: true,
+		advanceBy: closeParen + 1,
+	};
+}
+
+/**
+ * Reads a bare link target up to the `)` that closes the link, allowing balanced
+ * parentheses inside the URL itself.
+ *
+ * @param text - Full link source, e.g. `[label](url)`.
+ * @param urlStart - Index of the first character of the URL.
+ * @returns The URL (not escaped) with `advanceBy` pointing just past the closing `)`,
+ * or `null` when no unbalanced `)` is found.
+ */
+function extractUnescapedUrl(
+	text: string,
+	urlStart: number,
+): {url: string; isEscaped: boolean; advanceBy: number} | null {
+	let depth = 0;
+
+	for (let cursor = urlStart; cursor < text.length; cursor++) {
+		const code = text.charCodeAt(cursor);
+
+		if (code === OPEN_PAREN) {
+			depth++;
+			continue;
+		}
+
+		if (code !== CLOSE_PAREN) continue;
+
+		// A `)` at depth zero terminates the link; otherwise it closes a nested `(`.
+		if (depth === 0) {
+			return {
+				url: text.slice(urlStart, cursor),
+				isEscaped: false,
+				advanceBy: cursor + 1,
+			};
+		}
+
+		depth--;
+	}
+
+	return null;
+}
+
+/**
+ * Recognizes a bare URL at the start of `text` and turns it into a `Link` node.
+ *
+ * Only runs when `ParserFlags.ALLOW_AUTOLINKS` is set and `text` starts with `https://`,
+ * `http://` or `APP_PROTOCOL_PREFIX`. The URL extends to the first termination character
+ * (per `StringUtils.isUrlTerminationChar`) or until the length cap is exceeded, after which
+ * trailing `.,;:!?` characters are stripped.
+ *
+ * @param text - Source text beginning at the candidate URL.
+ * @param parserFlags - Bit set of `ParserFlags`.
+ * @returns A link result advancing by the trimmed URL length, or `null` when the flag is off,
+ * the prefix is unrecognized, the URL is invalid or normalizes to `mailto:`/`tel:`/`sms:`,
+ * or normalization throws.
+ */
+export function extractUrlSegment(text: string, parserFlags: number): ParserResult | null {
+	if ((parserFlags & ParserFlags.ALLOW_AUTOLINKS) === 0) return null;
+
+	const prefix = ['https://', 'http://', APP_PROTOCOL_PREFIX].find((candidate) => text.startsWith(candidate));
+	if (prefix === undefined) return null;
+
+	const prefixLength = prefix.length;
+	const sourceLength = text.length;
+
+	let end = prefixLength;
+	while (end < sourceLength && !StringUtils.isUrlTerminationChar(text[end])) {
+		end++;
+		if (end - prefixLength > MAX_LINK_URL_LENGTH) break;
+	}
+
+	const trailingPunctuation = '.,;:!?';
+	const endsWithTld = /\.[a-zA-Z]{2,}$/;
+
+	// Peel sentence punctuation off the end of the candidate, keeping `end` in step.
+	let candidate = text.slice(0, end);
+	while (
+		candidate.length > 0 &&
+		trailingPunctuation.includes(candidate[candidate.length - 1]) &&
+		!endsWithTld.test(candidate)
+	) {
+		candidate = candidate.slice(0, -1);
+		end--;
+	}
+
+	const isQuote = (char: string): boolean => char === '"' || char === "'";
+	const isInQuotes = isQuote(text.charAt(0)) || (end < sourceLength && isQuote(text.charAt(end)));
+
+	try {
+		const normalized = URLUtils.normalizeUrl(candidate);
+
+		if (!URLUtils.isValidUrl(normalized)) return null;
+
+		if (['mailto:', 'tel:', 'sms:'].some((scheme) => normalized.startsWith(scheme))) {
+			return null;
+		}
+
+		return {
+			node: {
+				type: NodeType.Link,
+				text: undefined,
+				url: URLUtils.convertToAsciiUrl(normalized),
+				escaped: isInQuotes,
+			},
+			advance: candidate.length,
+		};
+	} catch {
+		// Failures while normalizing or converting mean "not a link".
+		return null;
+	}
+}
+
+/**
+ * Parses an angle-bracket autolink such as `<https://example.com>`.
+ *
+ * Requires `ParserFlags.ALLOW_AUTOLINKS`, a leading `<` not directly followed by a quote,
+ * content that `StringUtils.startsWithUrl` accepts, a closing `>`, and a URL no longer than
+ * `MAX_LINK_URL_LENGTH` that validates and does not normalize to `mailto:`/`tel:`/`sms:`.
+ *
+ * @param text - Source text beginning at the candidate autolink.
+ * @param parserFlags - Bit set of `ParserFlags`.
+ * @returns An escaped link result consuming through the `>`, or `null`.
+ */
+export function parseAutolink(text: string, parserFlags: number): ParserResult | null {
+	if ((parserFlags & ParserFlags.ALLOW_AUTOLINKS) === 0) return null;
+	if (text.charCodeAt(0) !== LESS_THAN) return null;
+
+	// `charCodeAt` yields NaN past the end of the string, which matches neither quote.
+	const secondCode = text.charCodeAt(1);
+	if (secondCode === DOUBLE_QUOTE || secondCode === SINGLE_QUOTE) return null;
+
+	if (!StringUtils.startsWithUrl(text.slice(1))) return null;
+
+	const closeIndex = text.indexOf('>', 1);
+	if (closeIndex === -1) return null;
+
+	const candidate = text.slice(1, closeIndex);
+	if (candidate.length > MAX_LINK_URL_LENGTH) return null;
+
+	try {
+		const normalized = URLUtils.normalizeUrl(candidate);
+
+		if (!URLUtils.isValidUrl(normalized)) return null;
+
+		if (['mailto:', 'tel:', 'sms:'].some((scheme) => normalized.startsWith(scheme))) {
+			return null;
+		}
+
+		return {
+			node: {
+				type: NodeType.Link,
+				text: undefined,
+				url: URLUtils.convertToAsciiUrl(normalized),
+				escaped: true,
+			},
+			advance: closeIndex + 1,
+		};
+	} catch {
+		// Failures while normalizing or converting mean "not a link".
+		return null;
+	}
+}
+
+/**
+ * Parses an angle-bracket email autolink such as `<user@example.com>` into a `mailto:` link.
+ *
+ * Content starting with `http://`, `https://` or `+`, or lacking an `@`, is rejected before
+ * `URLUtils.isValidEmail` is consulted.
+ *
+ * @param text - Source text beginning at the candidate autolink.
+ * @param parserFlags - Bit set of `ParserFlags`; `ALLOW_AUTOLINKS` must be set.
+ * @returns An escaped link result whose label is the address, or `null`.
+ */
+export function parseEmailLink(text: string, parserFlags: number): ParserResult | null {
+	if ((parserFlags & ParserFlags.ALLOW_AUTOLINKS) === 0) return null;
+	if (text.charCodeAt(0) !== LESS_THAN) return null;
+
+	const closeIndex = text.indexOf('>', 1);
+	if (closeIndex === -1) return null;
+
+	const address = text.slice(1, closeIndex);
+
+	const looksLikeHttpUrl = address.startsWith('http://') || address.startsWith('https://');
+	if (looksLikeHttpUrl || address.charCodeAt(0) === PLUS_SIGN || !address.includes('@')) {
+		return null;
+	}
+
+	if (!URLUtils.isValidEmail(address)) return null;
+
+	return {
+		node: {
+			type: NodeType.Link,
+			text: {type: NodeType.Text, content: address},
+			url: `mailto:${address}`,
+			escaped: true,
+		},
+		advance: closeIndex + 1,
+	};
+}
+
+/**
+ * Parses an angle-bracket phone autolink such as `<+1234567890>` into a `tel:` link.
+ *
+ * The bracketed content must start with `+` and pass `URLUtils.isValidPhoneNumber`; the URL
+ * uses the output of `URLUtils.normalizePhoneNumber` while the label keeps the original text.
+ *
+ * @param text - Source text beginning at the candidate autolink.
+ * @param parserFlags - Bit set of `ParserFlags`; `ALLOW_AUTOLINKS` must be set.
+ * @returns An escaped link result consuming through the `>`, or `null`.
+ */
+export function parsePhoneLink(text: string, parserFlags: number): ParserResult | null {
+	if ((parserFlags & ParserFlags.ALLOW_AUTOLINKS) === 0) return null;
+	if (text.charCodeAt(0) !== LESS_THAN) return null;
+
+	const closeIndex = text.indexOf('>', 1);
+	if (closeIndex === -1) return null;
+
+	const rawNumber = text.slice(1, closeIndex);
+
+	if (rawNumber.charCodeAt(0) !== PLUS_SIGN) return null;
+	if (!URLUtils.isValidPhoneNumber(rawNumber)) return null;
+
+	const dialable = URLUtils.normalizePhoneNumber(rawNumber);
+
+	return {
+		node: {
+			type: NodeType.Link,
+			text: {type: NodeType.Text, content: rawNumber},
+			url: `tel:${dialable}`,
+			escaped: true,
+		},
+		advance: closeIndex + 1,
+	};
+}
+
+/**
+ * Parses an angle-bracket SMS autolink such as `<sms:+1234567890>` into an `sms:` link.
+ *
+ * The part after `sms:` must start with `+` and pass `URLUtils.isValidPhoneNumber`; the URL
+ * uses the output of `URLUtils.normalizePhoneNumber` while the label keeps the original number.
+ *
+ * @param text - Source text beginning at the candidate autolink.
+ * @param parserFlags - Bit set of `ParserFlags`; `ALLOW_AUTOLINKS` must be set.
+ * @returns An escaped link result consuming through the `>`, or `null`.
+ */
+export function parseSmsLink(text: string, parserFlags: number): ParserResult | null {
+	if ((parserFlags & ParserFlags.ALLOW_AUTOLINKS) === 0) return null;
+
+	// `startsWith` also covers the leading `<` requirement.
+	const opener = '<sms:';
+	if (!text.startsWith(opener)) return null;
+
+	const closeIndex = text.indexOf('>', 1);
+	if (closeIndex === -1) return null;
+
+	const rawNumber = text.slice(opener.length, closeIndex);
+
+	if (rawNumber.charCodeAt(0) !== PLUS_SIGN) return null;
+	if (!URLUtils.isValidPhoneNumber(rawNumber)) return null;
+
+	const dialable = URLUtils.normalizePhoneNumber(rawNumber);
+
+	return {
+		node: {
+			type: NodeType.Link,
+			text: {type: NodeType.Text, content: rawNumber},
+			url: `sms:${dialable}`,
+			escaped: true,
+		},
+		advance: closeIndex + 1,
+	};
+}
+
+/**
+ * Derives a lower-cased host name from a URL-ish string.
+ *
+ * The input is NFKC-normalized and trimmed. If it has no `://`, it is given an `https:` scheme
+ * (protocol-relative `//host` inputs keep their slashes). The host comes from the `URL`
+ * constructor, falling back to a regex over `http(s)://host` when construction throws.
+ *
+ * @param input - URL, protocol-relative URL or bare domain.
+ * @returns The lower-cased host, or `''` when the input is empty or no host can be found.
+ */
+function extractDomainFromString(input: string): string {
+	if (!input) return '';
+
+	try {
+		const cleaned = input.normalize('NFKC').trim();
+		if (cleaned === '') return '';
+
+		let candidate: string;
+		if (cleaned.includes('://')) {
+			candidate = cleaned;
+		} else if (cleaned.startsWith('//')) {
+			candidate = `https:${cleaned}`;
+		} else {
+			candidate = `https://${cleaned}`;
+		}
+
+		try {
+			return new URL(candidate).hostname.toLowerCase();
+		} catch {
+			// Unparseable URL: salvage whatever sits between the scheme and the first `/`, `?` or `#`.
+			const hostMatch = /^(?:https?:\/\/)([^/?#]+)/i.exec(candidate);
+			return hostMatch?.[1]?.toLowerCase() ?? '';
+		}
+	} catch {
+		return '';
+	}
+}
+
+/**
+ * Decides whether a URL-like link label differs from the link's actual target.
+ *
+ * Both sides are parsed with the `URL` constructor (the target after `URLUtils.normalizeUrl`)
+ * and compared on origin, pathname, search and hash.
+ *
+ * @param trimmedLinkText - Link label, expected to be a URL.
+ * @param url - Raw link target.
+ * @returns `false` only when both parse and all four components match; `true` otherwise,
+ * including when normalization or parsing throws.
+ */
+function shouldTreatAsMaskedLink(trimmedLinkText: string, url: string): boolean {
+	const comparedParts = ['origin', 'pathname', 'search', 'hash'] as const;
+
+	try {
+		const target = new URL(URLUtils.normalizeUrl(url));
+		const label = new URL(trimmedLinkText.trim());
+
+		const sameLocation = comparedParts.every((part) => target[part] === label[part]);
+		return !sameLocation;
+	} catch {
+		// Anything that cannot be parsed is treated as a mismatch.
+		return true;
+	}
+}