initial commit
This commit is contained in:
540
fluxer_app/src/lib/markdown/parser/parsers/link-parsers.ts
Normal file
540
fluxer_app/src/lib/markdown/parser/parsers/link-parsers.ts
Normal file
@@ -0,0 +1,540 @@
|
||||
/*
|
||||
* Copyright (C) 2026 Fluxer Contributors
|
||||
*
|
||||
* This file is part of Fluxer.
|
||||
*
|
||||
* Fluxer is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Fluxer is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with Fluxer. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {APP_PROTOCOL_PREFIX} from '~/utils/appProtocol';
|
||||
import {MAX_LINK_URL_LENGTH} from '../types/constants';
|
||||
import {NodeType, ParserFlags} from '../types/enums';
|
||||
import type {Node, ParserResult} from '../types/nodes';
|
||||
import * as StringUtils from '../utils/string-utils';
|
||||
import * as URLUtils from '../utils/url-utils';
|
||||
|
||||
/** Matches input that is, in its entirety, `[http(s)://…](http(s)://…)`: a URL used as the label of a link to a URL. */
const SPOOFED_LINK_PATTERN = /^\[https?:\/\/[^\s[\]]+\]\(https?:\/\/[^\s[\]]+\)$/;

/** Loose `local@domain.tld` shape check: no whitespace, exactly one `@`, at least one dot after it. */
const EMAIL_PATTERN = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;

/** Matches a dotted host name with an alphabetic TLD, optionally preceded by `http(s)://` and followed by a path. */
const URL_DOMAIN_PATTERN =
	/^(?:https?:\/\/)?(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,}(?:\/[^\s[\]]*)?$/;

// UTF-16 code units, compared against `charCodeAt` results by the scanners below.
const OPEN_BRACKET = 91; // [
const CLOSE_BRACKET = 93; // ]
const OPEN_PAREN = 40; // (
const CLOSE_PAREN = 41; // )
const BACKSLASH = 92; // \
const LESS_THAN = 60; // <
const GREATER_THAN = 62; // >
const DOUBLE_QUOTE = 34; // "
const SINGLE_QUOTE = 39; // '
const PLUS_SIGN = 43; // +
|
||||
|
||||
/**
 * Reports whether `text` contains a `[...](` sequence, i.e. the start of
 * markdown link syntax.
 *
 * The scan pairs each `[` with the first `]` that follows it and checks whether
 * the very next character is `(`. When it is not, scanning resumes right after
 * that `]`.
 *
 * Implemented as a loop rather than recursion so that inputs with many bracket
 * pairs cannot exhaust the call stack.
 *
 * @param text - The string to inspect.
 * @returns `true` if a `]` that closes some `[` is immediately followed by `(`.
 */
function containsLinkSyntax(text: string): boolean {
	let openIndex = text.indexOf('[');

	while (openIndex !== -1) {
		const closeIndex = text.indexOf(']', openIndex);
		if (closeIndex === -1) return false;

		// Indexing past the end yields `undefined`, which never equals '('.
		if (text[closeIndex + 1] === '(') return true;

		openIndex = text.indexOf('[', closeIndex + 1);
	}

	return false;
}
|
||||
|
||||
/**
 * Attempts to parse a markdown link (`[label](url)`) at the start of `text`.
 *
 * Outcomes:
 * - `null` when `text` does not start with `[`, when no closing bracket is
 *   found, or when the extracted URL does not validate.
 * - A `Text` node when the input is bracketed but not accepted as a link: the
 *   whole input if it matches the spoofed-link pattern or the label itself
 *   contains link syntax, otherwise just the `[...]` prefix.
 * - A `Text` node covering the link source when the URL is root-relative
 *   (starts with `/` but not `//`) or when normalisation, conversion or inline
 *   parsing throws.
 * - A `Link` node otherwise.
 *
 * @param text - Remaining input, expected to begin at a candidate link.
 * @param _parserFlags - Unused by this parser.
 * @param parseInline - Callback used to parse the link label into inline nodes.
 * @returns The parsed node and the number of characters consumed, or `null`.
 */
export function parseLink(
	text: string,
	_parserFlags: number,
	parseInline: (text: string) => Array<Node>,
): ParserResult | null {
	if (text.charCodeAt(0) !== OPEN_BRACKET) return null;

	const parts = extractLinkParts(text);

	if (!parts) {
		// Emits the first `length` characters of the input as literal text.
		const literal = (length: number): ParserResult => ({
			node: {type: NodeType.Text, content: length === text.length ? text : text.slice(0, length)},
			advance: length,
		});

		if (SPOOFED_LINK_PATTERN.test(text)) return literal(text.length);

		const closing = findClosingBracket(text);
		if (!closing) return null;

		// A label that itself contains link syntax swallows the whole input as text.
		if (containsLinkSyntax(closing.linkText)) return literal(text.length);

		return literal(closing.bracketPosition + 1);
	}

	const {url: rawUrl, linkText, isEscaped, advanceBy} = parts;

	// Falls back to rendering the link source verbatim.
	const asSourceText = (): ParserResult => ({
		node: {type: NodeType.Text, content: text.slice(0, advanceBy)},
		advance: advanceBy,
	});

	try {
		const normalizedUrl = URLUtils.normalizeUrl(rawUrl);
		if (!URLUtils.isValidUrl(normalizedUrl)) return null;

		// Root-relative targets are shown as text rather than linked.
		if (rawUrl.startsWith('/') && !rawUrl.startsWith('//')) return asSourceText();

		let finalUrl = normalizedUrl;
		const isPhoneScheme = normalizedUrl.startsWith('tel:') || normalizedUrl.startsWith('sms:');

		if (isPhoneScheme) {
			const schemeEnd = normalizedUrl.indexOf(':') + 1;
			const protocol = normalizedUrl.substring(0, schemeEnd);
			const phoneNumber = normalizedUrl.substring(schemeEnd);

			// Only numbers written with a leading '+' are normalised.
			if (phoneNumber.startsWith('+')) {
				finalUrl = protocol + URLUtils.normalizePhoneNumber(phoneNumber);
			}
		} else {
			finalUrl = URLUtils.convertToAsciiUrl(normalizedUrl);
		}

		const labelNodes = parseInline(linkText);

		return {
			node: {
				type: NodeType.Link,
				text: labelNodes.length === 1 ? labelNodes[0] : {type: NodeType.Sequence, children: labelNodes},
				url: finalUrl,
				escaped: isEscaped,
			},
			advance: advanceBy,
		};
	} catch {
		return asSourceText();
	}
}
|
||||
|
||||
/**
 * Splits `[label](url)` at the start of `text` into its components, applying
 * the anti-spoofing checks that decide whether the construct may become a link.
 *
 * Returns `null` when:
 * - no closing bracket is found, or it is not immediately followed by `(`;
 * - the trimmed label contains link syntax, looks like an e-mail address, or
 *   the whole input matches the spoofed-link pattern;
 * - no URL can be extracted, or the URL contains a single or double quote;
 * - the label both starts with a URL and looks like a URL/domain, and either
 *   its domain cannot be determined, differs from the target's domain, or the
 *   pair is judged to be a masked link (any exception in this step also
 *   yields `null`).
 *
 * @param text - Input beginning with `[`.
 * @returns The untrimmed label, the URL, whether the URL was written in
 *   `<...>` form, and the number of characters consumed; or `null`.
 */
function extractLinkParts(text: string): {linkText: string; url: string; isEscaped: boolean; advanceBy: number} | null {
	const closing = findClosingBracket(text);
	if (!closing) return null;

	const parenIndex = closing.bracketPosition + 1;
	const hasOpeningParen = parenIndex < text.length && text.charCodeAt(parenIndex) === OPEN_PAREN;
	if (!hasOpeningParen) return null;

	const label = closing.linkText.trim();
	if (containsLinkSyntax(label)) return null;

	if (EMAIL_PATTERN.test(label) || SPOOFED_LINK_PATTERN.test(text)) return null;

	const target = extractUrl(text, parenIndex + 1);
	if (!target) return null;

	if (target.url.includes('"') || target.url.includes("'")) return null;

	const labelLooksLikeDomain = URL_DOMAIN_PATTERN.test(label);
	const labelStartsWithUrl = StringUtils.startsWithUrl(label);

	if (labelStartsWithUrl && labelLooksLikeDomain) {
		try {
			const labelDomain = extractDomainFromString(label);
			const targetDomain = extractDomainFromString(target.url);

			// An undeterminable target domain skips the comparison; an undeterminable label domain rejects.
			if (!labelDomain) return null;
			if (targetDomain && labelDomain !== targetDomain) return null;

			if (shouldTreatAsMaskedLink(label, target.url)) return null;
		} catch {
			return null;
		}
	}

	return {
		linkText: closing.linkText,
		url: target.url,
		isEscaped: target.isEscaped,
		advanceBy: target.advanceBy,
	};
}
|
||||
|
||||
/**
 * Locates the `]` that closes the `[` assumed to sit at index 0 of `text`.
 *
 * Nested `[`/`]` pairs are balanced, and a backslash skips the character that
 * follows it. Scanning is abandoned once the cursor moves past
 * `MAX_LINK_URL_LENGTH`.
 *
 * @param text - Input whose first character is the opening bracket.
 * @returns The index of the closing bracket and the text between the brackets,
 *   or `null` if no closing bracket is found within the limit.
 */
function findClosingBracket(text: string): {bracketPosition: number; linkText: string} | null {
	const end = text.length;
	let depth = 0;
	let cursor = 1;

	while (cursor < end) {
		const code = text.charCodeAt(cursor);

		if (code === CLOSE_BRACKET && depth === 0) {
			return {bracketPosition: cursor, linkText: text.slice(1, cursor)};
		}

		let step = 1;
		if (code === OPEN_BRACKET) {
			depth++;
		} else if (code === CLOSE_BRACKET) {
			depth--;
		} else if (code === BACKSLASH && cursor + 1 < end) {
			// Skip the escaped character together with the backslash.
			step = 2;
		}

		cursor += step;
		if (cursor > MAX_LINK_URL_LENGTH) return null;
	}

	return null;
}
|
||||
|
||||
/**
 * Extracts the link target that begins at `startPos`, dispatching on whether
 * it is written in angle-bracket form (`<url>`) or bare.
 *
 * @param text - Full link source.
 * @param startPos - Index of the first character after the opening `(`.
 * @returns The URL details, or `null` if `startPos` is out of range or the
 *   chosen extractor finds no URL.
 */
function extractUrl(text: string, startPos: number): {url: string; isEscaped: boolean; advanceBy: number} | null {
	if (startPos >= text.length) return null;

	if (text.charCodeAt(startPos) === LESS_THAN) {
		// Step over the '<' so the extractor starts on the URL itself.
		return extractEscapedUrl(text, startPos + 1);
	}

	return extractUnescapedUrl(text, startPos);
}
|
||||
|
||||
/**
 * Extracts a link target written in angle-bracket form: `<url>` followed,
 * possibly after other characters, by the closing `)`.
 *
 * The URL is everything from `urlStart` up to the first `>`. Anything between
 * that `>` and the next `)` is skipped and not returned.
 *
 * Returns `null` when either the `>` or the closing `)` is missing. Previously
 * a missing `)` still produced a result whose `advanceBy` pointed one past the
 * end of `text`; an unterminated link is now rejected, as it is for bare URLs.
 *
 * @param text - Full link source.
 * @param urlStart - Index of the first URL character (just after `<`).
 * @returns The URL, `isEscaped: true`, and the index just past the closing
 *   `)`; or `null` if the construct is not terminated.
 */
function extractEscapedUrl(
	text: string,
	urlStart: number,
): {url: string; isEscaped: boolean; advanceBy: number} | null {
	const urlEnd = text.indexOf('>', urlStart);
	if (urlEnd === -1) return null;

	const closeParen = text.indexOf(')', urlEnd + 1);
	if (closeParen === -1) return null;

	return {
		url: text.slice(urlStart, urlEnd),
		isEscaped: true,
		advanceBy: closeParen + 1,
	};
}
|
||||
|
||||
/**
 * Extracts a bare link target: everything from `urlStart` up to the `)` that
 * closes the link. Parentheses inside the URL are balanced, so only a `)` seen
 * at nesting depth zero terminates it.
 *
 * @param text - Full link source.
 * @param urlStart - Index of the first URL character (just after `(`).
 * @returns The URL, `isEscaped: false`, and the index just past the closing
 *   `)`; or `null` if no terminating `)` is found.
 */
function extractUnescapedUrl(
	text: string,
	urlStart: number,
): {url: string; isEscaped: boolean; advanceBy: number} | null {
	let depth = 0;

	for (let index = urlStart; index < text.length; index++) {
		const code = text.charCodeAt(index);

		if (code === OPEN_PAREN) {
			depth++;
			continue;
		}

		if (code !== CLOSE_PAREN) continue;

		if (depth === 0) {
			return {
				url: text.slice(urlStart, index),
				isEscaped: false,
				advanceBy: index + 1,
			};
		}

		depth--;
	}

	return null;
}
|
||||
|
||||
/**
 * Recognises a bare URL at the start of `text` and turns it into a `Link`
 * node with no label.
 *
 * Only inputs starting with `https://`, `http://` or `APP_PROTOCOL_PREFIX` are
 * considered, and only when `ParserFlags.ALLOW_AUTOLINKS` is set. The URL runs
 * until the first character `StringUtils.isUrlTerminationChar` accepts, or
 * until more than `MAX_LINK_URL_LENGTH` characters follow the prefix. Trailing
 * `.,;:!?` characters are then stripped.
 *
 * @param text - Remaining input.
 * @param parserFlags - Bit set of `ParserFlags`.
 * @returns The link node and consumed length, or `null` if autolinks are
 *   disabled, the prefix does not match, the URL does not validate, it
 *   normalises to a `mailto:`/`tel:`/`sms:` URL, or a helper throws.
 */
export function extractUrlSegment(text: string, parserFlags: number): ParserResult | null {
	if (!(parserFlags & ParserFlags.ALLOW_AUTOLINKS)) return null;

	let prefixLength = 0;
	if (text.startsWith('https://')) {
		prefixLength = 8;
	} else if (text.startsWith('http://')) {
		prefixLength = 7;
	} else if (text.startsWith(APP_PROTOCOL_PREFIX)) {
		prefixLength = APP_PROTOCOL_PREFIX.length;
	} else {
		return null;
	}

	const textLength = text.length;
	let end = prefixLength;

	while (end < textLength && !StringUtils.isUrlTerminationChar(text[end])) {
		end++;
		if (end - prefixLength > MAX_LINK_URL_LENGTH) break;
	}

	// Strip trailing sentence punctuation. The former extra guard
	// `!urlString.match(/\.[a-zA-Z]{2,}$/)` could never be false here, because
	// a string ending in one of these characters cannot end in letters.
	const punctuation = '.,;:!?';
	while (end > 0 && punctuation.includes(text.charAt(end - 1))) {
		end--;
	}

	const urlString = text.slice(0, end);

	// NOTE(review): the leading-quote checks look unreachable given the prefix
	// checks above (unless APP_PROTOCOL_PREFIX starts with a quote); kept as-is.
	const isInQuotes =
		text.charAt(0) === '"' ||
		text.charAt(0) === "'" ||
		(end < textLength && (text.charAt(end) === '"' || text.charAt(end) === "'"));

	try {
		const normalizedUrl = URLUtils.normalizeUrl(urlString);

		if (URLUtils.isValidUrl(normalizedUrl)) {
			if (normalizedUrl.startsWith('mailto:') || normalizedUrl.startsWith('tel:') || normalizedUrl.startsWith('sms:')) {
				return null;
			}

			const finalUrl = URLUtils.convertToAsciiUrl(normalizedUrl);

			return {
				node: {type: NodeType.Link, text: undefined, url: finalUrl, escaped: isInQuotes},
				advance: urlString.length,
			};
		}
	} catch (_e) {
		// Best effort: a URL the helpers cannot process is simply not linked.
	}

	return null;
}
|
||||
|
||||
/**
 * Parses an angle-bracket autolink (`<https://…>`) at the start of `text`.
 *
 * @param text - Remaining input.
 * @param parserFlags - Bit set of `ParserFlags`.
 * @returns A label-less `Link` node marked as escaped, consuming through the
 *   closing `>`; or `null` when autolinks are disabled, the input does not
 *   start with `<`, the first inner character is a quote, the content does not
 *   start with a URL, there is no `>`, the URL exceeds `MAX_LINK_URL_LENGTH`,
 *   fails validation, normalises to `mailto:`/`tel:`/`sms:`, or a helper throws.
 */
export function parseAutolink(text: string, parserFlags: number): ParserResult | null {
	const autolinksEnabled = (parserFlags & ParserFlags.ALLOW_AUTOLINKS) !== 0;
	if (!autolinksEnabled) return null;

	if (text.charCodeAt(0) !== LESS_THAN) return null;

	// `charCodeAt` yields NaN past the end, which matches neither quote.
	const firstInner = text.charCodeAt(1);
	if (firstInner === DOUBLE_QUOTE || firstInner === SINGLE_QUOTE) return null;

	if (!StringUtils.startsWithUrl(text.slice(1))) return null;

	const closeIndex = text.indexOf('>', 1);
	if (closeIndex === -1) return null;

	const rawUrl = text.slice(1, closeIndex);
	if (rawUrl.length > MAX_LINK_URL_LENGTH) return null;

	try {
		const normalizedUrl = URLUtils.normalizeUrl(rawUrl);
		if (!URLUtils.isValidUrl(normalizedUrl)) return null;

		// These schemes are not produced by this parser.
		const isExcludedScheme =
			normalizedUrl.startsWith('mailto:') || normalizedUrl.startsWith('tel:') || normalizedUrl.startsWith('sms:');
		if (isExcludedScheme) return null;

		return {
			node: {
				type: NodeType.Link,
				text: undefined,
				url: URLUtils.convertToAsciiUrl(normalizedUrl),
				escaped: true,
			},
			advance: closeIndex + 1,
		};
	} catch (_e) {
		return null;
	}
}
|
||||
|
||||
/**
 * Parses an angle-bracket e-mail autolink (`<user@host>`) at the start of `text`.
 *
 * @param text - Remaining input.
 * @param parserFlags - Bit set of `ParserFlags`.
 * @returns A `Link` node whose label is the address and whose URL is
 *   `mailto:` plus the address, consuming through the closing `>`; or `null`
 *   when autolinks are disabled, the input does not start with `<`, there is no
 *   `>`, the content starts with `http://`, `https://` or `+`, contains no
 *   `@`, or fails `URLUtils.isValidEmail`.
 */
export function parseEmailLink(text: string, parserFlags: number): ParserResult | null {
	if ((parserFlags & ParserFlags.ALLOW_AUTOLINKS) === 0) return null;
	if (text.charCodeAt(0) !== LESS_THAN) return null;

	const closeIndex = text.indexOf('>', 1);
	if (closeIndex === -1) return null;

	const address = text.slice(1, closeIndex);

	const looksLikeWebUrl = address.startsWith('http://') || address.startsWith('https://');
	const startsWithPlus = address.charCodeAt(0) === PLUS_SIGN;
	if (looksLikeWebUrl || startsWithPlus || address.indexOf('@') === -1) return null;

	if (!URLUtils.isValidEmail(address)) return null;

	return {
		node: {
			type: NodeType.Link,
			text: {type: NodeType.Text, content: address},
			url: `mailto:${address}`,
			escaped: true,
		},
		advance: closeIndex + 1,
	};
}
|
||||
|
||||
/**
 * Parses an angle-bracket phone autolink (`<+…>`) at the start of `text`.
 *
 * @param text - Remaining input.
 * @param parserFlags - Bit set of `ParserFlags`.
 * @returns A `Link` node whose label is the number as written and whose URL is
 *   `tel:` plus the normalised number, consuming through the closing `>`; or
 *   `null` when autolinks are disabled, the input does not start with `<`,
 *   there is no `>`, the content does not start with `+`, or it fails
 *   `URLUtils.isValidPhoneNumber`.
 */
export function parsePhoneLink(text: string, parserFlags: number): ParserResult | null {
	if ((parserFlags & ParserFlags.ALLOW_AUTOLINKS) === 0) return null;
	if (text.charCodeAt(0) !== LESS_THAN) return null;

	const closeIndex = text.indexOf('>', 1);
	if (closeIndex === -1) return null;

	const rawNumber = text.slice(1, closeIndex);
	if (rawNumber.charCodeAt(0) !== PLUS_SIGN) return null;

	if (!URLUtils.isValidPhoneNumber(rawNumber)) return null;

	const dialable = URLUtils.normalizePhoneNumber(rawNumber);

	return {
		node: {
			type: NodeType.Link,
			text: {type: NodeType.Text, content: rawNumber},
			url: `tel:${dialable}`,
			escaped: true,
		},
		advance: closeIndex + 1,
	};
}
|
||||
|
||||
/**
 * Parses an angle-bracket SMS autolink (`<sms:+…>`) at the start of `text`.
 *
 * @param text - Remaining input.
 * @param parserFlags - Bit set of `ParserFlags`.
 * @returns A `Link` node whose label is the number as written (without the
 *   `sms:` prefix) and whose URL is `sms:` plus the normalised number,
 *   consuming through the closing `>`; or `null` when autolinks are disabled,
 *   the input does not start with `<sms:`, there is no `>`, the number does not
 *   start with `+`, or it fails `URLUtils.isValidPhoneNumber`.
 */
export function parseSmsLink(text: string, parserFlags: number): ParserResult | null {
	if ((parserFlags & ParserFlags.ALLOW_AUTOLINKS) === 0) return null;

	if (text.charCodeAt(0) !== LESS_THAN) return null;
	if (!text.startsWith('<sms:')) return null;

	const closeIndex = text.indexOf('>', 1);
	if (closeIndex === -1) return null;

	// Drop the four-character `sms:` scheme from the bracketed content.
	const rawNumber = text.slice(1, closeIndex).slice(4);

	const startsWithPlus = rawNumber.charCodeAt(0) === PLUS_SIGN;
	if (!startsWithPlus || !URLUtils.isValidPhoneNumber(rawNumber)) return null;

	const dialable = URLUtils.normalizePhoneNumber(rawNumber);

	return {
		node: {
			type: NodeType.Link,
			text: {type: NodeType.Text, content: rawNumber},
			url: `sms:${dialable}`,
			escaped: true,
		},
		advance: closeIndex + 1,
	};
}
|
||||
|
||||
/**
 * Extracts the lower-cased host name from a URL or URL-like string.
 *
 * The input is NFKC-normalised and trimmed. If it does not begin with a
 * `scheme://` prefix, `https:` (for `//host` inputs) or `https://` is
 * prepended before parsing. The scheme test is anchored at the start of the
 * string so that a `://` appearing later, for example inside a query
 * parameter, is not mistaken for the input's own scheme.
 *
 * If `URL` parsing fails, a regex fallback returns whatever follows
 * `http(s)://` up to the first `/`, `?` or `#`.
 *
 * @param input - A URL, a protocol-relative URL, or a bare host/path.
 * @returns The host name in lower case, or `''` if none can be determined.
 */
function extractDomainFromString(input: string): string {
	if (!input) return '';

	try {
		const normalized = input.normalize('NFKC').trim();
		if (!normalized) return '';

		const hasLeadingScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(normalized);

		let urlCandidate = normalized;
		if (!hasLeadingScheme) {
			urlCandidate = normalized.startsWith('//') ? `https:${normalized}` : `https://${normalized}`;
		}

		try {
			return new URL(urlCandidate).hostname.toLowerCase();
		} catch {
			// `URL` rejected the candidate; fall back to a textual extraction.
			const match = urlCandidate.match(/^(?:https?:\/\/)([^/?#]+)/i);
			return match?.[1] ? match[1].toLowerCase() : '';
		}
	} catch {
		return '';
	}
}
|
||||
|
||||
/**
 * Decides whether a URL-looking label hides a different destination.
 *
 * The label and the normalised target are both parsed with `URL`. The pair is
 * considered NOT masked only when origin, pathname, search and hash are all
 * equal. Any parsing or normalisation failure counts as masked.
 *
 * @param trimmedLinkText - The link label (trimmed again here).
 * @param url - The raw link target.
 * @returns `false` if label and target denote the same location, else `true`.
 */
function shouldTreatAsMaskedLink(trimmedLinkText: string, url: string): boolean {
	const label = trimmedLinkText.trim();

	try {
		const target = new URL(URLUtils.normalizeUrl(url));
		const shown = new URL(label);

		const isSameLocation =
			target.origin === shown.origin &&
			target.pathname === shown.pathname &&
			target.search === shown.search &&
			target.hash === shown.hash;

		if (isSameLocation) return false;
	} catch {
		// Unparseable label or target: fall through and treat as masked.
	}

	return true;
}
|
||||
Reference in New Issue
Block a user