Safeguard for unknown HTML cases in status content

1. Mark up code blocks first because anything inside code blocks cannot be "enhanced".
2. Set default rejects for elements that don't need to be "enhanced". This is not a complete list, just a best effort. An allowlist would probably work better than a blocklist, but some content doesn't even start with any parent nodes.
This commit is contained in:
Lim Chee Aun 2022-12-23 09:20:34 +08:00
parent 206d40c0a4
commit 31e2196cd6

View file

@ -29,20 +29,6 @@ function enhanceContent(content, opts = {}) {
node.replaceWith(...nodes); node.replaceWith(...nodes);
}); });
// INLINE CODE
// ===========
// Convert `code` to <code>code</code>
textNodes = extractTextNodes(dom);
textNodes.forEach((node) => {
let html = node.nodeValue.replace(/</g, '&lt;').replace(/>/g, '&gt;');
if (/`[^`]+`/g.test(html)) {
html = html.replaceAll(/(`[^]+?`)/g, '<code>$1</code>');
}
fauxDiv.innerHTML = html;
const nodes = Array.from(fauxDiv.childNodes);
node.replaceWith(...nodes);
});
// CODE BLOCKS // CODE BLOCKS
// =========== // ===========
// Convert ```code``` to <pre><code>code</code></pre> // Convert ```code``` to <pre><code>code</code></pre>
@ -57,10 +43,26 @@ function enhanceContent(content, opts = {}) {
block.replaceWith(pre); block.replaceWith(pre);
}); });
// INLINE CODE
// ===========
// Convert `code` to <code>code</code>
textNodes = extractTextNodes(dom);
textNodes.forEach((node) => {
let html = node.nodeValue.replace(/</g, '&lt;').replace(/>/g, '&gt;');
if (/`[^`]+`/g.test(html)) {
html = html.replaceAll(/(`[^]+?`)/g, '<code>$1</code>');
}
fauxDiv.innerHTML = html;
const nodes = Array.from(fauxDiv.childNodes);
node.replaceWith(...nodes);
});
// TWITTER USERNAMES // TWITTER USERNAMES
// ================= // =================
// Convert @username@twitter.com to <a href="https://twitter.com/username">@username@twitter.com</a> // Convert @username@twitter.com to <a href="https://twitter.com/username">@username@twitter.com</a>
textNodes = extractTextNodes(dom); textNodes = extractTextNodes(dom, {
rejectFilter: ['A'],
});
textNodes.forEach((node) => { textNodes.forEach((node) => {
let html = node.nodeValue.replace(/</g, '&lt;').replace(/>/g, '&gt;'); let html = node.nodeValue.replace(/</g, '&lt;').replace(/>/g, '&gt;');
if (/@[a-zA-Z0-9_]+@twitter\.com/g.test(html)) { if (/@[a-zA-Z0-9_]+@twitter\.com/g.test(html)) {
@ -83,12 +85,60 @@ function enhanceContent(content, opts = {}) {
return enhancedContent; return enhancedContent;
} }
function extractTextNodes(dom) { const defaultRejectFilter = [
// Document metadata
'STYLE',
// Image and multimedia
'IMG',
'VIDEO',
'AUDIO',
'AREA',
'MAP',
'TRACK',
// Embedded content
'EMBED',
'IFRAME',
'OBJECT',
'PICTURE',
'PORTAL',
'SOURCE',
// SVG and MathML
'SVG',
'MATH',
// Scripting
'CANVAS',
'NOSCRIPT',
'SCRIPT',
// Forms
'INPUT',
'OPTION',
'TEXTAREA',
// Web Components
'SLOT',
'TEMPLATE',
];
const defaultRejectFilterMap = Object.fromEntries(
defaultRejectFilter.map((nodeName) => [nodeName, true]),
);
function extractTextNodes(dom, opts = {}) {
const textNodes = []; const textNodes = [];
const walk = document.createTreeWalker( const walk = document.createTreeWalker(
dom, dom,
NodeFilter.SHOW_TEXT, NodeFilter.SHOW_TEXT,
null, {
acceptNode(node) {
if (defaultRejectFilterMap[node.parentNode.nodeName]) {
return NodeFilter.FILTER_REJECT;
}
if (
opts.rejectFilter &&
opts.rejectFilter.includes(node.parentNode.nodeName)
) {
return NodeFilter.FILTER_REJECT;
}
return NodeFilter.FILTER_ACCEPT;
},
},
false, false,
); );
let node; let node;