From 31e2196cd6eecf8c87e61dab9d329affdffb0453 Mon Sep 17 00:00:00 2001 From: Lim Chee Aun Date: Fri, 23 Dec 2022 09:20:34 +0800 Subject: [PATCH] Safeguard for unknown HTML cases in status content 1. Mark up code blocks first because anything inside code blocks cannot be "enhanced". 2. Set default rejects for elements that don't need to be "enhanced". Not a complete list but best-effort. Probably allowlist works better than blocklist, but some content doesn't even start with any parent nodes. --- src/utils/enhance-content.js | 84 ++++++++++++++++++++++++++++-------- 1 file changed, 67 insertions(+), 17 deletions(-) diff --git a/src/utils/enhance-content.js b/src/utils/enhance-content.js index 417c03c2..152df8a8 100644 --- a/src/utils/enhance-content.js +++ b/src/utils/enhance-content.js @@ -29,20 +29,6 @@ function enhanceContent(content, opts = {}) { node.replaceWith(...nodes); }); - // INLINE CODE - // =========== - // Convert `code` to <code>code</code> - textNodes = extractTextNodes(dom); - textNodes.forEach((node) => { - let html = node.nodeValue.replace(/</g, '&lt;').replace(/>/g, '&gt;'); - if (/`[^`]+`/g.test(html)) { - html = html.replaceAll(/(`[^]+?`)/g, '<code>$1</code>'); - } - fauxDiv.innerHTML = html; - const nodes = Array.from(fauxDiv.childNodes); - node.replaceWith(...nodes); - }); - // CODE BLOCKS // =========== // Convert ```code``` to
<pre><code>code</code></pre>
@@ -57,10 +43,26 @@ function enhanceContent(content, opts = {}) { block.replaceWith(pre); }); + // INLINE CODE + // =========== + // Convert `code` to <code>code</code> + textNodes = extractTextNodes(dom); + textNodes.forEach((node) => { + let html = node.nodeValue.replace(/</g, '&lt;').replace(/>/g, '&gt;'); + if (/`[^`]+`/g.test(html)) { + html = html.replaceAll(/(`[^]+?`)/g, '<code>$1</code>'); + } + fauxDiv.innerHTML = html; + const nodes = Array.from(fauxDiv.childNodes); + node.replaceWith(...nodes); + }); + // TWITTER USERNAMES // ================= // Convert @username@twitter.com to <a href="https://twitter.com/username">@username@twitter.com</a> - textNodes = extractTextNodes(dom); + textNodes = extractTextNodes(dom, { + rejectFilter: ['A'], + }); textNodes.forEach((node) => { let html = node.nodeValue.replace(/</g, '&lt;').replace(/>/g, '&gt;'); if (/@[a-zA-Z0-9_]+@twitter\.com/g.test(html)) { @@ -83,12 +85,60 @@ function enhanceContent(content, opts = {}) { return enhancedContent; } -function extractTextNodes(dom) { +const defaultRejectFilter = [ + // Document metadata + 'STYLE', + // Image and multimedia + 'IMG', + 'VIDEO', + 'AUDIO', + 'AREA', + 'MAP', + 'TRACK', + // Embedded content + 'EMBED', + 'IFRAME', + 'OBJECT', + 'PICTURE', + 'PORTAL', + 'SOURCE', + // SVG and MathML + 'SVG', + 'MATH', + // Scripting + 'CANVAS', + 'NOSCRIPT', + 'SCRIPT', + // Forms + 'INPUT', + 'OPTION', + 'TEXTAREA', + // Web Components + 'SLOT', + 'TEMPLATE', +]; +const defaultRejectFilterMap = Object.fromEntries( + defaultRejectFilter.map((nodeName) => [nodeName, true]), +); +function extractTextNodes(dom, opts = {}) { const textNodes = []; const walk = document.createTreeWalker( dom, NodeFilter.SHOW_TEXT, - null, + { + acceptNode(node) { + if (defaultRejectFilterMap[node.parentNode.nodeName]) { + return NodeFilter.FILTER_REJECT; + } + if ( + opts.rejectFilter && + opts.rejectFilter.includes(node.parentNode.nodeName) + ) { + return NodeFilter.FILTER_REJECT; + } + return NodeFilter.FILTER_ACCEPT; + }, + }, false, ); let node;