diff --git a/src/components/compose.jsx b/src/components/compose.jsx index e0842c3f..cbad4db8 100644 --- a/src/components/compose.jsx +++ b/src/components/compose.jsx @@ -1866,7 +1866,16 @@ const Textarea = forwardRef((props, ref) => { // Newline to prevent multiple line breaks at the end from being collapsed, no idea why }, 500); - const debouncedAutoDetectLanguage = useDebouncedCallback((text) => { + const debouncedAutoDetectLanguage = useDebouncedCallback(() => { + // Make use of the highlightRef to get the DOM + // Clone the dom + const dom = composeHighlightRef.current?.cloneNode(true); + if (!dom) return; + // Remove mark + dom.querySelectorAll('mark').forEach((mark) => { + mark.remove(); + }); + const text = dom.innerText?.trim(); if (!text) return; const langs = detectLangs(text); if (langs?.length) { @@ -1875,7 +1884,7 @@ const Textarea = forwardRef((props, ref) => { languages: langs, }); } - }, 1000); + }, 2000); return ( { autoResizeTextarea(target); props.onInput?.(e); throttleHighlightText(text); - debouncedAutoDetectLanguage(text); + debouncedAutoDetectLanguage(); }} style={{ width: '100%', diff --git a/src/components/status.jsx b/src/components/status.jsx index 4c782b01..921769ae 100644 --- a/src/components/status.jsx +++ b/src/components/status.jsx @@ -161,6 +161,8 @@ const SIZE_CLASS = { }; const detectLang = mem((text) => { + text = text?.trim(); + // Ref: https://github.com/komodojp/tinyld/blob/develop/docs/benchmark.md // 500 should be enough for now, also the default max chars for Mastodon if (text?.length > 500) { @@ -284,7 +286,40 @@ function Status({ emojiReactions, } = status; - let languageAutoDetected = content && detectLang(getHTMLText(content)); + const [languageAutoDetected, setLanguageAutoDetected] = useState(null); + useEffect(() => { + if (!content) return; + if (_language) return; + let timer; + timer = setTimeout(() => { + let detected = detectLang( + getHTMLText(content, { + preProcess: (dom) => { + // Remove anything that can skew the language detection + + // Remove .mention, .hashtag, pre, code, a:has(.invisible) + dom + .querySelectorAll( + '.mention, .hashtag, pre, code, a:has(.invisible)', + ) + .forEach((a) => { + a.remove(); + }); + + // Remove links that contains text that starts with https?:// + dom.querySelectorAll('a').forEach((a) => { + const text = a.innerText.trim(); + if (text.startsWith('https://') || text.startsWith('http://')) { + a.remove(); + } + }); + }, + }), + ); + setLanguageAutoDetected(detected); + }, 1000); + return () => clearTimeout(timer); + }, [content, _language]); const language = _language || languageAutoDetected; // if (!mediaAttachments?.length) mediaFirst = false; diff --git a/src/utils/getHTMLText.jsx b/src/utils/getHTMLText.jsx index 9ad74a3c..eadf9640 100644 --- a/src/utils/getHTMLText.jsx +++ b/src/utils/getHTMLText.jsx @@ -1,8 +1,10 @@ import mem from './mem'; const div = document.createElement('div'); -function getHTMLText(html) { +function getHTMLText(html, opts) { if (!html) return ''; + const { preProcess } = opts || {}; + div.innerHTML = html .replace(/<\/p>/g, '

\n\n') .replace(/<\/li>/g, '\n'); @@ -10,6 +12,8 @@ function getHTMLText(html) { br.replaceWith('\n'); }); + preProcess?.(div); + // MASTODON-SPECIFIC classes // Remove .invisible div.querySelectorAll('.invisible').forEach((el) => {