It's time for MVP-ish language auto-detection

This commit is contained in:
Lim Chee Aun 2024-05-28 17:59:17 +08:00
parent d16221e296
commit 0a4aae51b7
5 changed files with 76 additions and 5 deletions

16
package-lock.json generated
View file

@ -38,6 +38,7 @@
"react-router-dom": "6.6.2", "react-router-dom": "6.6.2",
"string-length": "6.0.0", "string-length": "6.0.0",
"swiped-events": "~1.2.0", "swiped-events": "~1.2.0",
"tinyld": "~1.3.4",
"toastify-js": "~1.12.0", "toastify-js": "~1.12.0",
"uid": "~2.0.2", "uid": "~2.0.2",
"use-debounce": "~10.0.0", "use-debounce": "~10.0.0",
@ -8009,6 +8010,21 @@
"node": ">=10" "node": ">=10"
} }
}, },
"node_modules/tinyld": {
"version": "1.3.4",
"resolved": "https://registry.npmjs.org/tinyld/-/tinyld-1.3.4.tgz",
"integrity": "sha512-u26CNoaInA4XpDU+8s/6Cq8xHc2T5M4fXB3ICfXPokUQoLzmPgSZU02TAkFwFMJCWTjk53gtkS8pETTreZwCqw==",
"bin": {
"tinyld": "bin/tinyld.js",
"tinyld-heavy": "bin/tinyld-heavy.js",
"tinyld-light": "bin/tinyld-light.js"
},
"engines": {
"node": ">= 12.10.0",
"npm": ">= 6.12.0",
"yarn": ">= 1.20.0"
}
},
"node_modules/to-fast-properties": { "node_modules/to-fast-properties": {
"version": "2.0.0", "version": "2.0.0",
"resolved": "https://registry.npmjs.org/to-fast-properties/-/to-fast-properties-2.0.0.tgz", "resolved": "https://registry.npmjs.org/to-fast-properties/-/to-fast-properties-2.0.0.tgz",

View file

@ -40,6 +40,7 @@
"react-router-dom": "6.6.2", "react-router-dom": "6.6.2",
"string-length": "6.0.0", "string-length": "6.0.0",
"swiped-events": "~1.2.0", "swiped-events": "~1.2.0",
"tinyld": "~1.3.4",
"toastify-js": "~1.12.0", "toastify-js": "~1.12.0",
"uid": "~2.0.2", "uid": "~2.0.2",
"use-debounce": "~10.0.0", "use-debounce": "~10.0.0",

View file

@ -15,6 +15,7 @@ import {
} from 'preact/hooks'; } from 'preact/hooks';
import { useHotkeys } from 'react-hotkeys-hook'; import { useHotkeys } from 'react-hotkeys-hook';
import stringLength from 'string-length'; import stringLength from 'string-length';
import { detectAll } from 'tinyld/light';
import { uid } from 'uid/single'; import { uid } from 'uid/single';
import { useDebouncedCallback, useThrottledCallback } from 'use-debounce'; import { useDebouncedCallback, useThrottledCallback } from 'use-debounce';
import { useSnapshot } from 'valtio'; import { useSnapshot } from 'valtio';
@ -635,6 +636,7 @@ function Compose({
const [showEmoji2Picker, setShowEmoji2Picker] = useState(false); const [showEmoji2Picker, setShowEmoji2Picker] = useState(false);
const [showGIFPicker, setShowGIFPicker] = useState(false); const [showGIFPicker, setShowGIFPicker] = useState(false);
const [autoDetectedLanguages, setAutoDetectedLanguages] = useState(null);
const [topSupportedLanguages, restSupportedLanguages] = useMemo(() => { const [topSupportedLanguages, restSupportedLanguages] = useMemo(() => {
const topLanguages = []; const topLanguages = [];
const restLanguages = []; const restLanguages = [];
@ -645,7 +647,8 @@ function Compose({
code === language || code === language ||
code === prevLanguage.current || code === prevLanguage.current ||
code === DEFAULT_LANG || code === DEFAULT_LANG ||
contentTranslationHideLanguages.includes(code) contentTranslationHideLanguages.includes(code) ||
(autoDetectedLanguages?.length && autoDetectedLanguages.includes(code))
) { ) {
topLanguages.push(l); topLanguages.push(l);
} else { } else {
@ -661,7 +664,7 @@ function Compose({
commonA.localeCompare(commonB), commonA.localeCompare(commonB),
); );
return [topLanguages, restLanguages]; return [topLanguages, restLanguages];
}, [language]); }, [language, autoDetectedLanguages]);
const replyToStatusMonthsAgo = useMemo( const replyToStatusMonthsAgo = useMemo(
() => () =>
@ -1172,6 +1175,11 @@ function Compose({
setShowMentionPicker({ setShowMentionPicker({
defaultSearchTerm: action?.defaultSearchTerm || null, defaultSearchTerm: action?.defaultSearchTerm || null,
}); });
} else if (
action?.name === 'auto-detect-language' &&
action?.languages
) {
setAutoDetectedLanguages(action.languages);
} }
}} }}
/> />
@ -1354,7 +1362,11 @@ function Compose({
)} )}
<label <label
class={`toolbar-button ${ class={`toolbar-button ${
language !== prevLanguage.current ? 'highlight' : '' language !== prevLanguage.current ||
(autoDetectedLanguages?.length &&
autoDetectedLanguages.includes(language))
? 'highlight'
: ''
}`} }`}
> >
<span class="icon-text"> <span class="icon-text">
@ -1577,6 +1589,15 @@ const getCustomEmojis = pmem(_getCustomEmojis, {
maxAge: 30 * 60 * 1000, // 30 minutes maxAge: 30 * 60 * 1000, // 30 minutes
}); });
/**
 * Detect candidate languages for the text being composed.
 *
 * @param {string} text - Text handed to `detectAll`.
 * @returns {string[]|null} The `lang` value of at most the first two entries
 *   returned by `detectAll`, or null when nothing was returned.
 */
const detectLangs = (text) => {
  const candidates = detectAll(text);
  // Nullish or empty result: nothing to report
  if (!candidates?.length) return null;
  // Cap at two entries
  return candidates.slice(0, 2).map(({ lang }) => lang);
};
const Textarea = forwardRef((props, ref) => { const Textarea = forwardRef((props, ref) => {
const { masto, instance } = api(); const { masto, instance } = api();
const [text, setText] = useState(ref.current?.value || ''); const [text, setText] = useState(ref.current?.value || '');
@ -1845,6 +1866,17 @@ const Textarea = forwardRef((props, ref) => {
// Newline to prevent multiple line breaks at the end from being collapsed, no idea why // Newline to prevent multiple line breaks at the end from being collapsed, no idea why
}, 500); }, 500);
// Language auto-detection for the textarea content, wrapped with
// useDebouncedCallback (1000 ms wait). When detectLangs yields at least one
// language, the result is reported through onTrigger as an
// 'auto-detect-language' action; empty text or an empty result is ignored.
const debouncedAutoDetectLanguage = useDebouncedCallback((text) => {
  const detected = text ? detectLangs(text) : null;
  if (!detected?.length) return;
  onTrigger?.({ name: 'auto-detect-language', languages: detected });
}, 1000);
return ( return (
<text-expander <text-expander
ref={textExpanderRef} ref={textExpanderRef}
@ -1912,6 +1944,7 @@ const Textarea = forwardRef((props, ref) => {
autoResizeTextarea(target); autoResizeTextarea(target);
props.onInput?.(e); props.onInput?.(e);
throttleHighlightText(text); throttleHighlightText(text);
debouncedAutoDetectLanguage(text);
}} }}
style={{ style={{
width: '100%', width: '100%',

View file

@ -23,6 +23,7 @@ import {
} from 'preact/hooks'; } from 'preact/hooks';
import punycode from 'punycode'; import punycode from 'punycode';
import { useHotkeys } from 'react-hotkeys-hook'; import { useHotkeys } from 'react-hotkeys-hook';
import { detectAll } from 'tinyld/light';
import { useLongPress } from 'use-long-press'; import { useLongPress } from 'use-long-press';
import { useSnapshot } from 'valtio'; import { useSnapshot } from 'valtio';
@ -46,6 +47,7 @@ import handleContentLinks from '../utils/handle-content-links';
import htmlContentLength from '../utils/html-content-length'; import htmlContentLength from '../utils/html-content-length';
import isMastodonLinkMaybe from '../utils/isMastodonLinkMaybe'; import isMastodonLinkMaybe from '../utils/isMastodonLinkMaybe';
import localeMatch from '../utils/locale-match'; import localeMatch from '../utils/locale-match';
import mem from '../utils/mem';
import niceDateTime from '../utils/nice-date-time'; import niceDateTime from '../utils/nice-date-time';
import openCompose from '../utils/open-compose'; import openCompose from '../utils/open-compose';
import pmem from '../utils/pmem'; import pmem from '../utils/pmem';
@ -158,6 +160,18 @@ const SIZE_CLASS = {
l: 'large', l: 'large',
}; };
/**
 * Best-effort language detection for a post's plain text, wrapped in `mem`.
 *
 * @param {string} text - Plain text to analyse.
 * @returns {string|null} The `lang` of the first entry returned by
 *   `detectAll` when its `accuracy` is above 0.5, otherwise null.
 */
const detectLang = mem((text) => {
  // Nothing to analyse, so skip the detector entirely
  if (!text) return null;
  // Optional chaining keeps a nullish or empty detector result from throwing
  const top = detectAll(text)?.[0];
  // Only trust the result above 50% accuracy. Lower scores can still be
  // right and higher scores can still be wrong, but the threshold is the
  // safer trade-off.
  if (top?.lang && top.accuracy > 0.5) {
    return top.lang;
  }
  return null;
});
function Status({ function Status({
statusID, statusID,
status, status,
@ -242,7 +256,7 @@ function Status({
sensitive, sensitive,
spoilerText, spoilerText,
visibility, // public, unlisted, private, direct visibility, // public, unlisted, private, direct
language, language: _language,
editedAt, editedAt,
filtered, filtered,
card, card,
@ -265,6 +279,9 @@ function Status({
emojiReactions, emojiReactions,
} = status; } = status;
let languageAutoDetected = content && detectLang(getHTMLText(content));
const language = _language || languageAutoDetected;
// if (!mediaAttachments?.length) mediaFirst = false; // if (!mediaAttachments?.length) mediaFirst = false;
const hasMediaAttachments = !!mediaAttachments?.length; const hasMediaAttachments = !!mediaAttachments?.length;
if (mediaFirst && hasMediaAttachments) size = 's'; if (mediaFirst && hasMediaAttachments) size = 's';
@ -1898,6 +1915,7 @@ function Status({
forceTranslate={forceTranslate || inlineTranslate} forceTranslate={forceTranslate || inlineTranslate}
mini={!isSizeLarge && !withinContext} mini={!isSizeLarge && !withinContext}
sourceLanguage={language} sourceLanguage={language}
autoDetected={languageAutoDetected}
text={getPostText(status)} text={getPostText(status)}
/> />
)} )}

View file

@ -77,6 +77,7 @@ function TranslationBlock({
onTranslate, onTranslate,
text = '', text = '',
mini, mini,
autoDetected,
}) { }) {
const targetLang = getTranslateTargetLanguage(true); const targetLang = getTranslateTargetLanguage(true);
const [uiState, setUIState] = useState('default'); const [uiState, setUIState] = useState('default');
@ -187,7 +188,9 @@ function TranslationBlock({
{uiState === 'loading' {uiState === 'loading'
? 'Translating…' ? 'Translating…'
: sourceLanguage && sourceLangText && !detectedLang : sourceLanguage && sourceLangText && !detectedLang
? `Translate from ${sourceLangText}` ? autoDetected
? `Translate from ${sourceLangText} (auto-detected)`
: `Translate from ${sourceLangText}`
: `Translate`} : `Translate`}
</span> </span>
</button> </button>