mirror of
https://github.com/streetwriters/notesnook.git
synced 2025-12-23 23:19:40 +01:00
web: add support for CJK characters in word counter (#1840)
Co-authored-by: Abdullah Atta <abdullahatta@streetwriters.co>
This commit is contained in:
@@ -406,6 +406,7 @@ function getSelectedWords(
|
||||
function countWords(str: string) {
|
||||
let count = 0;
|
||||
let shouldCount = false;
|
||||
let isScript = false;
|
||||
|
||||
for (let i = 0; i < str.length; ++i) {
|
||||
const s = str[i];
|
||||
@@ -416,9 +417,10 @@ function countWords(str: string) {
|
||||
s === "\n" ||
|
||||
s === "*" ||
|
||||
s === "/" ||
|
||||
s === "&"
|
||||
s === "&" ||
|
||||
(isScript = isCJKChar(s))
|
||||
) {
|
||||
if (!shouldCount) continue;
|
||||
if (!shouldCount && !isScript) continue;
|
||||
++count;
|
||||
shouldCount = false;
|
||||
} else {
|
||||
@@ -430,3 +432,33 @@ function countWords(str: string) {
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
// Taken from: https://en.wikipedia.org/wiki/CJK_Unified_Ideographs
// Each entry is an inclusive [first, last] pair of Unicode code points
// (decimal); the trailing comment gives the block name and its hex range.
const CJK_UNICODE_RANGES: ReadonlyArray<readonly [number, number]> = [
  [19968, 40959], // CJK Unified Ideographs 4E00-9FFF Common
  [13312, 19903], // CJK Unified Ideographs Extension A 3400-4DBF Rare
  [131072, 173791], // CJK Unified Ideographs Extension B 20000-2A6DF Rare, historic
  [173824, 177983], // CJK Unified Ideographs Extension C 2A700-2B73F Rare, historic
  [177984, 178207], // CJK Unified Ideographs Extension D 2B740-2B81F Uncommon, some in current use
  [178208, 183983], // CJK Unified Ideographs Extension E 2B820-2CEAF Rare, historic
  [183984, 191471], // CJK Unified Ideographs Extension F 2CEB0-2EBEF Rare, historic
  [196608, 201551], // CJK Unified Ideographs Extension G 30000-3134F Rare, historic
  [201552, 205743], // CJK Unified Ideographs Extension H 31350-323AF Rare, historic
  [63744, 64255], // CJK Compatibility Ideographs F900-FAFF Duplicates, unifiable variants, corporate characters
  [194560, 195103], // CJK Compatibility Ideographs Supplement 2F800-2FA1F Unifiable variants
  [12032, 12255], // CJK Radicals / Kangxi Radicals 2F00-2FDF
  [11904, 12031], // CJK Radicals Supplement 2E80-2EFF
  [12288, 12351], // CJK Symbols and Punctuation 3000-303F
  [13056, 13311], // CJK Compatibility 3300-33FF
  [65072, 65103] // CJK Compatibility Forms FE30-FE4F
];

/**
 * Reports whether the first character of `char` lies in one of the
 * `CJK_UNICODE_RANGES` blocks.
 *
 * Only the first code point of the string is inspected; anything after it is
 * ignored.
 *
 * @param char String whose first code point is tested.
 * @returns `true` when that code point falls inside any listed range;
 * `false` otherwise, including for an empty string.
 */
function isCJKChar(char: string): boolean {
  // codePointAt (unlike charCodeAt) decodes a surrogate pair into the full
  // code point, which is required for the ranges above U+FFFF to ever match.
  // A lone surrogate yields its own code unit, which is in none of the ranges.
  const code = char.codePointAt(0);
  if (code === undefined) return false; // empty string: nothing to classify

  return CJK_UNICODE_RANGES.some(
    ([first, last]) => code >= first && code <= last
  );
}
|
||||
|
||||
Reference in New Issue
Block a user