I don’t think there is a ready method within Java to do this automatically. The best solution is manual implementation.
Just taking advantage of the topic, to be a future reference for other users, I’ve come across the same problem, only in another language: Javascript.
At the time, my solution was to adapt Mono's (C#) implementation to Javascript, using these three links (it is worth mentioning that the values of the constants used by Mono are not the same as the values of the constants used by Java):
Char.cs
UnicodeCategory.cs
mono-devel-list
Just for the sake of completeness, my Javascript code looked like this at the time:
"use strict";
window.StringUtils = {
UnicodeCategory: {
UppercaseLetter: 0,
LowercaseLetter: 1,
TitlecaseLetter: 2,
ModifierLetter: 3,
OtherLetter: 4,
NonSpacingMark: 5,
SpacingCombiningMark: 6,
EnclosingMark: 7,
DecimalDigitNumber: 8,
LetterNumber: 9,
OtherNumber: 10,
SpaceSeparator: 11,
LineSeparator: 12,
ParagraphSeparator: 13,
Control: 14,
Format: 15,
Surrogate: 16,
PrivateUse: 17,
ConnectorPunctuation: 18,
DashPunctuation: 19,
OpenPunctuation: 20,
ClosePunctuation: 21,
InitialQuotePunctuation: 22,
FinalQuotePunctuation: 23,
OtherPunctuation: 24,
MathSymbol: 25,
CurrencySymbol: 26,
ModifierSymbol: 27,
OtherSymbol: 28,
OtherNotAssigned: 29
},
UnicodeCategoryStr: [
"UppercaseLetter",
"LowercaseLetter",
"TitlecaseLetter",
"ModifierLetter",
"OtherLetter",
"NonSpacingMark",
"SpacingCombiningMark",
"EnclosingMark",
"DecimalDigitNumber",
"LetterNumber",
"OtherNumber",
"SpaceSeparator",
"LineSeparator",
"ParagraphSeparator",
"Control",
"Format",
"Surrogate",
"PrivateUse",
"ConnectorPunctuation",
"DashPunctuation",
"OpenPunctuation",
"ClosePunctuation",
"InitialQuotePunctuation",
"FinalQuotePunctuation",
"OtherPunctuation",
"MathSymbol",
"CurrencySymbol",
"ModifierSymbol",
"OtherSymbol",
"OtherNotAssigned"
],
_highByteToSegment: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 8, 8, 8, 8, 8, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 8, 8, 8, 8, 8, 36, 37, 38, 39, 40, 41, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 42, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 43, 21, 21, 21, 21, 44, 8, 8, 8, 8, 8, 8, 8, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 45, 46, 46, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 21, 48, 49, 21, 50, 51, 52, ],
// Second-level lookup table, laid out as 32 entries per segment.
// getUnicodeCategory reads it at index (segment << 5) + ((c & 0xFF) >>> 3),
// where segment comes from _highByteToSegment[c >>> 8]. Each entry is an
// offset into _strideData for a run of 8 consecutive code units.
// Generated data - do not edit by hand.
_segmentToStride: [ 0, 0, 0, 0, 8, 16, 24, 32, 40, 48, 48, 56, 64, 72, 72, 80, 0, 0, 0, 0, 88, 96, 104, 112, 48, 48, 120, 128, 72, 72, 136, 72,
144, 144, 144, 144, 144, 144, 144, 152, 152, 160, 144, 144, 144, 144, 144, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 152, 256, 144, 144, 264, 144,
144, 144, 144, 144, 272, 144, 280, 288, 288, 288, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 296, 304, 312, 320, 328, 320, 328, 336, 344, 288, 288,
352, 352, 352, 352, 352, 352, 352, 352, 352, 360, 288, 288, 368, 288, 376, 384, 392, 400, 408, 48, 416, 424, 72, 72, 72, 432, 440, 272, 144, 144, 448, 288,
48, 48, 48, 48, 48, 48, 72, 72, 72, 72, 72, 72, 144, 144, 144, 144, 456, 464, 144, 144, 144, 144, 144, 144, 472, 480, 144, 144, 144, 144, 488, 496,
288, 288, 288, 288, 288, 288, 504, 48, 48, 48, 512, 520, 528, 72, 72, 72, 72, 536, 544, 352, 552, 352, 352, 560, 568, 288, 576, 576, 576, 584, 592, 288,
288, 600, 288, 608, 616, 576, 576, 584, 624, 632, 640, 288, 24, 648, 656, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 664, 672, 680, 688, 24, 696,
704, 712, 720, 576, 576, 728, 352, 352, 352, 368, 288, 288, 288, 288, 288, 288, 576, 576, 576, 576, 736, 352, 744, 288, 288, 288, 288, 288, 288, 288, 288, 288,
288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288,
752, 576, 576, 576, 576, 576, 576, 760, 768, 776, 784, 576, 792, 24, 800, 288, 808, 816, 824, 576, 576, 832, 840, 848, 856, 864, 872, 880, 888, 24, 896, 904,
912, 920, 824, 576, 576, 832, 928, 848, 936, 944, 288, 952, 960, 24, 968, 288, 752, 976, 984, 576, 576, 832, 992, 760, 1000, 1008, 1016, 288, 1024, 24, 288, 288,
808, 816, 824, 576, 576, 832, 1032, 1040, 1048, 864, 1056, 880, 1064, 24, 1072, 288, 1080, 1088, 1096, 1104, 1112, 1088, 1120, 1128, 1136, 1144, 872, 288, 1152, 24, 1160, 288,
1168, 1176, 832, 576, 576, 832, 1184, 1192, 1200, 1208, 1216, 288, 1064, 24, 288, 288, 1224, 1176, 832, 576, 576, 832, 1184, 1232, 1240, 1248, 1256, 1264, 1064, 24, 288, 288,
1224, 1176, 832, 576, 576, 832, 576, 1128, 1272, 1144, 872, 288, 1064, 24, 288, 288, 1224, 576, 1280, 1288, 576, 576, 984, 1296, 1280, 1304, 1312, 1320, 288, 288, 1328, 288,
616, 576, 576, 576, 576, 576, 1336, 1344, 1352, 1360, 24, 1368, 288, 288, 288, 288, 1376, 1384, 1392, 616, 1400, 1408, 1336, 1416, 1424, 640, 24, 1432, 288, 288, 288, 288,
1440, 704, 1448, 1456, 24, 1464, 1472, 1480, 576, 616, 576, 576, 576, 584, 544, 1488, 1496, 1504, 352, 544, 352, 352, 352, 1512, 1520, 1528, 288, 288, 288, 288, 288, 288,
576, 576, 576, 576, 984, 1536, 1544, 1552, 24, 1560, 1568, 1576, 288, 288, 288, 288, 288, 288, 288, 288, 48, 48, 48, 48, 1584, 288, 576, 576, 576, 576, 1280, 1592,
576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 1600, 576, 576, 576, 576, 576, 576, 576, 576, 584, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 1608,
1280, 576, 576, 576, 576, 576, 576, 576, 1280, 1096, 1280, 1096, 576, 576, 576, 576, 1280, 1096, 576, 576, 576, 1280, 1096, 1280, 1096, 1280, 1280, 576, 576, 1280, 576, 576,
576, 1280, 1096, 1280, 576, 576, 576, 576, 1280, 576, 576, 584, 1616, 1624, 1464, 1632, 288, 288, 288, 288, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 728, 288,
616, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576,
576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576,
576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 1640, 1280, 288, 1648, 576, 576, 1656, 576, 576, 576, 576, 576, 576, 576, 576, 576, 1664, 1672, 288,
288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 576, 576, 576, 576, 576, 576, 1680, 1688, 1696, 768, 1704, 1712, 24, 1720, 288, 288,
1728, 1736, 24, 1720, 576, 576, 576, 576, 1744, 576, 576, 576, 576, 576, 576, 288, 576, 576, 576, 576, 576, 1752, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288,
144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 1760, 448, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 496,
72, 48, 296, 1584, 72, 48, 72, 48, 296, 1584, 72, 1768, 72, 48, 72, 296, 72, 1776, 72, 1776, 72, 1776, 1784, 1792, 1800, 1808, 1816, 1824, 72, 1832, 1840, 1848,
1856, 1864, 1872, 1880, 704, 1888, 704, 1896, 1904, 1912, 288, 288, 288, 1920, 1928, 1936, 1944, 1952, 288, 288, 1960, 1960, 288, 288, 288, 288, 352, 1968, 1976, 288, 288, 288,
1984, 1992, 2000, 2008, 2016, 2024, 2032, 2040, 288, 288, 2048, 1944, 2056, 2056, 2056, 2056, 2064, 288, 2072, 2080, 2088, 2096, 2104, 2104, 2104, 2112, 2120, 2104, 2104, 2104, 2128, 288,
2136, 2136, 2136, 2136, 2136, 2136, 2136, 2136, 2136, 2136, 2136, 2136, 2136, 2136, 2136, 2136, 2136, 2136, 2136, 2136, 2136, 2136, 2136, 2136, 2136, 2136, 2136, 2136, 2136, 2136, 2144, 288,
2104, 2152, 2104, 2104, 2160, 2168, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2176, 2104, 2104, 2104, 2184, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288,
2104, 2104, 2104, 2104, 2192, 288, 288, 288, 2104, 2184, 288, 288, 1944, 1944, 1944, 1944, 1944, 1944, 1944, 2200, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2208, 288, 288,
2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2216, 288, 2104, 2104, 2224, 2104, 2232, 2104, 2104, 2104, 2104, 2104, 2104, 288,
2104, 2104, 2128, 2240, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2224, 2248, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288,
2256, 2264, 2104, 2104, 2104, 2240, 2104, 2104, 2104, 2272, 2280, 2192, 2240, 288, 2288, 1944, 1944, 1944, 2296, 2104, 2104, 2104, 2240, 2192, 288, 288, 288, 288, 288, 288, 288, 288,
2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104,
288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 2104, 2104, 2104, 2304, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2128, 288,
2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2216, 288, 288, 288, 2104, 2128,
2312, 2320, 2328, 2336, 2344, 2352, 2360, 2368, 616, 576, 576, 576, 576, 576, 576, 576, 576, 576, 728, 2376, 616, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 2384,
2392, 576, 576, 576, 576, 728, 616, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 1280, 2400, 2104, 576, 576, 576, 288, 288, 288, 288, 288, 288, 288, 288, 288,
2104, 2104, 2104, 2408, 1944, 2416, 2104, 2104, 2128, 288, 288, 288, 2104, 2104, 2104, 2424, 1944, 2416, 2104, 2104, 2104, 2104, 1072, 288, 2104, 2128, 2104, 2104, 2104, 2104, 2104, 2192,
2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2192, 2432, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2104, 2216, 2104, 2104, 2104, 2192,
576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 2440, 288, 288, 288, 288, 288, 288, 288, 288, 288,
576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 2440, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288,
576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 728, 2104, 2104, 2264, 2104, 2176, 2104, 2448, 288, 288, 288, 288, 288, 288, 288,
576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 1504, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288,
2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456, 2456,
2464, 2464, 2464, 2464, 2464, 2464, 2464, 2464, 2464, 2464, 2464, 2464, 2464, 2464, 2464, 2464, 2464, 2464, 2464, 2464, 2464, 2464, 2464, 2464, 2464, 2464, 2464, 2464, 2464, 2464, 2464, 2464,
576, 576, 576, 576, 576, 2440, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288, 288,
432, 288, 2472, 2480, 576, 2488, 1280, 2496, 2504, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 1608, 288, 288, 288, 2512, 576, 576, 576, 576, 576,
576, 576, 576, 576, 576, 576, 576, 2520, 288, 288, 576, 576, 576, 576, 576, 576, 576, 576, 1288, 576, 576, 576, 576, 576, 576, 288, 288, 288, 288, 288, 576, 1504,
288, 288, 288, 288, 2528, 288, 2536, 2544, 2552, 2560, 2568, 2576, 2584, 2592, 2600, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 2608,
2616, 16, 24, 32, 40, 48, 48, 56, 64, 72, 72, 2624, 2632, 576, 624, 576, 576, 576, 576, 2640, 576, 576, 576, 1280, 1288, 1288, 1288, 2648, 2656, 2664, 288, 2672 ],
// Third-level lookup table: UnicodeCategory values stored in rows of 8.
// getUnicodeCategory reads it at index stride + (c & 0x07), where stride is
// an offset taken from _segmentToStride.
// FIXME(review): this table appears to be truncated. Every row is expected
// to hold 8 entries (the offsets in _segmentToStride all appear to be
// multiples of 8), but the row `28, 28, 28, 28, 29, 28,` further down holds
// only 6, and the largest offset referenced by _segmentToStride (2672)
// points past the roughly 2430 entries present here. Lookups that land at or
// after the short row will return wrong categories or undefined. Restore the
// data from the original Mono category table before relying on the results.
_strideData: [ 14, 14, 14, 14, 14, 14, 14, 14,
11, 24, 24, 24, 26, 24, 24, 24,
20, 21, 24, 25, 24, 19, 24, 24,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 24, 24, 25, 25, 25, 24,
24, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 20, 24, 21, 27, 18,
27, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 20, 25, 21, 25, 14,
11, 24, 26, 26, 26, 26, 28, 28,
27, 28, 1, 22, 25, 19, 28, 27,
28, 25, 10, 10, 27, 1, 28, 24,
27, 10, 1, 23, 10, 10, 10, 24,
0, 0, 0, 0, 0, 0, 0, 25,
0, 0, 0, 0, 0, 0, 0, 1,
1, 1, 1, 1, 1, 1, 1, 25,
0, 1, 0, 1, 0, 1, 0, 1,
1, 0, 1, 0, 1, 0, 1, 0,
1, 1, 0, 1, 0, 1, 0, 1,
0, 0, 1, 0, 1, 0, 1, 1,
1, 0, 0, 1, 0, 1, 0, 0,
1, 0, 0, 0, 1, 1, 0, 0,
0, 0, 1, 0, 0, 1, 0, 0,
0, 1, 1, 1, 0, 0, 1, 0,
0, 1, 0, 1, 0, 1, 0, 0,
1, 0, 1, 1, 0, 1, 0, 0,
1, 0, 0, 0, 1, 0, 1, 0,
0, 1, 1, 4, 0, 1, 1, 1,
4, 4, 4, 4, 0, 2, 1, 0,
2, 1, 0, 2, 1, 0, 1, 0,
1, 0, 1, 0, 1, 1, 0, 1,
1, 0, 2, 1, 0, 1, 0, 0,
29, 29, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29,
1, 1, 1, 1, 1, 1, 29, 29,
3, 3, 3, 3, 3, 3, 3, 3,
3, 27, 27, 3, 3, 3, 3, 3,
3, 3, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27,
3, 3, 3, 3, 3, 27, 27, 27,
27, 27, 27, 27, 27, 27, 3, 29,
5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 29,
5, 5, 5, 29, 29, 29, 29, 29,
29, 29, 29, 29, 27, 27, 29, 29,
29, 29, 3, 29, 29, 29, 24, 29,
29, 29, 29, 29, 27, 27, 0, 24,
0, 0, 0, 29, 0, 29, 0, 0,
1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 29, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 29,
1, 1, 0, 0, 0, 1, 1, 1,
1, 1, 1, 1, 29, 29, 29, 29,
0, 1, 28, 5, 5, 5, 5, 29,
7, 7, 29, 29, 0, 1, 0, 1,
0, 0, 1, 0, 1, 29, 29, 0,
1, 29, 29, 0, 1, 29, 29, 29,
0, 1, 0, 1, 0, 1, 29, 29,
0, 1, 29, 29, 29, 29, 29, 29,
29, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 29,
29, 3, 24, 24, 24, 24, 24, 24,
29, 1, 1, 1, 1, 1, 1, 1,
29, 24, 19, 29, 29, 29, 29, 29,
29, 5, 5, 5, 5, 5, 5, 5,
5, 5, 29, 5, 5, 5, 5, 5,
5, 5, 29, 5, 5, 5, 24, 5,
24, 5, 5, 24, 5, 29, 29, 29,
4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 29, 29, 29, 29, 29,
4, 4, 4, 24, 24, 29, 29, 29,
29, 29, 29, 29, 24, 29, 29, 29,
29, 29, 29, 24, 29, 29, 29, 24,
29, 4, 4, 4, 4, 4, 4, 4,
3, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 29, 29,
8, 8, 24, 24, 24, 24, 29, 29,
5, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 24, 4, 5, 5,
5, 5, 5, 5, 5, 7, 7, 5,
5, 5, 5, 5, 5, 3, 3, 5,
5, 28, 5, 5, 5, 5, 29, 29,
8, 8, 4, 4, 4, 28, 28, 29,
24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 29, 15,
4, 5, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 29, 29, 29,
4, 4, 4, 4, 4, 4, 5, 5,
5, 29, 29, 29, 29, 29, 29, 29,
29, 5, 5, 6, 29, 4, 4, 4,
4, 4, 29, 29, 5, 4, 6, 6,
6, 5, 5, 5, 5, 5, 5, 5,
5, 6, 6, 6, 6, 5, 29, 29,
4, 5, 5, 5, 5, 29, 29, 29,
4, 4, 5, 5, 24, 24, 8, 8,
24, 29, 29, 29, 29, 29, 29, 29,
29, 5, 6, 6, 29, 4, 4, 4,
4, 4, 4, 4, 4, 29, 29, 4,
4, 29, 29, 4, 4, 4, 4, 4,
4, 29, 4, 4, 4, 4, 4, 4,
4, 29, 4, 29, 29, 29, 4, 4,
4, 4, 29, 29, 5, 29, 6, 6,
6, 5, 5, 5, 5, 29, 29, 6,
6, 29, 29, 6, 6, 5, 29, 29,
29, 29, 29, 29, 29, 29, 29, 6,
29, 29, 29, 29, 4, 4, 29, 4,
4, 4, 5, 5, 29, 29, 8, 8,
4, 4, 26, 26, 10, 10, 10, 10,
10, 10, 28, 29, 29, 29, 29, 29,
29, 29, 5, 29, 29, 4, 4, 4,
4, 4, 4, 29, 29, 29, 29, 4,
4, 29, 4, 4, 29, 4, 4, 29,
6, 5, 5, 29, 29, 29, 29, 5,
5, 29, 29, 5, 5, 5, 29, 29,
29, 4, 4, 4, 4, 29, 4, 29,
29, 29, 29, 29, 29, 29, 8, 8,
5, 5, 4, 4, 4, 29, 29, 29,
4, 4, 4, 4, 29, 4, 29, 4,
4, 4, 29, 4, 4, 4, 4, 4,
4, 29, 4, 4, 29, 4, 4, 4,
6, 5, 5, 5, 5, 5, 29, 5,
5, 6, 29, 6, 6, 5, 29, 29,
4, 29, 29, 29, 29, 29, 29, 29,
4, 29, 29, 29, 29, 29, 8, 8,
4, 29, 4, 4, 29, 29, 4, 4,
4, 4, 29, 29, 5, 4, 6, 5,
6, 5, 5, 5, 29, 29, 29, 6,
29, 29, 29, 29, 29, 29, 5, 6,
4, 4, 29, 29, 29, 29, 8, 8,
28, 29, 29, 29, 29, 29, 29, 29,
29, 29, 5, 6, 29, 4, 4, 4,
4, 4, 4, 29, 29, 29, 4, 4,
4, 29, 4, 4, 4, 4, 29, 29,
29, 4, 4, 29, 4, 29, 4, 4,
29, 29, 29, 4, 4, 29, 29, 29,
4, 4, 4, 4, 4, 4, 29, 4,
4, 4, 29, 29, 29, 29, 6, 6,
5, 6, 6, 29, 29, 29, 6, 6,
6, 29, 6, 6, 6, 5, 29, 29,
29, 29, 29, 29, 29, 29, 29, 8,
10, 10, 10, 29, 29, 29, 29, 29,
29, 6, 6, 6, 29, 4, 4, 4,
4, 4, 4, 4, 4, 29, 4, 4,
4, 4, 4, 4, 29, 4, 4, 4,
4, 4, 29, 29, 29, 29, 5, 5,
5, 6, 6, 6, 6, 29, 5, 5,
5, 29, 5, 5, 5, 5, 29, 29,
29, 29, 29, 29, 29, 5, 5, 29,
29, 29, 6, 6, 29, 4, 4, 4,
4, 4, 29, 29, 29, 29, 6, 5,
6, 6, 6, 6, 6, 29, 5, 6,
6, 29, 6, 6, 5, 5, 29, 29,
29, 29, 29, 29, 29, 6, 6, 29,
29, 29, 29, 29, 29, 29, 4, 29,
6, 5, 5, 5, 29, 29, 6, 6,
4, 4, 4, 4, 4, 4, 4, 29,
29, 29, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 29, 4, 29, 29,
29, 29, 5, 29, 29, 29, 29, 6,
6, 6, 5, 5, 5, 29, 5, 29,
6, 6, 6, 6, 6, 6, 6, 6,
29, 29, 6, 6, 24, 29, 29, 29,
4, 5, 4, 4, 5, 5, 5, 5,
5, 5, 5, 29, 29, 29, 29, 26,
4, 4, 4, 4, 4, 4, 3, 5,
5, 5, 5, 5, 5, 5, 5, 24,
8, 8, 24, 24, 29, 29, 29, 29,
29, 4, 4, 29, 4, 29, 29, 4,
4, 29, 4, 29, 29, 4, 29, 29,
29, 29, 29, 29, 4, 4, 4, 4,
29, 4, 4, 4, 29, 4, 29, 4,
29, 29, 4, 4, 29, 4, 4, 4,
5, 5, 29, 5, 5, 4, 29, 29,
4, 4, 4, 4, 4, 29, 3, 29,
8, 8, 29, 29, 4, 4, 29, 29,
4, 28, 28, 28, 24, 24, 24, 24,
24, 24, 24, 28, 28, 28, 28, 28,
5, 5, 28, 28, 28, 28, 28, 28,
8, 8, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 28, 5, 28, 5,
28, 5, 20, 21, 20, 21, 6, 6,
5, 5, 5, 5, 5, 5, 5, 6,
5, 5, 5, 5, 5, 24, 5, 5,
4, 4, 4, 4, 29, 29, 29, 29,
5, 5, 5, 5, 5, 29, 28, 28,
28, 28, 28, 28, 28, 28, 5, 28,
28, 28, 28, 28, 28, 29, 29, 28,
29, 4, 4, 29, 6, 5, 5, 5,
5, 6, 5, 29, 29, 29, 5, 5,
6, 5, 29, 29, 29, 29, 29, 29,
8, 8, 24, 24, 24, 24, 24, 24,
4, 4, 4, 4, 4, 4, 6, 6,
5, 5, 29, 29, 29, 29, 29, 29,
0, 0, 0, 0, 0, 0, 29, 29,
29, 29, 29, 24, 29, 29, 29, 29,
4, 4, 29, 29, 29, 29, 29, 4,
4, 4, 29, 29, 29, 29, 29, 29,
29, 24, 24, 24, 24, 24, 24, 24,
24, 8, 8, 8, 8, 8, 8, 8,
10, 10, 10, 10, 10, 29, 29, 29,
4, 4, 4, 4, 4, 24, 24, 4,
11, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 20, 21, 29, 29, 29,
4, 4, 4, 24, 24, 24, 10, 10,
10, 29, 29, 29, 29, 29, 29, 29,
4, 4, 4, 4, 6, 6, 6, 5,
5, 5, 5, 5, 5, 5, 6, 6,
6, 6, 6, 6, 6, 6, 5, 6,
5, 5, 5, 5, 24, 24, 24, 24,
24, 24, 24, 26, 24, 29, 29, 29,
8, 8, 29, 29, 29, 29, 29, 29,
24, 24, 24, 24, 24, 24, 19, 24,
24, 24, 24, 15, 15, 15, 15, 29,
4, 4, 4, 3, 4, 4, 4, 4,
4, 5, 29, 29, 29, 29, 29, 29,
0, 1, 0, 1, 0, 1, 1, 1,
29, 0, 29, 0, 29, 0, 29, 0,
2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 29, 1, 1,
0, 0, 0, 0, 2, 27, 1, 27,
27, 27, 1, 1, 1, 29, 1, 1,
0, 0, 0, 0, 2, 27, 27, 27,
1, 1, 1, 1, 29, 29, 1, 1,
0, 0, 0, 0, 29, 27, 27, 27,
0, 0, 0, 0, 0, 27, 27, 27,
29, 29, 1, 1, 1, 29, 1, 1,
0, 0, 0, 0, 2, 27, 27, 29,
11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 15, 15, 15, 15,
19, 19, 19, 19, 19, 19, 24, 24,
22, 23, 20, 22, 22, 23, 20, 22,
12, 13, 15, 15, 15, 15, 15, 11,
24, 22, 23, 24, 24, 24, 24, 18,
18, 24, 24, 24, 25, 20, 21, 29,
24, 24, 24, 24, 24, 24, 29, 29,
29, 29, 15, 15, 15, 15, 15, 15,
10, 29, 29, 29, 10, 10, 10, 10,
10, 10, 25, 25, 25, 20, 21, 1,
10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 25, 25, 25, 20, 21, 29,
26, 26, 26, 26, 26, 26, 26, 26,
5, 5, 5, 5, 5, 7, 7, 7,
7, 5, 7, 7, 29, 29, 29, 29,
28, 28, 0, 28, 28, 28, 28, 0,
28, 28, 1, 0, 0, 0, 1, 1,
0, 0, 0, 1, 28, 0, 28, 28,
28, 0, 0, 0, 0, 0, 28, 28,
28, 28, 28, 28, 0, 28, 0, 28,
0, 28, 0, 0, 0, 0, 28, 1,
0, 0, 28, 0, 1, 4, 4, 4,
4, 1, 28, 29, 29, 29, 29, 29,
29, 29, 29, 10, 10, 10, 10, 10,
9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 29, 29, 29, 29,
25, 25, 25, 25, 25, 28, 28, 28,
28, 28, 25, 25, 28, 28, 28, 28,
25, 28, 28, 25, 28, 28, 25, 28,
28, 28, 28, 28, 28, 28, 25, 28,
28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 25, 25,
28, 28, 25, 28, 25, 28, 28, 28,
28, 28, 28, 28, 29, 29, 29, 29,
25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 29, 29, 29, 29, 29, 29,
25, 25, 25, 25, 28, 28, 28, 28,
25, 25, 28, 28, 28, 28, 28, 28,
28, 20, 21, 28, 28, 28, 28, 28,
28, 28, 28, 28, 29, 28,
29, 29, 29, 28, 28, 28, 28, 28,
4, 4, 4, 4, 4, 4, 29, 29,
28, 29, 28, 28, 28, 29, 28, 29,
16, 16, 16, 16, 16, 16, 16, 16,
17, 17, 17, 17, 17, 17, 17, 17,
29, 29, 29, 1, 1, 1, 1, 1,
29, 29, 29, 29, 29, 4, 5, 4,
4, 25, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 29, 4, 29,
4, 4, 29, 4, 4, 29, 4, 4,
29, 29, 29, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 20, 21,
5, 5, 5, 5, 29, 29, 29, 29,
24, 19, 19, 18, 18, 20, 21, 20,
21, 20, 21, 20, 21, 20, 21, 20,
21, 20, 21, 20, 21, 29, 29, 29,
29, 24, 24, 24, 24, 18, 18, 18,
24, 24, 24, 29, 24, 24, 24, 24,
19, 20, 21, 20, 21, 20, 21, 24,
24, 24, 25, 19, 25, 25, 25, 29,
24, 26, 24, 24, 29, 29, 29, 29,
4, 4, 4, 29, 4, 29, 4, 4,
4, 4, 4, 4, 4, 29, 29, 15,
29, 24, 24, 24, 26, 24, 24, 24,
1, 1, 1, 20, 25, 21, 25, 29,
29, 24, 20, 21, 24, 18, 4, 4,
4, 4, 4, 4, 4, 4, 3, 3,
29, 29, 4, 4, 4, 29, 29, 29,
26, 26, 25, 27, 28, 26, 26, 29,
28, 25, 25, 25, 25, 28, 28, 29,
29, 15, 15, 15, 28, 28, 29, 29 ],
getUnicodeCategory: function (c) {
var seg = StringUtils._highByteToSegment[c >>> 8],
stride = StringUtils._segmentToStride[((c & 0xFF) >>> 3) + (seg << 5)];
return StringUtils._strideData[(c & 0x07) + stride];
},
getUnicodeCategoryStr: function (c) {
var seg = StringUtils._highByteToSegment[c >>> 8],
stride = StringUtils._segmentToStride[((c & 0xFF) >>> 3) + (seg << 5)];
return StringUtils.UnicodeCategoryStr[StringUtils._strideData[(c & 0x07) + stride]];
},
isWhiteSpace: function (x) {
switch (x) {
case 0x0020:
case 0x0009:
case 0x000A:
case 0x000B:
case 0x000D:
case 0x000C:
case 0x0085:
case 0x00A0:
case 0x1680:
case 0x180E:
case 0x2000:
case 0x2001:
case 0x2002:
case 0x2003:
case 0x2004:
case 0x2005:
case 0x2006:
case 0x2007:
case 0x2008:
case 0x2009:
case 0x200A:
case 0x202F:
case 0x205F:
case 0x3000:
case 0x2028:
case 0x2029:
return true;
}
return false;
},
isDigit: function (c) {
return (StringUtils.getUnicodeCategory(c) === StringUtils.UnicodeCategory.DecimalDigitNumber);
},
isLetter: function (c) {
return (StringUtils.getUnicodeCategory(c) <= StringUtils.UnicodeCategory.OtherLetter);
},
isLetterOrDigit: function (c) {
var category = StringUtils.getUnicodeCategory(c);
return (category <= StringUtils.UnicodeCategory.OtherLetter ||
category === StringUtils.UnicodeCategory.DecimalDigitNumber);
},
isNumber: function (c) {
var category = StringUtils.getUnicodeCategory(c);
return (category >= StringUtils.UnicodeCategory.DecimalDigitNumber &&
category <= StringUtils.UnicodeCategory.OtherNumber);
},
isSurrogate: function (c) {
return (StringUtils.getUnicodeCategory(c) === StringUtils.UnicodeCategory.Surrogate);
}
}
// Make the whole namespace immutable. Object.freeze is shallow: freezing
// StringUtils alone only prevents its properties from being reassigned, it
// does not protect the contents of the objects and arrays they refer to.
// Each nested container is therefore frozen explicitly, including the three
// lookup tables, so that no caller can corrupt the category data at runtime.
Object.freeze(StringUtils.UnicodeCategory);
Object.freeze(StringUtils.UnicodeCategoryStr);
Object.freeze(StringUtils._highByteToSegment);
Object.freeze(StringUtils._segmentToStride);
Object.freeze(StringUtils._strideData);
Object.freeze(StringUtils);
Although this does not answer the question, I found it quite useful, +1. Would you know if this solution covers the entire BMP? (or, if it goes beyond the BMP, even better)
– mgibsonbr
This Javascript adaptation only covers the BMP (U+0000 to U+FFFF), both because of the table used (which was heavily optimized by the person in the 3rd link I posted) and because my function `getUnicodeCategory` deals only with a single, isolated character. To go further, it would be necessary to detect surrogate pairs, convert them (as needed) to UTF-32, and then run the conversion at line 228 of the Char.cs file (1st link I posted), using the table `category_astral_index`, which was not ported to this Javascript adaptation.
– carlosrafaelgn