str.c 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973
  1. #include "pocketpy/common/str.h"
  2. #include "pocketpy/common/sstream.h"
  3. #include "pocketpy/common/utils.h"
  4. #include <string.h>
  5. #include <stdio.h>
  6. #include <assert.h>
  7. c11_string* c11_string__new(const char* data) { return c11_string__new2(data, strlen(data)); }
  8. c11_string* c11_string__new2(const char* data, int size) {
  9. c11_string* retval = PK_MALLOC(sizeof(c11_string) + size + 1);
  10. c11_string__ctor2(retval, data, size);
  11. return retval;
  12. }
  13. c11_string* c11_string__new3(const char* fmt, ...) {
  14. c11_sbuf buf;
  15. c11_sbuf__ctor(&buf);
  16. va_list args;
  17. va_start(args, fmt);
  18. // c11_sbuf__write_vfmt(&buf, fmt, args);
  19. pk_vsprintf(&buf, fmt, args);
  20. va_end(args);
  21. return c11_sbuf__submit(&buf);
  22. }
  23. void c11_string__ctor(c11_string* self, const char* data) {
  24. c11_string__ctor2(self, data, strlen(data));
  25. }
  26. void c11_string__ctor2(c11_string* self, const char* data, int size) {
  27. self->size = size;
  28. char* p = (char*)self->data;
  29. memcpy(p, data, size);
  30. p[size] = '\0';
  31. }
  32. void c11_string__ctor3(c11_string* self, int size) {
  33. self->size = size;
  34. char* p = (char*)self->data;
  35. p[size] = '\0';
  36. }
  37. c11_string* c11_string__copy(c11_string* self) {
  38. int total_size = sizeof(c11_string) + self->size + 1;
  39. c11_string* retval = PK_MALLOC(total_size);
  40. memcpy(retval, self, total_size);
  41. return retval;
  42. }
  43. void c11_string__delete(c11_string* self) { PK_FREE(self); }
  44. c11_sv c11_string__sv(c11_string* self) { return (c11_sv){self->data, self->size}; }
  45. c11_string* c11_sv__replace(c11_sv self, char old, char new_) {
  46. c11_string* retval = c11_string__new2(self.data, self.size);
  47. char* p = (char*)retval->data;
  48. for(int i = 0; i < retval->size; i++) {
  49. if(p[i] == old) p[i] = new_;
  50. }
  51. return retval;
  52. }
  53. c11_string* c11_sv__replace2(c11_sv self, c11_sv old, c11_sv new_) {
  54. c11_sbuf buf;
  55. c11_sbuf__ctor(&buf);
  56. int start = 0;
  57. while(true) {
  58. int i = c11_sv__index2(self, old, start);
  59. if(i == -1) break;
  60. c11_sv tmp = c11_sv__slice2(self, start, i);
  61. c11_sbuf__write_sv(&buf, tmp);
  62. c11_sbuf__write_sv(&buf, new_);
  63. start = i + old.size;
  64. }
  65. c11_sv tmp = c11_sv__slice2(self, start, self.size);
  66. c11_sbuf__write_sv(&buf, tmp);
  67. return c11_sbuf__submit(&buf);
  68. }
  69. int c11_sv__u8_length(c11_sv sv) { return c11__byte_index_to_unicode(sv.data, sv.size); }
  70. c11_sv c11_sv__u8_getitem(c11_sv sv, int i) {
  71. i = c11__unicode_index_to_byte(sv.data, i);
  72. int size = c11__u8_header(sv.data[i], false);
  73. return c11_sv__slice2(sv, i, i + size);
  74. }
  75. c11_string* c11_sv__u8_slice(c11_sv sv, int start, int stop, int step) {
  76. c11_sbuf ss;
  77. c11_sbuf__ctor(&ss);
  78. assert(step != 0);
  79. for(int i = start; step > 0 ? i < stop : i > stop; i += step) {
  80. c11_sv unicode = c11_sv__u8_getitem(sv, i);
  81. c11_sbuf__write_sv(&ss, unicode);
  82. }
  83. return c11_sbuf__submit(&ss);
  84. }
  85. /////////////////////////////////////////
  86. c11_sv c11_sv__slice(c11_sv sv, int start) { return c11_sv__slice2(sv, start, sv.size); }
  87. c11_sv c11_sv__slice2(c11_sv sv, int start, int stop) {
  88. if(start < 0) start = 0;
  89. if(stop < start) stop = start;
  90. if(stop > sv.size) stop = sv.size;
  91. return (c11_sv){sv.data + start, stop - start};
  92. }
  93. c11_sv c11_sv__strip(c11_sv sv, c11_sv chars, bool left, bool right) {
  94. int L = 0;
  95. int R = c11_sv__u8_length(sv);
  96. if(left) {
  97. while(L < R) {
  98. c11_sv tmp = c11_sv__u8_getitem(sv, L);
  99. bool found = c11_sv__index2(chars, tmp, 0) != -1;
  100. if(!found) break;
  101. L++;
  102. }
  103. }
  104. if(right) {
  105. while(L < R) {
  106. c11_sv tmp = c11_sv__u8_getitem(sv, R - 1);
  107. bool found = c11_sv__index2(chars, tmp, 0) != -1;
  108. if(!found) break;
  109. R--;
  110. }
  111. }
  112. int start = c11__unicode_index_to_byte(sv.data, L);
  113. int stop = c11__unicode_index_to_byte(sv.data, R);
  114. return c11_sv__slice2(sv, start, stop);
  115. }
  116. int c11_sv__index(c11_sv self, char c) {
  117. for(int i = 0; i < self.size; i++) {
  118. if(self.data[i] == c) return i;
  119. }
  120. return -1;
  121. }
  122. int c11_sv__rindex(c11_sv self, char c) {
  123. for(int i = self.size - 1; i >= 0; i--) {
  124. if(self.data[i] == c) return i;
  125. }
  126. return -1;
  127. }
  128. int c11_sv__index2(c11_sv self, c11_sv sub, int start) {
  129. if(sub.size == 0) return start;
  130. int max_end = self.size - sub.size;
  131. for(int i = start; i <= max_end; i++) {
  132. int res = memcmp(self.data + i, sub.data, sub.size);
  133. if(res == 0) return i;
  134. }
  135. return -1;
  136. }
  137. int c11_sv__count(c11_sv self, c11_sv sub) {
  138. if(sub.size == 0) return self.size + 1;
  139. int cnt = 0;
  140. int start = 0;
  141. while(true) {
  142. int i = c11_sv__index2(self, sub, start);
  143. if(i == -1) break;
  144. cnt++;
  145. start = i + sub.size;
  146. }
  147. return cnt;
  148. }
  149. bool c11_sv__startswith(c11_sv self, c11_sv prefix) {
  150. if(prefix.size > self.size) return false;
  151. return memcmp(self.data, prefix.data, prefix.size) == 0;
  152. }
  153. bool c11_sv__endswith(c11_sv self, c11_sv suffix) {
  154. if(suffix.size > self.size) return false;
  155. return memcmp(self.data + self.size - suffix.size, suffix.data, suffix.size) == 0;
  156. }
  157. uint64_t c11_sv__hash(c11_sv self) {
  158. uint64_t hash = 5381;
  159. for(int i = 0; i < self.size; i++) {
  160. // hash * 33 + c
  161. hash = ((hash << 5) + hash) + (unsigned char)self.data[i];
  162. }
  163. return hash;
  164. }
  165. c11_vector /* T=c11_sv */ c11_sv__split(c11_sv self, char sep) {
  166. c11_vector retval;
  167. c11_vector__ctor(&retval, sizeof(c11_sv));
  168. const char* data = self.data;
  169. int i = 0;
  170. for(int j = 0; j < self.size; j++) {
  171. if(data[j] == sep) {
  172. assert(j >= i);
  173. c11_sv tmp = {data + i, j - i};
  174. c11_vector__push(c11_sv, &retval, tmp);
  175. i = j + 1;
  176. }
  177. }
  178. if(i <= self.size) {
  179. c11_sv tmp = {data + i, self.size - i};
  180. c11_vector__push(c11_sv, &retval, tmp);
  181. }
  182. return retval;
  183. }
  184. c11_vector /* T=c11_sv */ c11_sv__split2(c11_sv self, c11_sv sep) {
  185. if(sep.size == 1) return c11_sv__split(self, sep.data[0]);
  186. c11_vector retval;
  187. c11_vector__ctor(&retval, sizeof(c11_sv));
  188. int start = 0;
  189. const char* data = self.data;
  190. while(true) {
  191. int i = c11_sv__index2(self, sep, start);
  192. if(i == -1) break;
  193. c11_sv tmp = {data + start, i - start};
  194. c11_vector__push(c11_sv, &retval, tmp);
  195. start = i + sep.size;
  196. }
  197. c11_sv tmp = {data + start, self.size - start};
  198. c11_vector__push(c11_sv, &retval, tmp);
  199. return retval;
  200. }
  // Converts a character index `i` into a byte offset within `data` by
  // skipping `i` characters from the start; each step advances by the
  // length that c11__u8_header reports for the current lead byte
  // (suppress=false). No bound on `data` is checked here.
  int c11__unicode_index_to_byte(const char* data, int i) {
      int offset = 0;
      for(; i > 0; i--) {
          offset += c11__u8_header(data[offset], false);
      }
      return offset;
  }
  // Counts the characters that start within the first `n` bytes of `data`:
  // every byte that is not a continuation byte (bit pattern 10xxxxxx) is
  // counted as the start of one character.
  int c11__byte_index_to_unicode(const char* data, int n) {
      int n_chars = 0;
      int i = 0;
      while(i < n) {
          n_chars += (data[i] & 0xC0) != 0x80;
          i++;
      }
      return n_chars;
  }
  216. //////////////
  217. bool c11_bytes__eq(c11_bytes* self, c11_bytes* other) {
  218. if(self->size != other->size) return false;
  219. return memcmp(self->data, other->data, self->size) == 0;
  220. }
  221. int c11_sv__cmp(c11_sv self, c11_sv other) {
  222. int res = strncmp(self.data, other.data, c11__min(self.size, other.size));
  223. if(res != 0) return res;
  224. return self.size - other.size;
  225. }
  226. int c11_sv__cmp2(c11_sv self, const char* other) {
  227. int size = strlen(other);
  228. int res = strncmp(self.data, other, c11__min(self.size, size));
  229. if(res != 0) return res;
  230. return self.size - size;
  231. }
  // True when the two NUL-terminated strings have identical contents.
  bool c11__streq(const char* a, const char* b) {
      return !strcmp(a, b);
  }
  233. bool c11__sveq(c11_sv a, c11_sv b) {
  234. if(a.size != b.size) return false;
  235. return memcmp(a.data, b.data, a.size) == 0;
  236. }
  237. bool c11__sveq2(c11_sv a, const char* b) {
  238. int size = strlen(b);
  239. if(a.size != size) return false;
  240. return memcmp(a.data, b, size) == 0;
  241. }
  // Returns the total byte length (1-6) of the sequence introduced by the
  // lead byte `c`, based on its high bits:
  //   0xxxxxxx -> 1, 110xxxxx -> 2, 1110xxxx -> 3,
  //   11110xxx -> 4, 111110xx -> 5, 1111110x -> 6.
  // Any other byte is not a lead byte: the function returns 0 when
  // `suppress` is true and otherwise calls c11__abort first.
  int c11__u8_header(unsigned char c, bool suppress) {
      if(c < 0x80) return 1;
      if((c & 0xE0) == 0xC0) return 2;
      if((c & 0xF0) == 0xE0) return 3;
      if((c & 0xF8) == 0xF0) return 4;
      if((c & 0xFC) == 0xF8) return 5;
      if((c & 0xFE) == 0xFC) return 6;
      if(suppress) return 0;
      c11__abort("invalid utf8 char");
      return 0;
  }
  // Decodes the code point stored in the `u8bytes`-byte sequence at `data`.
  // `u8bytes` must not be 0 (asserted). A 1-byte sequence is returned as-is.
  // For longer sequences the payload bits of the lead byte (5, 4 or 3 bits
  // for lengths 2, 3 and 4) form the high part and every following byte
  // contributes its low 6 bits. For lengths other than 2-4 the lead byte
  // contributes nothing.
  int c11__u8_value(int u8bytes, const char* data) {
      assert(u8bytes != 0);
      if(u8bytes == 1) return (int)data[0];

      uint32_t code_point = 0;
      if(u8bytes >= 2) {
          uint8_t lead = data[0];
          switch(u8bytes) {
              case 2: code_point = (lead & 0x1F) << 6; break;
              case 3: code_point = (lead & 0x0F) << 12; break;
              case 4: code_point = (lead & 0x07) << 18; break;
              default: break;
          }
      }
      for(int k = 1; k < u8bytes; k++) {
          uint8_t cont = data[k];
          // The last byte lands in bits 0-5, the one before in bits 6-11, ...
          code_point |= (cont & 0x3F) << (6 * (u8bytes - k - 1));
      }
      return (int)code_point;
  }
  // Encodes the code point `utf32_char` as UTF-8 into `utf8_output` and
  // returns the number of bytes written (1-4). Values above 0x10FFFF are
  // rejected with -1 and nothing is written. No terminator is appended.
  int c11__u32_to_u8(uint32_t utf32_char, char utf8_output[4]) {
      const uint32_t cp = utf32_char;
      if(cp > 0x10FFFF) return -1;  // outside the encodable range

      if(cp <= 0x7F) {
          // 0xxxxxxx
          utf8_output[0] = (char)cp;
          return 1;
      }
      if(cp <= 0x7FF) {
          // 110xxxxx 10xxxxxx
          utf8_output[0] = (char)(0xC0 | ((cp >> 6) & 0x1F));
          utf8_output[1] = (char)(0x80 | (cp & 0x3F));
          return 2;
      }
      if(cp <= 0xFFFF) {
          // 1110xxxx 10xxxxxx 10xxxxxx
          utf8_output[0] = (char)(0xE0 | ((cp >> 12) & 0x0F));
          utf8_output[1] = (char)(0x80 | ((cp >> 6) & 0x3F));
          utf8_output[2] = (char)(0x80 | (cp & 0x3F));
          return 3;
      }
      // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      utf8_output[0] = (char)(0xF0 | ((cp >> 18) & 0x07));
      utf8_output[1] = (char)(0x80 | ((cp >> 12) & 0x3F));
      utf8_output[2] = (char)(0x80 | ((cp >> 6) & 0x3F));
      utf8_output[3] = (char)(0x80 | (cp & 0x3F));
      return 4;
  }
  // Returns a PK_MALLOC'ed copy of the NUL-terminated string `str`,
  // including its terminator.
  char* c11_strdup(const char* str) {
      int n_chars = strlen(str);
      char* copy = PK_MALLOC(n_chars + 1);
      // str[n_chars] is the terminator, so copying n_chars + 1 bytes
      // duplicates the text and terminates the copy in one step.
      memcpy(copy, str, n_chars + 1);
      return copy;
  }
  // Returns a PK_MALLOC'ed copy of the `size` bytes starting at `src`.
  unsigned char* c11_memdup(const unsigned char* src, int size) {
      unsigned char* copy = PK_MALLOC(size);
      memcpy(copy, src, size);
      return copy;
  }
  313. IntParsingResult c11__parse_uint(c11_sv text, int64_t* out, int base) {
  314. *out = 0;
  315. c11_sv prefix = {.data = text.data, .size = c11__min(2, text.size)};
  316. if(base == -1) {
  317. if(c11__sveq2(prefix, "0b"))
  318. base = 2;
  319. else if(c11__sveq2(prefix, "0o"))
  320. base = 8;
  321. else if(c11__sveq2(prefix, "0x"))
  322. base = 16;
  323. else
  324. base = 10;
  325. }
  326. if(base == 10) {
  327. // 10-base 12334
  328. if(text.size == 0) return IntParsing_FAILURE;
  329. for(int i = 0; i < text.size; i++) {
  330. char c = text.data[i];
  331. if(c >= '0' && c <= '9') {
  332. *out = (*out * 10) + (c - '0');
  333. } else {
  334. return IntParsing_FAILURE;
  335. }
  336. }
  337. // "9223372036854775807".__len__() == 19
  338. if(text.size > 19) return IntParsing_OVERFLOW;
  339. return IntParsing_SUCCESS;
  340. } else if(base == 2) {
  341. // 2-base 0b101010
  342. if(c11__sveq2(prefix, "0b")) {
  343. // text.remove_prefix(2);
  344. text = (c11_sv){text.data + 2, text.size - 2};
  345. }
  346. if(text.size == 0) return IntParsing_FAILURE;
  347. for(int i = 0; i < text.size; i++) {
  348. char c = text.data[i];
  349. if(c == '0' || c == '1') {
  350. *out = (*out << 1) | (c - '0');
  351. } else {
  352. return IntParsing_FAILURE;
  353. }
  354. }
  355. // "111111111111111111111111111111111111111111111111111111111111111".__len__() == 63
  356. if(text.size > 63) return IntParsing_OVERFLOW;
  357. return IntParsing_SUCCESS;
  358. } else if(base == 8) {
  359. // 8-base 0o123
  360. if(c11__sveq2(prefix, "0o")) {
  361. // text.remove_prefix(2);
  362. text = (c11_sv){text.data + 2, text.size - 2};
  363. }
  364. if(text.size == 0) return IntParsing_FAILURE;
  365. for(int i = 0; i < text.size; i++) {
  366. char c = text.data[i];
  367. if(c >= '0' && c <= '7') {
  368. *out = (*out << 3) | (c - '0');
  369. } else {
  370. return IntParsing_FAILURE;
  371. }
  372. }
  373. // "777777777777777777777".__len__() == 21
  374. if(text.size > 21) return IntParsing_OVERFLOW;
  375. return IntParsing_SUCCESS;
  376. } else if(base == 16) {
  377. // 16-base 0x123
  378. if(c11__sveq2(prefix, "0x")) {
  379. // text.remove_prefix(2);
  380. text = (c11_sv){text.data + 2, text.size - 2};
  381. }
  382. if(text.size == 0) return IntParsing_FAILURE;
  383. for(int i = 0; i < text.size; i++) {
  384. char c = text.data[i];
  385. if(c >= '0' && c <= '9') {
  386. *out = (*out << 4) | (c - '0');
  387. } else if(c >= 'a' && c <= 'f') {
  388. *out = (*out << 4) | (c - 'a' + 10);
  389. } else if(c >= 'A' && c <= 'F') {
  390. *out = (*out << 4) | (c - 'A' + 10);
  391. } else {
  392. return IntParsing_FAILURE;
  393. }
  394. }
  395. // "7fffffffffffffff".__len__() == 16
  396. if(text.size > 16) return IntParsing_OVERFLOW;
  397. return IntParsing_SUCCESS;
  398. }
  399. return IntParsing_FAILURE;
  400. }
  401. const char* c11__search_u32_ranges(int c, const c11_u32_range* p, int n_ranges) {
  402. int lbound = 0;
  403. int ubound = n_ranges - 1;
  404. if(c < p[0].start || c > p[ubound].end) return NULL;
  405. while(ubound >= lbound) {
  406. int mid = (lbound + ubound) / 2;
  407. if(c > p[mid].end) {
  408. lbound = mid + 1;
  409. } else if(c < p[mid].start) {
  410. ubound = mid - 1;
  411. } else {
  412. return p[mid].data;
  413. }
  414. }
  415. return NULL;
  416. }
  417. const static c11_u32_range kLoRanges[] = {
  418. {170, 170 },
  419. {186, 186 },
  420. {443, 443 },
  421. {448, 451 },
  422. {660, 660 },
  423. {1488, 1514 },
  424. {1519, 1522 },
  425. {1568, 1599 },
  426. {1601, 1610 },
  427. {1646, 1647 },
  428. {1649, 1747 },
  429. {1749, 1749 },
  430. {1774, 1775 },
  431. {1786, 1788 },
  432. {1791, 1791 },
  433. {1808, 1808 },
  434. {1810, 1839 },
  435. {1869, 1957 },
  436. {1969, 1969 },
  437. {1994, 2026 },
  438. {2048, 2069 },
  439. {2112, 2136 },
  440. {2144, 2154 },
  441. {2160, 2183 },
  442. {2185, 2190 },
  443. {2208, 2248 },
  444. {2308, 2361 },
  445. {2365, 2365 },
  446. {2384, 2384 },
  447. {2392, 2401 },
  448. {2418, 2432 },
  449. {2437, 2444 },
  450. {2447, 2448 },
  451. {2451, 2472 },
  452. {2474, 2480 },
  453. {2482, 2482 },
  454. {2486, 2489 },
  455. {2493, 2493 },
  456. {2510, 2510 },
  457. {2524, 2525 },
  458. {2527, 2529 },
  459. {2544, 2545 },
  460. {2556, 2556 },
  461. {2565, 2570 },
  462. {2575, 2576 },
  463. {2579, 2600 },
  464. {2602, 2608 },
  465. {2610, 2611 },
  466. {2613, 2614 },
  467. {2616, 2617 },
  468. {2649, 2652 },
  469. {2654, 2654 },
  470. {2674, 2676 },
  471. {2693, 2701 },
  472. {2703, 2705 },
  473. {2707, 2728 },
  474. {2730, 2736 },
  475. {2738, 2739 },
  476. {2741, 2745 },
  477. {2749, 2749 },
  478. {2768, 2768 },
  479. {2784, 2785 },
  480. {2809, 2809 },
  481. {2821, 2828 },
  482. {2831, 2832 },
  483. {2835, 2856 },
  484. {2858, 2864 },
  485. {2866, 2867 },
  486. {2869, 2873 },
  487. {2877, 2877 },
  488. {2908, 2909 },
  489. {2911, 2913 },
  490. {2929, 2929 },
  491. {2947, 2947 },
  492. {2949, 2954 },
  493. {2958, 2960 },
  494. {2962, 2965 },
  495. {2969, 2970 },
  496. {2972, 2972 },
  497. {2974, 2975 },
  498. {2979, 2980 },
  499. {2984, 2986 },
  500. {2990, 3001 },
  501. {3024, 3024 },
  502. {3077, 3084 },
  503. {3086, 3088 },
  504. {3090, 3112 },
  505. {3114, 3129 },
  506. {3133, 3133 },
  507. {3160, 3162 },
  508. {3165, 3165 },
  509. {3168, 3169 },
  510. {3200, 3200 },
  511. {3205, 3212 },
  512. {3214, 3216 },
  513. {3218, 3240 },
  514. {3242, 3251 },
  515. {3253, 3257 },
  516. {3261, 3261 },
  517. {3293, 3294 },
  518. {3296, 3297 },
  519. {3313, 3314 },
  520. {3332, 3340 },
  521. {3342, 3344 },
  522. {3346, 3386 },
  523. {3389, 3389 },
  524. {3406, 3406 },
  525. {3412, 3414 },
  526. {3423, 3425 },
  527. {3450, 3455 },
  528. {3461, 3478 },
  529. {3482, 3505 },
  530. {3507, 3515 },
  531. {3517, 3517 },
  532. {3520, 3526 },
  533. {3585, 3632 },
  534. {3634, 3635 },
  535. {3648, 3653 },
  536. {3713, 3714 },
  537. {3716, 3716 },
  538. {3718, 3722 },
  539. {3724, 3747 },
  540. {3749, 3749 },
  541. {3751, 3760 },
  542. {3762, 3763 },
  543. {3773, 3773 },
  544. {3776, 3780 },
  545. {3804, 3807 },
  546. {3840, 3840 },
  547. {3904, 3911 },
  548. {3913, 3948 },
  549. {3976, 3980 },
  550. {4096, 4138 },
  551. {4159, 4159 },
  552. {4176, 4181 },
  553. {4186, 4189 },
  554. {4193, 4193 },
  555. {4197, 4198 },
  556. {4206, 4208 },
  557. {4213, 4225 },
  558. {4238, 4238 },
  559. {4352, 4680 },
  560. {4682, 4685 },
  561. {4688, 4694 },
  562. {4696, 4696 },
  563. {4698, 4701 },
  564. {4704, 4744 },
  565. {4746, 4749 },
  566. {4752, 4784 },
  567. {4786, 4789 },
  568. {4792, 4798 },
  569. {4800, 4800 },
  570. {4802, 4805 },
  571. {4808, 4822 },
  572. {4824, 4880 },
  573. {4882, 4885 },
  574. {4888, 4954 },
  575. {4992, 5007 },
  576. {5121, 5740 },
  577. {5743, 5759 },
  578. {5761, 5786 },
  579. {5792, 5866 },
  580. {5873, 5880 },
  581. {5888, 5905 },
  582. {5919, 5937 },
  583. {5952, 5969 },
  584. {5984, 5996 },
  585. {5998, 6000 },
  586. {6016, 6067 },
  587. {6108, 6108 },
  588. {6176, 6210 },
  589. {6212, 6264 },
  590. {6272, 6276 },
  591. {6279, 6312 },
  592. {6314, 6314 },
  593. {6320, 6389 },
  594. {6400, 6430 },
  595. {6480, 6509 },
  596. {6512, 6516 },
  597. {6528, 6571 },
  598. {6576, 6601 },
  599. {6656, 6678 },
  600. {6688, 6740 },
  601. {6917, 6963 },
  602. {6981, 6988 },
  603. {7043, 7072 },
  604. {7086, 7087 },
  605. {7098, 7141 },
  606. {7168, 7203 },
  607. {7245, 7247 },
  608. {7258, 7287 },
  609. {7401, 7404 },
  610. {7406, 7411 },
  611. {7413, 7414 },
  612. {7418, 7418 },
  613. {8501, 8504 },
  614. {11568, 11623 },
  615. {11648, 11670 },
  616. {11680, 11686 },
  617. {11688, 11694 },
  618. {11696, 11702 },
  619. {11704, 11710 },
  620. {11712, 11718 },
  621. {11720, 11726 },
  622. {11728, 11734 },
  623. {11736, 11742 },
  624. {12294, 12294 },
  625. {12348, 12348 },
  626. {12353, 12438 },
  627. {12447, 12447 },
  628. {12449, 12538 },
  629. {12543, 12543 },
  630. {12549, 12591 },
  631. {12593, 12686 },
  632. {12704, 12735 },
  633. {12784, 12799 },
  634. {13312, 19903 },
  635. {19968, 40980 },
  636. {40982, 42124 },
  637. {42192, 42231 },
  638. {42240, 42507 },
  639. {42512, 42527 },
  640. {42538, 42539 },
  641. {42606, 42606 },
  642. {42656, 42725 },
  643. {42895, 42895 },
  644. {42999, 42999 },
  645. {43003, 43009 },
  646. {43011, 43013 },
  647. {43015, 43018 },
  648. {43020, 43042 },
  649. {43072, 43123 },
  650. {43138, 43187 },
  651. {43250, 43255 },
  652. {43259, 43259 },
  653. {43261, 43262 },
  654. {43274, 43301 },
  655. {43312, 43334 },
  656. {43360, 43388 },
  657. {43396, 43442 },
  658. {43488, 43492 },
  659. {43495, 43503 },
  660. {43514, 43518 },
  661. {43520, 43560 },
  662. {43584, 43586 },
  663. {43588, 43595 },
  664. {43616, 43631 },
  665. {43633, 43638 },
  666. {43642, 43642 },
  667. {43646, 43695 },
  668. {43697, 43697 },
  669. {43701, 43702 },
  670. {43705, 43709 },
  671. {43712, 43712 },
  672. {43714, 43714 },
  673. {43739, 43740 },
  674. {43744, 43754 },
  675. {43762, 43762 },
  676. {43777, 43782 },
  677. {43785, 43790 },
  678. {43793, 43798 },
  679. {43808, 43814 },
  680. {43816, 43822 },
  681. {43968, 44002 },
  682. {44032, 55203 },
  683. {55216, 55238 },
  684. {55243, 55291 },
  685. {63744, 64109 },
  686. {64112, 64217 },
  687. {64285, 64285 },
  688. {64287, 64296 },
  689. {64298, 64310 },
  690. {64312, 64316 },
  691. {64318, 64318 },
  692. {64320, 64321 },
  693. {64323, 64324 },
  694. {64326, 64433 },
  695. {64467, 64829 },
  696. {64848, 64911 },
  697. {64914, 64967 },
  698. {65008, 65019 },
  699. {65136, 65140 },
  700. {65142, 65276 },
  701. {65382, 65391 },
  702. {65393, 65437 },
  703. {65440, 65470 },
  704. {65474, 65479 },
  705. {65482, 65487 },
  706. {65490, 65495 },
  707. {65498, 65500 },
  708. {65536, 65547 },
  709. {65549, 65574 },
  710. {65576, 65594 },
  711. {65596, 65597 },
  712. {65599, 65613 },
  713. {65616, 65629 },
  714. {65664, 65786 },
  715. {66176, 66204 },
  716. {66208, 66256 },
  717. {66304, 66335 },
  718. {66349, 66368 },
  719. {66370, 66377 },
  720. {66384, 66421 },
  721. {66432, 66461 },
  722. {66464, 66499 },
  723. {66504, 66511 },
  724. {66640, 66717 },
  725. {66816, 66855 },
  726. {66864, 66915 },
  727. {67072, 67382 },
  728. {67392, 67413 },
  729. {67424, 67431 },
  730. {67584, 67589 },
  731. {67592, 67592 },
  732. {67594, 67637 },
  733. {67639, 67640 },
  734. {67644, 67644 },
  735. {67647, 67669 },
  736. {67680, 67702 },
  737. {67712, 67742 },
  738. {67808, 67826 },
  739. {67828, 67829 },
  740. {67840, 67861 },
  741. {67872, 67897 },
  742. {67968, 68023 },
  743. {68030, 68031 },
  744. {68096, 68096 },
  745. {68112, 68115 },
  746. {68117, 68119 },
  747. {68121, 68149 },
  748. {68192, 68220 },
  749. {68224, 68252 },
  750. {68288, 68295 },
  751. {68297, 68324 },
  752. {68352, 68405 },
  753. {68416, 68437 },
  754. {68448, 68466 },
  755. {68480, 68497 },
  756. {68608, 68680 },
  757. {68864, 68899 },
  758. {69248, 69289 },
  759. {69296, 69297 },
  760. {69376, 69404 },
  761. {69415, 69415 },
  762. {69424, 69445 },
  763. {69488, 69505 },
  764. {69552, 69572 },
  765. {69600, 69622 },
  766. {69635, 69687 },
  767. {69745, 69746 },
  768. {69749, 69749 },
  769. {69763, 69807 },
  770. {69840, 69864 },
  771. {69891, 69926 },
  772. {69956, 69956 },
  773. {69959, 69959 },
  774. {69968, 70002 },
  775. {70006, 70006 },
  776. {70019, 70066 },
  777. {70081, 70084 },
  778. {70106, 70106 },
  779. {70108, 70108 },
  780. {70144, 70161 },
  781. {70163, 70187 },
  782. {70272, 70278 },
  783. {70280, 70280 },
  784. {70282, 70285 },
  785. {70287, 70301 },
  786. {70303, 70312 },
  787. {70320, 70366 },
  788. {70405, 70412 },
  789. {70415, 70416 },
  790. {70419, 70440 },
  791. {70442, 70448 },
  792. {70450, 70451 },
  793. {70453, 70457 },
  794. {70461, 70461 },
  795. {70480, 70480 },
  796. {70493, 70497 },
  797. {70656, 70708 },
  798. {70727, 70730 },
  799. {70751, 70753 },
  800. {70784, 70831 },
  801. {70852, 70853 },
  802. {70855, 70855 },
  803. {71040, 71086 },
  804. {71128, 71131 },
  805. {71168, 71215 },
  806. {71236, 71236 },
  807. {71296, 71338 },
  808. {71352, 71352 },
  809. {71424, 71450 },
  810. {71488, 71494 },
  811. {71680, 71723 },
  812. {71935, 71942 },
  813. {71945, 71945 },
  814. {71948, 71955 },
  815. {71957, 71958 },
  816. {71960, 71983 },
  817. {71999, 71999 },
  818. {72001, 72001 },
  819. {72096, 72103 },
  820. {72106, 72144 },
  821. {72161, 72161 },
  822. {72163, 72163 },
  823. {72192, 72192 },
  824. {72203, 72242 },
  825. {72250, 72250 },
  826. {72272, 72272 },
  827. {72284, 72329 },
  828. {72349, 72349 },
  829. {72368, 72440 },
  830. {72704, 72712 },
  831. {72714, 72750 },
  832. {72768, 72768 },
  833. {72818, 72847 },
  834. {72960, 72966 },
  835. {72968, 72969 },
  836. {72971, 73008 },
  837. {73030, 73030 },
  838. {73056, 73061 },
  839. {73063, 73064 },
  840. {73066, 73097 },
  841. {73112, 73112 },
  842. {73440, 73458 },
  843. {73648, 73648 },
  844. {73728, 74649 },
  845. {74880, 75075 },
  846. {77712, 77808 },
  847. {77824, 78894 },
  848. {82944, 83526 },
  849. {92160, 92728 },
  850. {92736, 92766 },
  851. {92784, 92862 },
  852. {92880, 92909 },
  853. {92928, 92975 },
  854. {93027, 93047 },
  855. {93053, 93071 },
  856. {93952, 94026 },
  857. {94032, 94032 },
  858. {94208, 100343},
  859. {100352, 101589},
  860. {101632, 101640},
  861. {110592, 110882},
  862. {110928, 110930},
  863. {110948, 110951},
  864. {110960, 111355},
  865. {113664, 113770},
  866. {113776, 113788},
  867. {113792, 113800},
  868. {113808, 113817},
  869. {122634, 122634},
  870. {123136, 123180},
  871. {123214, 123214},
  872. {123536, 123565},
  873. {123584, 123627},
  874. {124896, 124902},
  875. {124904, 124907},
  876. {124909, 124910},
  877. {124912, 124926},
  878. {124928, 125124},
  879. {126464, 126467},
  880. {126469, 126495},
  881. {126497, 126498},
  882. {126500, 126500},
  883. {126503, 126503},
  884. {126505, 126514},
  885. {126516, 126519},
  886. {126521, 126521},
  887. {126523, 126523},
  888. {126530, 126530},
  889. {126535, 126535},
  890. {126537, 126537},
  891. {126539, 126539},
  892. {126541, 126543},
  893. {126545, 126546},
  894. {126548, 126548},
  895. {126551, 126551},
  896. {126553, 126553},
  897. {126555, 126555},
  898. {126557, 126557},
  899. {126559, 126559},
  900. {126561, 126562},
  901. {126564, 126564},
  902. {126567, 126570},
  903. {126572, 126578},
  904. {126580, 126583},
  905. {126585, 126588},
  906. {126590, 126590},
  907. {126592, 126601},
  908. {126603, 126619},
  909. {126625, 126627},
  910. {126629, 126633},
  911. {126635, 126651},
  912. {131072, 173791},
  913. {173824, 177976},
  914. {177984, 178205},
  915. {178208, 183969},
  916. {183984, 191456},
  917. {194560, 195101},
  918. {196608, 201546},
  919. };
  920. bool c11__is_unicode_Lo_char(int c) {
  921. if(c == 0x1f955) return true;
  922. const char* data =
  923. c11__search_u32_ranges(c, kLoRanges, sizeof(kLoRanges) / sizeof(c11_u32_range));
  924. return data != NULL;
  925. }