SDL_iconv.c 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860
  1. /*
  2. Simple DirectMedia Layer
  3. Copyright (C) 1997-2025 Sam Lantinga <slouken@libsdl.org>
  4. This software is provided 'as-is', without any express or implied
  5. warranty. In no event will the authors be held liable for any damages
  6. arising from the use of this software.
  7. Permission is granted to anyone to use this software for any purpose,
  8. including commercial applications, and to alter it and redistribute it
  9. freely, subject to the following restrictions:
  10. 1. The origin of this software must not be misrepresented; you must not
  11. claim that you wrote the original software. If you use this software
  12. in a product, an acknowledgment in the product documentation would be
  13. appreciated but is not required.
  14. 2. Altered source versions must be plainly marked as such, and must not be
  15. misrepresented as being the original software.
  16. 3. This notice may not be removed or altered from any source distribution.
  17. */
  18. #include "SDL_internal.h"
  19. // This file contains portable iconv functions for SDL
  20. #if defined(HAVE_ICONV) && defined(HAVE_ICONV_H)
  21. #ifndef SDL_USE_LIBICONV
  22. // Define LIBICONV_PLUG to use iconv from the base instead of ports and avoid linker errors.
  23. #define LIBICONV_PLUG 1
  24. #endif
  25. #include <iconv.h>
  26. #include <errno.h>
  27. SDL_COMPILE_TIME_ASSERT(iconv_t, sizeof(iconv_t) <= sizeof(SDL_iconv_t));
  28. SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode)
  29. {
  30. return (SDL_iconv_t)((uintptr_t)iconv_open(tocode, fromcode));
  31. }
  32. int SDL_iconv_close(SDL_iconv_t cd)
  33. {
  34. if ((size_t)cd == SDL_ICONV_ERROR) {
  35. return -1;
  36. }
  37. return iconv_close((iconv_t)((uintptr_t)cd));
  38. }
  39. size_t SDL_iconv(SDL_iconv_t cd,
  40. const char **inbuf, size_t *inbytesleft,
  41. char **outbuf, size_t *outbytesleft)
  42. {
  43. if ((size_t)cd == SDL_ICONV_ERROR) {
  44. return SDL_ICONV_ERROR;
  45. }
  46. /* iconv's second parameter may or may not be `const char const *` depending on the
  47. C runtime's whims. Casting to void * seems to make everyone happy, though. */
  48. const size_t retCode = iconv((iconv_t)((uintptr_t)cd), (void *)inbuf, inbytesleft, outbuf, outbytesleft);
  49. if (retCode == (size_t)-1) {
  50. switch (errno) {
  51. case E2BIG:
  52. return SDL_ICONV_E2BIG;
  53. case EILSEQ:
  54. return SDL_ICONV_EILSEQ;
  55. case EINVAL:
  56. return SDL_ICONV_EINVAL;
  57. default:
  58. return SDL_ICONV_ERROR;
  59. }
  60. }
  61. return retCode;
  62. }
  63. #else
  /* Lots of useful information on Unicode at:
     http://www.cl.cam.ac.uk/~mgk25/unicode.html
  */

  // Byte order mark written at the start of "UTF-16" / "UTF-32" output.
  #define UNICODE_BOM     0xFEFF
  // Stand-in byte for characters that do not fit in ASCII / Latin-1 output.
  #define UNKNOWN_ASCII   '?'
  // Replacement code point used for illegal or out-of-range input.
  #define UNKNOWN_UNICODE 0xFFFD
  // Internal identifiers for every encoding the built-in converter understands.
  enum
  {
      ENCODING_UNKNOWN, // name not found in the encodings table

      // Single-byte encodings
      ENCODING_ASCII,
      ENCODING_LATIN1,

      ENCODING_UTF8,

      // UTF-16 family (surrogate pairs are decoded/encoded)
      ENCODING_UTF16, // Needs byte order marker
      ENCODING_UTF16BE,
      ENCODING_UTF16LE,

      // UTF-32 family (output is limited to U+10FFFF)
      ENCODING_UTF32, // Needs byte order marker
      ENCODING_UTF32BE,
      ENCODING_UTF32LE,

      // UCS-2: fixed 16-bit units, no surrogate handling
      ENCODING_UCS2BE,
      ENCODING_UCS2LE,

      // UCS-4: fixed 32-bit units
      ENCODING_UCS4BE,
      ENCODING_UCS4LE,
  };
  // Aliases that resolve to whichever variant matches the host byte order.
  #if SDL_BYTEORDER != SDL_BIG_ENDIAN
  #define ENCODING_UTF16NATIVE ENCODING_UTF16LE
  #define ENCODING_UTF32NATIVE ENCODING_UTF32LE
  #define ENCODING_UCS2NATIVE  ENCODING_UCS2LE
  #define ENCODING_UCS4NATIVE  ENCODING_UCS4LE
  #else
  #define ENCODING_UTF16NATIVE ENCODING_UTF16BE
  #define ENCODING_UTF32NATIVE ENCODING_UTF32BE
  #define ENCODING_UCS2NATIVE  ENCODING_UCS2BE
  #define ENCODING_UCS4NATIVE  ENCODING_UCS4BE
  #endif
  /* State behind an SDL_iconv_t handle in the built-in converter.
     Both fields hold ENCODING_* values; SDL_iconv() rewrites the generic
     UTF16/UTF32 values to a concrete byte order once it has been decided. */
  struct SDL_iconv_data_t
  {
      int src_fmt; // encoding of the input bytes
      int dst_fmt; // encoding of the output bytes
  };
  103. static struct
  104. {
  105. const char *name;
  106. int format;
  107. } encodings[] = {
  108. /* *INDENT-OFF* */ // clang-format off
  109. { "ASCII", ENCODING_ASCII },
  110. { "US-ASCII", ENCODING_ASCII },
  111. { "8859-1", ENCODING_LATIN1 },
  112. { "ISO-8859-1", ENCODING_LATIN1 },
  113. #if defined(SDL_PLATFORM_WINDOWS) || defined(SDL_PLATFORM_OS2)
  114. { "WCHAR_T", ENCODING_UTF16LE },
  115. #else
  116. { "WCHAR_T", ENCODING_UCS4NATIVE },
  117. #endif
  118. { "UTF8", ENCODING_UTF8 },
  119. { "UTF-8", ENCODING_UTF8 },
  120. { "UTF16", ENCODING_UTF16 },
  121. { "UTF-16", ENCODING_UTF16 },
  122. { "UTF16BE", ENCODING_UTF16BE },
  123. { "UTF-16BE", ENCODING_UTF16BE },
  124. { "UTF16LE", ENCODING_UTF16LE },
  125. { "UTF-16LE", ENCODING_UTF16LE },
  126. { "UTF32", ENCODING_UTF32 },
  127. { "UTF-32", ENCODING_UTF32 },
  128. { "UTF32BE", ENCODING_UTF32BE },
  129. { "UTF-32BE", ENCODING_UTF32BE },
  130. { "UTF32LE", ENCODING_UTF32LE },
  131. { "UTF-32LE", ENCODING_UTF32LE },
  132. { "UCS2", ENCODING_UCS2BE },
  133. { "UCS-2", ENCODING_UCS2BE },
  134. { "UCS-2LE", ENCODING_UCS2LE },
  135. { "UCS-2BE", ENCODING_UCS2BE },
  136. { "UCS-2-INTERNAL", ENCODING_UCS2NATIVE },
  137. { "UCS4", ENCODING_UCS4BE },
  138. { "UCS-4", ENCODING_UCS4BE },
  139. { "UCS-4LE", ENCODING_UCS4LE },
  140. { "UCS-4BE", ENCODING_UCS4BE },
  141. { "UCS-4-INTERNAL", ENCODING_UCS4NATIVE },
  142. /* *INDENT-ON* */ // clang-format on
  143. };
  /* Derive a character-set name from the locale environment.

     LC_ALL, LC_CTYPE, LC_MESSAGES and LANG are consulted in that order and
     the first one that is set to a non-empty value wins. POSIX treats a
     variable set to the empty string as unset, so an empty LC_ALL must not
     hide a usable LANG. If nothing usable is found, or the locale is "C" or
     its alias "POSIX", the result is "ASCII".

     buffer/bufsize receive the result; a value such as "en_US.UTF-8@blah"
     is trimmed to "UTF-8". Returns buffer. */
  static const char *getlocale(char *buffer, size_t bufsize)
  {
      static const char *const env_names[] = { "LC_ALL", "LC_CTYPE", "LC_MESSAGES", "LANG" };
      const char *lang = NULL;
      const char *dot;
      char *at;
      size_t i;

      for (i = 0; i < SDL_arraysize(env_names); ++i) {
          const char *value = SDL_getenv(env_names[i]);
          if (value && *value) {
              lang = value;
              break;
          }
      }

      if (!lang || SDL_strcmp(lang, "C") == 0 || SDL_strcmp(lang, "POSIX") == 0) {
          lang = "ASCII";
      }

      // We need to trim down strings like "en_US.UTF-8@blah" to "UTF-8"
      dot = SDL_strchr(lang, '.');
      if (dot) {
          lang = dot + 1;
      }

      SDL_strlcpy(buffer, lang, bufsize);
      at = SDL_strchr(buffer, '@');
      if (at) {
          *at = '\0'; // chop the "@modifier" suffix
      }
      return buffer;
  }
  173. SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode)
  174. {
  175. int src_fmt = ENCODING_UNKNOWN;
  176. int dst_fmt = ENCODING_UNKNOWN;
  177. int i;
  178. char fromcode_buffer[64];
  179. char tocode_buffer[64];
  180. if (!fromcode || !*fromcode) {
  181. fromcode = getlocale(fromcode_buffer, sizeof(fromcode_buffer));
  182. }
  183. if (!tocode || !*tocode) {
  184. tocode = getlocale(tocode_buffer, sizeof(tocode_buffer));
  185. }
  186. for (i = 0; i < SDL_arraysize(encodings); ++i) {
  187. if (SDL_strcasecmp(fromcode, encodings[i].name) == 0) {
  188. src_fmt = encodings[i].format;
  189. if (dst_fmt != ENCODING_UNKNOWN) {
  190. break;
  191. }
  192. }
  193. if (SDL_strcasecmp(tocode, encodings[i].name) == 0) {
  194. dst_fmt = encodings[i].format;
  195. if (src_fmt != ENCODING_UNKNOWN) {
  196. break;
  197. }
  198. }
  199. }
  200. if (src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN) {
  201. SDL_iconv_t cd = (SDL_iconv_t)SDL_malloc(sizeof(*cd));
  202. if (cd) {
  203. cd->src_fmt = src_fmt;
  204. cd->dst_fmt = dst_fmt;
  205. return cd;
  206. }
  207. }
  208. return (SDL_iconv_t)-1;
  209. }
  210. size_t SDL_iconv(SDL_iconv_t cd,
  211. const char **inbuf, size_t *inbytesleft,
  212. char **outbuf, size_t *outbytesleft)
  213. {
  214. // For simplicity, we'll convert everything to and from UCS-4
  215. const char *src;
  216. char *dst;
  217. size_t srclen, dstlen;
  218. Uint32 ch = 0;
  219. size_t total;
  220. if ((size_t)cd == SDL_ICONV_ERROR) {
  221. return SDL_ICONV_ERROR;
  222. }
  223. if (!inbuf || !*inbuf) {
  224. // Reset the context
  225. return 0;
  226. }
  227. if (!outbuf || !*outbuf || !outbytesleft || !*outbytesleft) {
  228. return SDL_ICONV_E2BIG;
  229. }
  230. src = *inbuf;
  231. srclen = (inbytesleft ? *inbytesleft : 0);
  232. dst = *outbuf;
  233. dstlen = *outbytesleft;
  234. switch (cd->src_fmt) {
  235. case ENCODING_UTF16:
  236. // Scan for a byte order marker
  237. {
  238. Uint8 *p = (Uint8 *)src;
  239. size_t n = srclen / 2;
  240. while (n) {
  241. if (p[0] == 0xFF && p[1] == 0xFE) {
  242. cd->src_fmt = ENCODING_UTF16BE;
  243. break;
  244. } else if (p[0] == 0xFE && p[1] == 0xFF) {
  245. cd->src_fmt = ENCODING_UTF16LE;
  246. break;
  247. }
  248. p += 2;
  249. --n;
  250. }
  251. if (n == 0) {
  252. // We can't tell, default to host order
  253. cd->src_fmt = ENCODING_UTF16NATIVE;
  254. }
  255. }
  256. break;
  257. case ENCODING_UTF32:
  258. // Scan for a byte order marker
  259. {
  260. Uint8 *p = (Uint8 *)src;
  261. size_t n = srclen / 4;
  262. while (n) {
  263. if (p[0] == 0xFF && p[1] == 0xFE &&
  264. p[2] == 0x00 && p[3] == 0x00) {
  265. cd->src_fmt = ENCODING_UTF32BE;
  266. break;
  267. } else if (p[0] == 0x00 && p[1] == 0x00 &&
  268. p[2] == 0xFE && p[3] == 0xFF) {
  269. cd->src_fmt = ENCODING_UTF32LE;
  270. break;
  271. }
  272. p += 4;
  273. --n;
  274. }
  275. if (n == 0) {
  276. // We can't tell, default to host order
  277. cd->src_fmt = ENCODING_UTF32NATIVE;
  278. }
  279. }
  280. break;
  281. }
  282. switch (cd->dst_fmt) {
  283. case ENCODING_UTF16:
  284. // Default to host order, need to add byte order marker
  285. if (dstlen < 2) {
  286. return SDL_ICONV_E2BIG;
  287. }
  288. *(Uint16 *)dst = UNICODE_BOM;
  289. dst += 2;
  290. dstlen -= 2;
  291. cd->dst_fmt = ENCODING_UTF16NATIVE;
  292. break;
  293. case ENCODING_UTF32:
  294. // Default to host order, need to add byte order marker
  295. if (dstlen < 4) {
  296. return SDL_ICONV_E2BIG;
  297. }
  298. *(Uint32 *)dst = UNICODE_BOM;
  299. dst += 4;
  300. dstlen -= 4;
  301. cd->dst_fmt = ENCODING_UTF32NATIVE;
  302. break;
  303. }
  304. total = 0;
  305. while (srclen > 0) {
  306. // Decode a character
  307. switch (cd->src_fmt) {
  308. case ENCODING_ASCII:
  309. {
  310. Uint8 *p = (Uint8 *)src;
  311. ch = (Uint32)(p[0] & 0x7F);
  312. ++src;
  313. --srclen;
  314. } break;
  315. case ENCODING_LATIN1:
  316. {
  317. Uint8 *p = (Uint8 *)src;
  318. ch = (Uint32)p[0];
  319. ++src;
  320. --srclen;
  321. } break;
  322. case ENCODING_UTF8: // RFC 3629
  323. {
  324. Uint8 *p = (Uint8 *)src;
  325. size_t left = 0;
  326. bool overlong = false;
  327. if (p[0] >= 0xF0) {
  328. if ((p[0] & 0xF8) != 0xF0) {
  329. /* Skip illegal sequences
  330. return SDL_ICONV_EILSEQ;
  331. */
  332. ch = UNKNOWN_UNICODE;
  333. } else {
  334. if (p[0] == 0xF0 && srclen > 1 && (p[1] & 0xF0) == 0x80) {
  335. overlong = true;
  336. }
  337. ch = (Uint32)(p[0] & 0x07);
  338. left = 3;
  339. }
  340. } else if (p[0] >= 0xE0) {
  341. if ((p[0] & 0xF0) != 0xE0) {
  342. /* Skip illegal sequences
  343. return SDL_ICONV_EILSEQ;
  344. */
  345. ch = UNKNOWN_UNICODE;
  346. } else {
  347. if (p[0] == 0xE0 && srclen > 1 && (p[1] & 0xE0) == 0x80) {
  348. overlong = true;
  349. }
  350. ch = (Uint32)(p[0] & 0x0F);
  351. left = 2;
  352. }
  353. } else if (p[0] >= 0xC0) {
  354. if ((p[0] & 0xE0) != 0xC0) {
  355. /* Skip illegal sequences
  356. return SDL_ICONV_EILSEQ;
  357. */
  358. ch = UNKNOWN_UNICODE;
  359. } else {
  360. if ((p[0] & 0xDE) == 0xC0) {
  361. overlong = true;
  362. }
  363. ch = (Uint32)(p[0] & 0x1F);
  364. left = 1;
  365. }
  366. } else {
  367. if (p[0] & 0x80) {
  368. /* Skip illegal sequences
  369. return SDL_ICONV_EILSEQ;
  370. */
  371. ch = UNKNOWN_UNICODE;
  372. } else {
  373. ch = (Uint32)p[0];
  374. }
  375. }
  376. ++src;
  377. --srclen;
  378. if (srclen < left) {
  379. return SDL_ICONV_EINVAL;
  380. }
  381. while (left--) {
  382. ++p;
  383. if ((p[0] & 0xC0) != 0x80) {
  384. /* Skip illegal sequences
  385. return SDL_ICONV_EILSEQ;
  386. */
  387. ch = UNKNOWN_UNICODE;
  388. break;
  389. }
  390. ch <<= 6;
  391. ch |= (p[0] & 0x3F);
  392. ++src;
  393. --srclen;
  394. }
  395. if (overlong) {
  396. /* Potential security risk
  397. return SDL_ICONV_EILSEQ;
  398. */
  399. ch = UNKNOWN_UNICODE;
  400. }
  401. if ((ch >= 0xD800 && ch <= 0xDFFF) ||
  402. (ch == 0xFFFE || ch == 0xFFFF) || ch > 0x10FFFF) {
  403. /* Skip illegal sequences
  404. return SDL_ICONV_EILSEQ;
  405. */
  406. ch = UNKNOWN_UNICODE;
  407. }
  408. } break;
  409. case ENCODING_UTF16BE: // RFC 2781
  410. {
  411. Uint8 *p = (Uint8 *)src;
  412. Uint16 W1, W2;
  413. if (srclen < 2) {
  414. return SDL_ICONV_EINVAL;
  415. }
  416. W1 = ((Uint16)p[0] << 8) | (Uint16)p[1];
  417. src += 2;
  418. srclen -= 2;
  419. if (W1 < 0xD800 || W1 > 0xDFFF) {
  420. ch = (Uint32)W1;
  421. break;
  422. }
  423. if (W1 > 0xDBFF) {
  424. /* Skip illegal sequences
  425. return SDL_ICONV_EILSEQ;
  426. */
  427. ch = UNKNOWN_UNICODE;
  428. break;
  429. }
  430. if (srclen < 2) {
  431. return SDL_ICONV_EINVAL;
  432. }
  433. p = (Uint8 *)src;
  434. W2 = ((Uint16)p[0] << 8) | (Uint16)p[1];
  435. src += 2;
  436. srclen -= 2;
  437. if (W2 < 0xDC00 || W2 > 0xDFFF) {
  438. /* Skip illegal sequences
  439. return SDL_ICONV_EILSEQ;
  440. */
  441. ch = UNKNOWN_UNICODE;
  442. break;
  443. }
  444. ch = (((Uint32)(W1 & 0x3FF) << 10) |
  445. (Uint32)(W2 & 0x3FF)) +
  446. 0x10000;
  447. } break;
  448. case ENCODING_UTF16LE: // RFC 2781
  449. {
  450. Uint8 *p = (Uint8 *)src;
  451. Uint16 W1, W2;
  452. if (srclen < 2) {
  453. return SDL_ICONV_EINVAL;
  454. }
  455. W1 = ((Uint16)p[1] << 8) | (Uint16)p[0];
  456. src += 2;
  457. srclen -= 2;
  458. if (W1 < 0xD800 || W1 > 0xDFFF) {
  459. ch = (Uint32)W1;
  460. break;
  461. }
  462. if (W1 > 0xDBFF) {
  463. /* Skip illegal sequences
  464. return SDL_ICONV_EILSEQ;
  465. */
  466. ch = UNKNOWN_UNICODE;
  467. break;
  468. }
  469. if (srclen < 2) {
  470. return SDL_ICONV_EINVAL;
  471. }
  472. p = (Uint8 *)src;
  473. W2 = ((Uint16)p[1] << 8) | (Uint16)p[0];
  474. src += 2;
  475. srclen -= 2;
  476. if (W2 < 0xDC00 || W2 > 0xDFFF) {
  477. /* Skip illegal sequences
  478. return SDL_ICONV_EILSEQ;
  479. */
  480. ch = UNKNOWN_UNICODE;
  481. break;
  482. }
  483. ch = (((Uint32)(W1 & 0x3FF) << 10) |
  484. (Uint32)(W2 & 0x3FF)) +
  485. 0x10000;
  486. } break;
  487. case ENCODING_UCS2LE:
  488. {
  489. Uint8 *p = (Uint8 *)src;
  490. if (srclen < 2) {
  491. return SDL_ICONV_EINVAL;
  492. }
  493. ch = ((Uint32)p[1] << 8) | (Uint32)p[0];
  494. src += 2;
  495. srclen -= 2;
  496. } break;
  497. case ENCODING_UCS2BE:
  498. {
  499. Uint8 *p = (Uint8 *)src;
  500. if (srclen < 2) {
  501. return SDL_ICONV_EINVAL;
  502. }
  503. ch = ((Uint32)p[0] << 8) | (Uint32)p[1];
  504. src += 2;
  505. srclen -= 2;
  506. } break;
  507. case ENCODING_UCS4BE:
  508. case ENCODING_UTF32BE:
  509. {
  510. Uint8 *p = (Uint8 *)src;
  511. if (srclen < 4) {
  512. return SDL_ICONV_EINVAL;
  513. }
  514. ch = ((Uint32)p[0] << 24) |
  515. ((Uint32)p[1] << 16) |
  516. ((Uint32)p[2] << 8) | (Uint32)p[3];
  517. src += 4;
  518. srclen -= 4;
  519. } break;
  520. case ENCODING_UCS4LE:
  521. case ENCODING_UTF32LE:
  522. {
  523. Uint8 *p = (Uint8 *)src;
  524. if (srclen < 4) {
  525. return SDL_ICONV_EINVAL;
  526. }
  527. ch = ((Uint32)p[3] << 24) |
  528. ((Uint32)p[2] << 16) |
  529. ((Uint32)p[1] << 8) | (Uint32)p[0];
  530. src += 4;
  531. srclen -= 4;
  532. } break;
  533. }
  534. // Encode a character
  535. switch (cd->dst_fmt) {
  536. case ENCODING_ASCII:
  537. {
  538. Uint8 *p = (Uint8 *)dst;
  539. if (dstlen < 1) {
  540. return SDL_ICONV_E2BIG;
  541. }
  542. if (ch > 0x7F) {
  543. *p = UNKNOWN_ASCII;
  544. } else {
  545. *p = (Uint8)ch;
  546. }
  547. ++dst;
  548. --dstlen;
  549. } break;
  550. case ENCODING_LATIN1:
  551. {
  552. Uint8 *p = (Uint8 *)dst;
  553. if (dstlen < 1) {
  554. return SDL_ICONV_E2BIG;
  555. }
  556. if (ch > 0xFF) {
  557. *p = UNKNOWN_ASCII;
  558. } else {
  559. *p = (Uint8)ch;
  560. }
  561. ++dst;
  562. --dstlen;
  563. } break;
  564. case ENCODING_UTF8: // RFC 3629
  565. {
  566. Uint8 *p = (Uint8 *)dst;
  567. if (ch > 0x10FFFF) {
  568. ch = UNKNOWN_UNICODE;
  569. }
  570. if (ch <= 0x7F) {
  571. if (dstlen < 1) {
  572. return SDL_ICONV_E2BIG;
  573. }
  574. *p = (Uint8)ch;
  575. ++dst;
  576. --dstlen;
  577. } else if (ch <= 0x7FF) {
  578. if (dstlen < 2) {
  579. return SDL_ICONV_E2BIG;
  580. }
  581. p[0] = 0xC0 | (Uint8)((ch >> 6) & 0x1F);
  582. p[1] = 0x80 | (Uint8)(ch & 0x3F);
  583. dst += 2;
  584. dstlen -= 2;
  585. } else if (ch <= 0xFFFF) {
  586. if (dstlen < 3) {
  587. return SDL_ICONV_E2BIG;
  588. }
  589. p[0] = 0xE0 | (Uint8)((ch >> 12) & 0x0F);
  590. p[1] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
  591. p[2] = 0x80 | (Uint8)(ch & 0x3F);
  592. dst += 3;
  593. dstlen -= 3;
  594. } else {
  595. if (dstlen < 4) {
  596. return SDL_ICONV_E2BIG;
  597. }
  598. p[0] = 0xF0 | (Uint8)((ch >> 18) & 0x07);
  599. p[1] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
  600. p[2] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
  601. p[3] = 0x80 | (Uint8)(ch & 0x3F);
  602. dst += 4;
  603. dstlen -= 4;
  604. }
  605. } break;
  606. case ENCODING_UTF16BE: // RFC 2781
  607. {
  608. Uint8 *p = (Uint8 *)dst;
  609. if (ch > 0x10FFFF) {
  610. ch = UNKNOWN_UNICODE;
  611. }
  612. if (ch < 0x10000) {
  613. if (dstlen < 2) {
  614. return SDL_ICONV_E2BIG;
  615. }
  616. p[0] = (Uint8)(ch >> 8);
  617. p[1] = (Uint8)ch;
  618. dst += 2;
  619. dstlen -= 2;
  620. } else {
  621. Uint16 W1, W2;
  622. if (dstlen < 4) {
  623. return SDL_ICONV_E2BIG;
  624. }
  625. ch = ch - 0x10000;
  626. W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
  627. W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
  628. p[0] = (Uint8)(W1 >> 8);
  629. p[1] = (Uint8)W1;
  630. p[2] = (Uint8)(W2 >> 8);
  631. p[3] = (Uint8)W2;
  632. dst += 4;
  633. dstlen -= 4;
  634. }
  635. } break;
  636. case ENCODING_UTF16LE: // RFC 2781
  637. {
  638. Uint8 *p = (Uint8 *)dst;
  639. if (ch > 0x10FFFF) {
  640. ch = UNKNOWN_UNICODE;
  641. }
  642. if (ch < 0x10000) {
  643. if (dstlen < 2) {
  644. return SDL_ICONV_E2BIG;
  645. }
  646. p[1] = (Uint8)(ch >> 8);
  647. p[0] = (Uint8)ch;
  648. dst += 2;
  649. dstlen -= 2;
  650. } else {
  651. Uint16 W1, W2;
  652. if (dstlen < 4) {
  653. return SDL_ICONV_E2BIG;
  654. }
  655. ch = ch - 0x10000;
  656. W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
  657. W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
  658. p[1] = (Uint8)(W1 >> 8);
  659. p[0] = (Uint8)W1;
  660. p[3] = (Uint8)(W2 >> 8);
  661. p[2] = (Uint8)W2;
  662. dst += 4;
  663. dstlen -= 4;
  664. }
  665. } break;
  666. case ENCODING_UCS2BE:
  667. {
  668. Uint8 *p = (Uint8 *)dst;
  669. if (ch > 0xFFFF) {
  670. ch = UNKNOWN_UNICODE;
  671. }
  672. if (dstlen < 2) {
  673. return SDL_ICONV_E2BIG;
  674. }
  675. p[0] = (Uint8)(ch >> 8);
  676. p[1] = (Uint8)ch;
  677. dst += 2;
  678. dstlen -= 2;
  679. } break;
  680. case ENCODING_UCS2LE:
  681. {
  682. Uint8 *p = (Uint8 *)dst;
  683. if (ch > 0xFFFF) {
  684. ch = UNKNOWN_UNICODE;
  685. }
  686. if (dstlen < 2) {
  687. return SDL_ICONV_E2BIG;
  688. }
  689. p[1] = (Uint8)(ch >> 8);
  690. p[0] = (Uint8)ch;
  691. dst += 2;
  692. dstlen -= 2;
  693. } break;
  694. case ENCODING_UTF32BE:
  695. if (ch > 0x10FFFF) {
  696. ch = UNKNOWN_UNICODE;
  697. }
  698. SDL_FALLTHROUGH;
  699. case ENCODING_UCS4BE:
  700. if (ch > 0x7FFFFFFF) {
  701. ch = UNKNOWN_UNICODE;
  702. }
  703. {
  704. Uint8 *p = (Uint8 *)dst;
  705. if (dstlen < 4) {
  706. return SDL_ICONV_E2BIG;
  707. }
  708. p[0] = (Uint8)(ch >> 24);
  709. p[1] = (Uint8)(ch >> 16);
  710. p[2] = (Uint8)(ch >> 8);
  711. p[3] = (Uint8)ch;
  712. dst += 4;
  713. dstlen -= 4;
  714. }
  715. break;
  716. case ENCODING_UTF32LE:
  717. if (ch > 0x10FFFF) {
  718. ch = UNKNOWN_UNICODE;
  719. }
  720. SDL_FALLTHROUGH;
  721. case ENCODING_UCS4LE:
  722. if (ch > 0x7FFFFFFF) {
  723. ch = UNKNOWN_UNICODE;
  724. }
  725. {
  726. Uint8 *p = (Uint8 *)dst;
  727. if (dstlen < 4) {
  728. return SDL_ICONV_E2BIG;
  729. }
  730. p[3] = (Uint8)(ch >> 24);
  731. p[2] = (Uint8)(ch >> 16);
  732. p[1] = (Uint8)(ch >> 8);
  733. p[0] = (Uint8)ch;
  734. dst += 4;
  735. dstlen -= 4;
  736. }
  737. break;
  738. }
  739. // Update state
  740. *inbuf = src;
  741. *inbytesleft = srclen;
  742. *outbuf = dst;
  743. *outbytesleft = dstlen;
  744. ++total;
  745. }
  746. return total;
  747. }
  748. int SDL_iconv_close(SDL_iconv_t cd)
  749. {
  750. if (cd == (SDL_iconv_t)-1) {
  751. return -1;
  752. }
  753. SDL_free(cd);
  754. return 0;
  755. }
  756. #endif // !HAVE_ICONV
  757. char *SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf, size_t inbytesleft)
  758. {
  759. SDL_iconv_t cd;
  760. char *string;
  761. size_t stringsize;
  762. char *outbuf;
  763. size_t outbytesleft;
  764. size_t retCode = 0;
  765. if (!tocode || !*tocode) {
  766. tocode = "UTF-8";
  767. }
  768. if (!fromcode || !*fromcode) {
  769. fromcode = "UTF-8";
  770. }
  771. cd = SDL_iconv_open(tocode, fromcode);
  772. if (cd == (SDL_iconv_t)-1) {
  773. return NULL;
  774. }
  775. stringsize = inbytesleft;
  776. string = (char *)SDL_malloc(stringsize + sizeof(Uint32));
  777. if (!string) {
  778. SDL_iconv_close(cd);
  779. return NULL;
  780. }
  781. outbuf = string;
  782. outbytesleft = stringsize;
  783. SDL_memset(outbuf, 0, sizeof(Uint32));
  784. while (inbytesleft > 0) {
  785. const size_t oldinbytesleft = inbytesleft;
  786. retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
  787. switch (retCode) {
  788. case SDL_ICONV_E2BIG:
  789. {
  790. const ptrdiff_t diff = (ptrdiff_t) (outbuf - string);
  791. char *oldstring = string;
  792. stringsize *= 2;
  793. string = (char *)SDL_realloc(string, stringsize + sizeof(Uint32));
  794. if (!string) {
  795. SDL_free(oldstring);
  796. SDL_iconv_close(cd);
  797. return NULL;
  798. }
  799. outbuf = string + diff;
  800. outbytesleft = stringsize - diff;
  801. SDL_memset(outbuf, 0, sizeof(Uint32));
  802. continue;
  803. }
  804. case SDL_ICONV_EILSEQ:
  805. // Try skipping some input data - not perfect, but...
  806. ++inbuf;
  807. --inbytesleft;
  808. break;
  809. case SDL_ICONV_EINVAL:
  810. case SDL_ICONV_ERROR:
  811. // We can't continue...
  812. inbytesleft = 0;
  813. break;
  814. }
  815. // Avoid infinite loops when nothing gets converted
  816. if (oldinbytesleft == inbytesleft) {
  817. break;
  818. }
  819. }
  820. SDL_memset(outbuf, 0, sizeof(Uint32));
  821. SDL_iconv_close(cd);
  822. return string;
  823. }