unicode: add utf16decode with utf8 buffer size check · tsiry-sandratraina.com/rockbox-zig@1f548f7

+2 -2

apps/cuesheet.c

···
265 265 {
266 266 if (char_enc == CHAR_ENC_UTF_16_LE)
267 267 {
268 - s = utf16LEdecode(line, utf16_buf, line_len);
268 + s = utf16decode(line, utf16_buf, line_len>>1, sizeof(utf16_buf) - 1, true);
269 269 /* terminate the string at the newline */
270 270 *s = '\0';
271 271 strcpy(line, utf16_buf);
···
275 275 }
276 276 else if (char_enc == CHAR_ENC_UTF_16_BE)
277 277 {
278 - s = utf16BEdecode(line, utf16_buf, line_len);
278 + s = utf16decode(line, utf16_buf, line_len>>1, sizeof(utf16_buf) - 1, false);
279 279 *s = '\0';
280 280 strcpy(line, utf16_buf);
281 281 }

+23 -26

firmware/common/unicode.c

···
265 265 return utf8;
266 266 }
267 267
268 - static unsigned char* utf8encode_ex(unsigned long ucs, unsigned char *utf8, int* utf8_size)
268 + FORCE_INLINE static unsigned char* utf8encode_ex(unsigned long ucs, unsigned char *utf8, int* utf8_size)
269 269 {
270 270 const int tail = utf8_ucs_get_extra_bytes_count(ucs);
271 271 *utf8_size -= tail + 1;
···
420 420 return utf8;
421 421 }
422 422
423 - /* Recode a UTF-16 string with little-endian byte ordering to UTF-8 */
424 - unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8,
425 - int count)
423 + unsigned char* utf16decode(const unsigned char *utf16, unsigned char *utf8,
424 + int count, int utf8_size, bool le)
426 425 {
426 + if (utf8_size == -1)
427 + utf8_size = INT_MAX;
428 +
429 + // little-endian flag is used as significant byte index
430 + if (le)
431 + le = 1;
432 +
427 433 unsigned long ucs;
428 434
429 - while (count > 0) {
435 + while (count > 0 && utf8_size > 0) {
430 436 /* Check for a surrogate pair */
431 - if (utf16[1] >= 0xD8 && utf16[1] < 0xE0) {
432 - ucs = 0x10000 + ((utf16[0] << 10) | ((utf16[1] - 0xD8) << 18)
433 - | utf16[2] | ((utf16[3] - 0xDC) << 8));
437 + if (*(utf16 + le) >= 0xD8 && *(utf16 + le) < 0xE0) {
438 + ucs = 0x10000 + ((utf16[1 - le] << 10) | ((utf16[le] - 0xD8) << 18)
439 + | utf16[2 + (1 - le)] | ((utf16[2 + le] - 0xDC) << 8));
434 440 utf16 += 4;
435 441 count -= 2;
436 442 } else {
437 - ucs = getle16(utf16);
443 + ucs = utf16[le] << 8 | utf16[1 - le];
438 444 utf16 += 2;
439 445 count -= 1;
440 446 }
441 - utf8 = utf8encode(ucs, utf8);
447 + utf8 = utf8encode_ex(ucs, utf8, &utf8_size);
442 448 }
443 449 return utf8;
444 450 }
445 451
446 452 /* Recode a UTF-16 string with big-endian byte ordering to UTF-8 */
447 - unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8,
453 + unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8,
448 454 int count)
449 455 {
450 - unsigned long ucs;
456 + return utf16decode(utf16, utf8, count, -1, true);
457 + }
451 458
452 - while (count > 0) {
453 - if (*utf16 >= 0xD8 && *utf16 < 0xE0) { /* Check for a surrogate pair */
454 - ucs = 0x10000 + (((utf16[0] - 0xD8) << 18) | (utf16[1] << 10)
455 - | ((utf16[2] - 0xDC) << 8) | utf16[3]);
456 - utf16 += 4;
457 - count -= 2;
458 - } else {
459 - ucs = getbe16(utf16);
460 - utf16 += 2;
461 - count -= 1;
462 - }
463 - utf8 = utf8encode(ucs, utf8);
464 - }
465 - return utf8;
459 + unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8,
460 + int count)
461 + {
462 + return utf16decode(utf16, utf8, count, -1, false);
466 463 }
467 464
468 465 bool utf16_has_bom(const unsigned char *utf16, bool *le)

+1

firmware/include/rbunicode.h

···
61 61
62 62 unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8, int count);
63 63 unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8, int count);
64 + unsigned char* utf16decode(const unsigned char *utf16, unsigned char *utf8, int count, int utf8_size, bool le);
64 65 bool utf16_has_bom(const unsigned char *utf16, bool *le);
65 66 unsigned long utf8length(const unsigned char *utf8);
66 67 const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs);

+8 -14

lib/rbcodec/metadata/id3tags.c

···
574 574 unsigned char *str = string;
575 575 unsigned char* utf8 = utf8buf;
576 576
577 - int i = 0;
578 - int templen = 0;
579 -
580 577 switch (str[0]) {
581 578 case 0x01: /* Unicode with or without BOM */
582 579 case 0x02:
583 580 (*len)--;
584 581 str++;
585 582 bool le;
586 -
587 -
583 + int i = 0;
588 584 /* Handle frames with more than one string
589 585 (needed for TXXX frames).*/
590 586 do {
···
593 589 str += BOM_UTF_16_SIZE;
594 590 *len -= BOM_UTF_16_SIZE;
595 591 }
592 + string = str;
596 593
597 594 while ((i < *len) && (str[0] || str[1])) {
598 - if(le)
599 - utf8 = utf16LEdecode(str, utf8, 1);
600 - else
601 - utf8 = utf16BEdecode(str, utf8, 1);
602 -
603 595 str+=2;
604 596 i += 2;
605 597 }
606 598
599 + utf8 = utf16decode(string, utf8, (str-string)>>1 /*(str-string)/2*/, utf8buf_size, le);
607 600 *utf8++ = 0; /* Terminate the string */
608 - templen += (strlen(&utf8buf[templen]) + 1);
601 + utf8buf_size -= utf8 - utf8buf;
609 602 str += 2;
610 - i+=2;
611 - } while(i < *len);
612 - *len = templen - 1;
603 + i += 2;
604 + } while(i < *len && utf8buf_size > 0);
605 + *len = utf8 - utf8buf - 1;
613 606 break;
607 +
614 608 /* case 0x03: UTF-8 encoded string handled by parse_as_utf8 */
615 609
616 610 case 0x00: /* Type 0x00 is ordinary ISO 8859-1 */

Configure Feed

Configure Feed