unicode: add iso_decode_ex with utf8 buffer size check · tsiry-sandratraina.com/rockbox-zig@004304d

+2 -4

apps/cuesheet.c

··· 314 314 break; 315 315 316 316 size_t count = MAX_NAME*3 + 1; 317 - size_t count8859 = MAX_NAME; 318 317 319 318 switch (option) 320 319 { ··· 339 338 340 339 dest = cue->file; 341 340 count = MAX_PATH; 342 - count8859 = MAX_PATH/3; 343 341 break; 344 342 case eCS_TRACK: 345 343 /*Fall-Through*/ ··· 357 355 { 358 356 if (char_enc == CHAR_ENC_ISO_8859_1) 359 357 { 360 - dest = iso_decode(string, dest, -1, 361 - MIN(strlen(string), count8859)); 358 + dest = iso_decode_ex(string, dest, -1, 359 + strlen(string), count - 1); 362 360 *dest = '\0'; 363 361 } 364 362 else

+1 -8

apps/playlist.c

··· 350 350 buf_len = i; 351 351 dest = temp; 352 352 353 - /* Convert char by char, so as to not overflow temp (iso_decode should 354 - * preferably handle this). No more than 4 bytes should be generated for 355 - * each input char. 356 - */ 357 - for (i = 0; i < buf_len && dest < (temp + buf_max - 4); i++) 358 - { 359 - dest = iso_decode(&buf[i], dest, -1, 1); 360 - } 353 + dest = iso_decode_ex(buf, dest, -1, buf_len, buf_max - 1); 361 354 362 355 *dest = 0; 363 356 strcpy(buf, temp);

+35 -7

firmware/common/unicode.c

··· 245 245 return -1; 246 246 } 247 247 248 - /* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */ 249 - unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8) 248 + /* returns number of additional bytes required in encoded string (bytes_count - 1) */ 249 + static int utf8_ucs_get_extra_bytes_count(unsigned long ucs) 250 250 { 251 251 int tail = 0; 252 252 ··· 254 254 while (ucs >> (5*tail + 6)) 255 255 tail++; 256 256 257 + return tail; 258 + } 259 + 260 + static unsigned char * utf8encode_internal(unsigned long ucs, unsigned char *utf8, int tail) 261 + { 257 262 *utf8++ = (ucs >> (6*tail)) | utf8comp[tail]; 258 263 while (tail--) 259 264 *utf8++ = ((ucs >> (6*tail)) & (MASK ^ 0xFF)) | COMP; 265 + return utf8; 266 + } 260 267 261 - return utf8; 268 + static unsigned char* utf8encode_ex(unsigned long ucs, unsigned char *utf8, int* utf8_size) 269 + { 270 + const int tail = utf8_ucs_get_extra_bytes_count(ucs); 271 + *utf8_size -= tail + 1; 272 + return *utf8_size < 0 ? utf8 : utf8encode_internal(ucs, utf8, tail); 273 + } 274 + 275 + /* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */ 276 + unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8) 277 + { 278 + return utf8encode_internal(ucs, utf8, utf8_ucs_get_extra_bytes_count(ucs)); 279 + } 280 + 281 + unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8, int cp, int count) 282 + { 283 + return iso_decode_ex(iso, utf8, cp, count, -1); 262 284 } 263 285 264 286 /* Recode an iso encoded string to UTF-8 */ 265 - unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8, 266 - int cp, int count) 287 + unsigned char* iso_decode_ex(const unsigned char *iso, unsigned char *utf8, int cp, int count, int utf8_size) 267 288 { 289 + if (utf8_size == -1) 290 + utf8_size = INT_MAX; 291 + 268 292 uint16_t *table = NULL; 269 293 270 294 cp_lock_enter(); ··· 322 346 323 347 cp_lock_leave(); 324 348 325 - while (count--) { 349 + while (count-- && utf8_size > 0) { 326 350 unsigned short ucs, tmp; 327 351 328 352 if (*iso < 128 || cp == UTF_8) /* Already UTF-8 */ 353 + { 329 354 *utf8++ = *iso++; 355 + --utf8_size; 356 + } 330 357 331 358 else { 332 359 /* tid tells us which table to use and how */ ··· 375 402 376 403 if (ucs == 0) /* unknown char, use replacement char */ 377 404 ucs = 0xfffd; 378 - utf8 = utf8encode(ucs, utf8); 405 + 406 + utf8 = utf8encode_ex(ucs, utf8, &utf8_size); 379 407 } 380 408 } 381 409

+2

firmware/include/rbunicode.h

··· 57 57 /* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */ 58 58 unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8); 59 59 unsigned char* iso_decode(const unsigned char *latin1, unsigned char *utf8, int cp, int count); 60 + unsigned char* iso_decode_ex(const unsigned char *iso, unsigned char *utf8, int cp, int count, int utf8_size); 61 + 60 62 unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8, int count); 61 63 unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8, int count); 62 64 bool utf16_has_bom(const unsigned char *utf16, bool *le);

+15 -17

lib/rbcodec/metadata/id3tags.c

··· 568 568 569 569 /* Must be called after parse_as_utf8. Checks to see if the passed in string is a 16-bit wide Unicode v2 570 570 string. If it is, we convert it to a UTF-8 string. If it's not unicode, 571 - we convert from the default codepage */ 572 - static void unicode_munge(char* string, char* utf8buf, int *len) { 571 + we convert from the default codepage 572 + NOTE: real UTF-8 buffer size is expected to be utf8buf_size + 1 (additional byte for string terminator) */ 573 + static void unicode_munge(unsigned char* string, unsigned char* utf8buf, int *len, int utf8buf_size) { 574 + unsigned char *str = string; 575 + unsigned char* utf8 = utf8buf; 576 + 573 577 int i = 0; 574 - unsigned char *str = (unsigned char *)string; 575 578 int templen = 0; 576 - unsigned char* utf8 = (unsigned char *)utf8buf; 577 579 578 580 switch (str[0]) { 579 - case 0x00: /* Type 0x00 is ordinary ISO 8859-1 */ 580 - str++; 581 - (*len)--; 582 - utf8 = iso_decode(str, utf8, -1, *len); 583 - *utf8 = 0; 584 - *len = (intptr_t)utf8 - (intptr_t)utf8buf; 585 - break; 586 - 587 581 case 0x01: /* Unicode with or without BOM */ 588 582 case 0x02: 589 583 (*len)--; ··· 618 612 *len = templen - 1; 619 613 break; 620 614 /* case 0x03: UTF-8 encoded string handled by parse_as_utf8 */ 615 + 616 + case 0x00: /* Type 0x00 is ordinary ISO 8859-1 */ 617 + str++; 618 + (*len)--; 619 + //fallthrough 621 620 default: /* Plain old string */ 622 - utf8 = iso_decode(str, utf8, -1, *len); 621 + utf8 = iso_decode_ex(str, utf8, -1, *len, utf8buf_size); 623 622 *utf8 = 0; 624 - *len = (intptr_t)utf8 - (intptr_t)utf8buf; 623 + *len = utf8 - utf8buf; 625 624 break; 626 625 } 627 626 } ··· 1103 1102 { 1104 1103 //limit stack allocation to avoid stack overflow 1105 1104 utf8_size = ID3V2_BUF_SIZE; 1106 - bytesread = ID3V2_BUF_SIZE/3; 1107 1105 } 1108 - char utf8buf[utf8_size + 1]; 1109 - unicode_munge( tag, utf8buf, &bytesread); 1106 + unsigned char utf8buf[utf8_size + 1]; 1107 + unicode_munge( (unsigned char *)tag, utf8buf, &bytesread, utf8_size); 1110 1108 if(bytesread >= buffersize - bufferpos) 1111 1109 bytesread = buffersize - bufferpos - 1; 1112 1110

Configure Feed

Configure Feed