···350350 buf_len = i;
351351 dest = temp;
352352353353- /* Convert char by char, so as to not overflow temp (iso_decode should
354354- * preferably handle this). No more than 4 bytes should be generated for
355355- * each input char.
356356- */
357357- for (i = 0; i < buf_len && dest < (temp + buf_max - 4); i++)
358358- {
359359- dest = iso_decode(&buf[i], dest, -1, 1);
360360- }
353353+ dest = iso_decode_ex(buf, dest, -1, buf_len, buf_max - 1);
361354362355 *dest = 0;
363356 strcpy(buf, temp);
+35-7
firmware/common/unicode.c
···245245 return -1;
246246}
247247248248-/* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */
249249-unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8)
248248+/* returns number of additional bytes required in encoded string (bytes_count - 1) */
249249+static int utf8_ucs_get_extra_bytes_count(unsigned long ucs)
250250{
251251 int tail = 0;
252252···254254 while (ucs >> (5*tail + 6))
255255 tail++;
256256257257+ return tail;
258258+}
/* Write the UTF-8 encoding of ucs at utf8 and return a pointer just past
 * the last byte written.
 *
 * tail is the number of bytes that follow the lead byte; callers in this
 * file obtain it from utf8_ucs_get_extra_bytes_count(ucs). Exactly
 * tail + 1 bytes are written and no bounds checking is done here, so the
 * caller must guarantee that much room at utf8.
 */
259259+260260+static unsigned char * utf8encode_internal(unsigned long ucs, unsigned char *utf8, int tail)
261261+{
/* Lead byte: the topmost payload bits of ucs combined with the entry of
 * utf8comp for this tail length (utf8comp is defined outside this hunk;
 * presumably the lead-byte prefix table -- confirm). */
257262 *utf8++ = (ucs >> (6*tail)) | utf8comp[tail];
/* Following bytes, most significant group first: tail is decremented
 * before the body runs, so the shifts go 6*(tail-1), ..., 6, 0.
 * MASK and COMP are defined elsewhere (presumably the continuation-byte
 * mask and marker -- confirm). */
258263 while (tail--)
259264 *utf8++ = ((ucs >> (6*tail)) & (MASK ^ 0xFF)) | COMP;
265265+ return utf8;
266266+}
260267261261- return utf8;
/* Bounded variant of utf8encode: encode ucs at utf8 only if the whole
 * sequence fits in the space counted by *utf8_size.
 *
 * ucs        value to encode
 * utf8       output position
 * utf8_size  in/out: remaining output bytes; always reduced by the
 *            encoded length (tail + 1), even when nothing is written
 *
 * Returns a pointer past the bytes written, or utf8 unchanged if the
 * sequence did not fit. In that case *utf8_size is left negative, which
 * is what makes the "utf8_size > 0" loop test in iso_decode_ex stop.
 */
268268+static unsigned char* utf8encode_ex(unsigned long ucs, unsigned char *utf8, int* utf8_size)
269269+{
270270+ const int tail = utf8_ucs_get_extra_bytes_count(ucs);
/* Charge the budget first; a negative result means "did not fit". */
271271+ *utf8_size -= tail + 1;
272272+ return *utf8_size < 0 ? utf8 : utf8encode_internal(ucs, utf8, tail);
273273+}
274274+275275+/* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char.
 *
 * Unbounded: the output length is derived from ucs via
 * utf8_ucs_get_extra_bytes_count() and that many bytes plus one are
 * written without any size check, so the caller must provide enough room
 * at utf8. */
276276+unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8)
277277+{
278278+ return utf8encode_internal(ucs, utf8, utf8_ucs_get_extra_bytes_count(ucs));
279279+}
/* Recode count bytes of an iso encoded string to UTF-8 with no limit on
 * the output size. Thin wrapper kept for existing callers: it forwards to
 * iso_decode_ex() with utf8_size = -1, the sentinel that iso_decode_ex
 * replaces with INT_MAX. The caller is responsible for utf8 being large
 * enough. */
280280+281281+unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8, int cp, int count)
282282+{
283283+ return iso_decode_ex(iso, utf8, cp, count, -1);
262284}
263285264286/* Recode an iso encoded string to UTF-8 */
265265-unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,
266266- int cp, int count)
287287+unsigned char* iso_decode_ex(const unsigned char *iso, unsigned char *utf8, int cp, int count, int utf8_size)
267288{
289289+ if (utf8_size == -1)
290290+ utf8_size = INT_MAX;
291291+268292 uint16_t *table = NULL;
269293270294 cp_lock_enter();
···322346323347 cp_lock_leave();
324348325325- while (count--) {
349349+ while (count-- && utf8_size > 0) {
326350 unsigned short ucs, tmp;
327351328352 if (*iso < 128 || cp == UTF_8) /* Already UTF-8 */
353353+ {
329354 *utf8++ = *iso++;
355355+ --utf8_size;
356356+ }
330357331358 else {
332359 /* tid tells us which table to use and how */
···375402376403 if (ucs == 0) /* unknown char, use replacement char */
377404 ucs = 0xfffd;
378378- utf8 = utf8encode(ucs, utf8);
405405+406406+ utf8 = utf8encode_ex(ucs, utf8, &utf8_size);
379407 }
380408 }
381409
+2
firmware/include/rbunicode.h
···5757/* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */
5858unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8);
5959unsigned char* iso_decode(const unsigned char *latin1, unsigned char *utf8, int cp, int count);
/* Size-limited variant of iso_decode: utf8_size is the number of output
 * bytes available at utf8, and -1 means no limit. Returns a pointer after
 * the last byte written. NOTE(review): the callers in this change write
 * the string terminator themselves and reserve one byte for it -- confirm
 * that the function itself never writes a terminator. */
6060+unsigned char* iso_decode_ex(const unsigned char *iso, unsigned char *utf8, int cp, int count, int utf8_size);
6161+6062unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8, int count);
6163unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8, int count);
6264bool utf16_has_bom(const unsigned char *utf16, bool *le);
+15-17
lib/rbcodec/metadata/id3tags.c
···568568569569/* Must be called after parse_as_utf8. Checks to see if the passed in string is a 16-bit wide Unicode v2
570570 string. If it is, we convert it to a UTF-8 string. If it's not unicode,
571571- we convert from the default codepage */
572572-static void unicode_munge(char* string, char* utf8buf, int *len) {
571571+ we convert from the default codepage
572572+ NOTE: real UTF-8 buffer size is expected to be utf8buf_size + 1 (additional byte for string terminator) */
573573+static void unicode_munge(unsigned char* string, unsigned char* utf8buf, int *len, int utf8buf_size) {
574574+ unsigned char *str = string;
575575+ unsigned char* utf8 = utf8buf;
576576+573577 int i = 0;
574574- unsigned char *str = (unsigned char *)string;
575578 int templen = 0;
576576- unsigned char* utf8 = (unsigned char *)utf8buf;
577579578580 switch (str[0]) {
579579- case 0x00: /* Type 0x00 is ordinary ISO 8859-1 */
580580- str++;
581581- (*len)--;
582582- utf8 = iso_decode(str, utf8, -1, *len);
583583- *utf8 = 0;
584584- *len = (intptr_t)utf8 - (intptr_t)utf8buf;
585585- break;
586586-587581 case 0x01: /* Unicode with or without BOM */
588582 case 0x02:
589583 (*len)--;
···618612 *len = templen - 1;
619613 break;
620614 /* case 0x03: UTF-8 encoded string handled by parse_as_utf8 */
615615+616616+ case 0x00: /* Type 0x00 is ordinary ISO 8859-1 */
617617+ str++;
618618+ (*len)--;
619619+ //fallthrough
621620 default: /* Plain old string */
622622- utf8 = iso_decode(str, utf8, -1, *len);
621621+ utf8 = iso_decode_ex(str, utf8, -1, *len, utf8buf_size);
623622 *utf8 = 0;
624624- *len = (intptr_t)utf8 - (intptr_t)utf8buf;
623623+ *len = utf8 - utf8buf;
625624 break;
626625 }
627626}
···11031102 {
11041103 //limit stack allocation to avoid stack overflow
11051104 utf8_size = ID3V2_BUF_SIZE;
11061106- bytesread = ID3V2_BUF_SIZE/3;
11071105 }
11081108- char utf8buf[utf8_size + 1];
11091109- unicode_munge( tag, utf8buf, &bytesread);
11061106+ unsigned char utf8buf[utf8_size + 1];
11071107+ unicode_munge( (unsigned char *)tag, utf8buf, &bytesread, utf8_size);
11101108 if(bytesread >= buffersize - bufferpos)
11111109 bytesread = buffersize - bufferpos - 1;
11121110