Rockbox: an open-source, high-quality audio player, running as a Music Player Daemon
mpris rockbox mpd libadwaita audio rust zig deno
2
fork

Configure Feed

Select the types of activity you want to include in your feed.

metadata: asf: Use system utf16decode conversion

Change-Id: I606bf5365c84cbee4badd1ac1cbaace1207834f4

Authored by Roman Artiukhin and committed by Solomon Peachy
de8d1437 1f548f74

+43 -49
+43 -49
lib/rbcodec/metadata/asf.c
··· 33 33 #include "metadata_common.h" 34 34 #include "metadata_parsers.h" 35 35 #include <codecs/libasf/asf.h> 36 - 36 + #include "rbunicode.h" 37 37 /* TODO: Just read the GUIDs into a 16-byte array, and use memcmp to compare */ 38 38 struct guid_s { 39 39 uint32_t v1; ··· 154 154 return 0; 155 155 } 156 156 157 + static int is_valid_utf16(const unsigned char *data, size_t length) 158 + { 159 + if (length < 2) return 0; // Not enough data for even one UTF-16 character 160 + 161 + // Get the last two bytes as a UTF-16 character (little-endian) 162 + uint16_t last = data[length - 2] | (data[length - 1] << 8); 163 + 164 + // Check if the last character is a high surrogate 165 + if (last >= 0xD800 && last <= 0xDBFF) { 166 + return 0; // Invalid if it's the last character 167 + } 168 + 169 + // Check if the last character is a low surrogate 170 + if (last >= 0xDC00 && last <= 0xDFFF) { 171 + if (length < 4) return 0; // Invalid if there's no preceding character 172 + uint16_t second_last = data[length - 4] | (data[length - 3] << 8); 173 + 174 + // Invalid if not preceded by a high surrogate 175 + return second_last >= 0xD800 && second_last <= 0xDBFF; 176 + } 177 + 178 + // If it's not a surrogate, it's valid 179 + return 1; 180 + } 181 + 157 182 /* Decode a LE utf16 string from a disk buffer into a fixed-sized 158 183 utf8 buffer. 
159 184 */ 160 - 161 185 static void asf_utf16LEdecode(int fd, 162 186 uint16_t utf16bytes, 163 187 unsigned char **utf8, 164 188 int* utf8bytes 165 189 ) 166 190 { 167 - unsigned long ucs; 191 + const int reserve_bytes = 6; 168 192 int n; 169 - unsigned char utf16buf[256]; 170 - unsigned char* utf16 = utf16buf; 171 - unsigned char* newutf8; 172 - 173 - n = read(fd, utf16buf, MIN(sizeof(utf16buf), utf16bytes)); 174 - utf16bytes -= n; 175 - 176 - while (n > 0) { 177 - /* Check for a surrogate pair */ 178 - if (utf16[1] >= 0xD8 && utf16[1] < 0xE0) { 179 - if (n < 4) { 180 - /* Run out of utf16 bytes, read some more */ 181 - utf16buf[0] = utf16[0]; 182 - utf16buf[1] = utf16[1]; 183 - 184 - n = read(fd, utf16buf + 2, MIN(sizeof(utf16buf)-2, utf16bytes)); 185 - utf16 = utf16buf; 186 - utf16bytes -= n; 187 - n += 2; 188 - } 193 + unsigned char utf16buf[258]; 194 + unsigned char* newutf8 = *utf8; 195 + const int utf8bytes_initial = *utf8bytes; 189 196 190 - if (n < 4) { 191 - /* Truncated utf16 string, abort */ 192 - break; 193 - } 194 - ucs = 0x10000 + ((utf16[0] << 10) | ((utf16[1] - 0xD8) << 18) 195 - | utf16[2] | ((utf16[3] - 0xDC) << 8)); 196 - utf16 += 4; 197 - n -= 4; 198 - } else { 199 - ucs = (utf16[0] | (utf16[1] << 8)); 200 - utf16 += 2; 201 - n -= 2; 197 + while ((n = read(fd, utf16buf, MIN(sizeof(utf16buf) - 2, utf16bytes))) >= 2) 198 + { 199 + // If the UTF-16 string ends with an incomplete surrogate pair, try to complete it. 
200 + if (!is_valid_utf16(utf16buf, n)) 201 + { 202 + n += read(fd, utf16buf + n, 2); 202 203 } 204 + newutf8 = utf16decode(utf16buf, newutf8, n>>1, *utf8bytes - reserve_bytes, true); 205 + *utf8bytes = utf8bytes_initial - (newutf8 - *utf8); 206 + utf16bytes -= n; 203 207 204 - if (*utf8bytes > 6) { 205 - newutf8 = utf8encode(ucs, *utf8); 206 - *utf8bytes -= (newutf8 - *utf8); 207 - *utf8 += (newutf8 - *utf8); 208 - } 209 - 210 - /* We have run out of utf16 bytes, read more if available */ 211 - if ((n == 0) && (utf16bytes > 0)) { 212 - n = read(fd, utf16buf, MIN(sizeof(utf16buf), utf16bytes)); 213 - utf16 = utf16buf; 214 - utf16bytes -= n; 215 - } 208 + if (*utf8bytes <= reserve_bytes) 209 + break; 216 210 } 217 211 218 - *utf8[0] = 0; 212 + *newutf8 = 0; 219 213 --*utf8bytes; 214 + *utf8 = newutf8; 220 215 221 216 if (utf16bytes > 0) { 222 217 /* Skip any remaining bytes */ 223 218 lseek(fd, utf16bytes, SEEK_CUR); 224 219 } 225 - return; 226 220 } 227 221 228 222 static int asf_parse_header(int fd, struct mp3entry* id3,