···77 "strconv"
88 "strings"
99 "sync"
1010+ "unicode/utf8"
1011)
11121213// listBinEntry holds path (')' as segment separator) and size from list.bin; Size is 0 when not present.
···318319 IsLocaleFallback bool
319320}
// utf8ToMojibake returns s as it would look after a tool misread its UTF-8
// bytes as Latin-1 and re-encoded the result as UTF-8 (double-encoding).
// Every byte >= 0x80 is replaced by the UTF-8 encoding of the code point with
// the same numeric value; ASCII bytes are copied through unchanged. For
// example, U+FF12 (fullwidth 2, bytes EF BC 92) becomes U+00EF U+00BC U+0092
// (bytes C3 AF C2 BC C2 92).
//
// The conversion works on bytes rather than decoded runes, so bytes that are
// not valid UTF-8 are mapped to their Latin-1 code points too instead of being
// collapsed into U+FFFD first. A string with no byte >= 0x80 is returned as-is
// without allocating.
func utf8ToMojibake(s string) string {
	// Skip the leading ASCII run; if it spans the whole string there is
	// nothing to re-encode.
	first := 0
	for first < len(s) && s[first] < utf8.RuneSelf {
		first++
	}
	if first == len(s) {
		return s
	}

	var b strings.Builder
	// Upper bound: the ASCII prefix is copied 1:1 and every remaining byte
	// expands to at most two bytes (U+0080..U+00FF are two-byte sequences).
	b.Grow(len(s) + (len(s) - first))
	b.WriteString(s[:first])
	for i := first; i < len(s); i++ {
		if c := s[i]; c < utf8.RuneSelf {
			b.WriteByte(c)
		} else {
			// Treat the raw byte as a Latin-1 code point and encode it as UTF-8.
			b.WriteRune(rune(c))
		}
	}
	return b.String()
}
// normalizeFullwidth replaces fullwidth Unicode characters (U+FF01–U+FF5E)
// with their ASCII equivalents (U+0021–U+007E). Everything else, including
// bytes that are not valid UTF-8, is copied through byte-for-byte. If s
// contains no fullwidth character it is returned unchanged without allocating.
//
// The mapped range includes punctuation: U+FF0E becomes '.', U+FF0F becomes
// '/', and U+FF09 becomes ')', so the result can contain separator characters
// that were absent from the input.
// NOTE(review): callers that build filesystem paths from the result should
// confirm their traversal checks run on the normalized string.
func normalizeFullwidth(s string) string {
	const (
		fullwidthFirst = '\uFF01'
		fullwidthLast  = '\uFF5E'
		// Distance between a fullwidth form and its ASCII counterpart.
		fullwidthShift = fullwidthFirst - '!'
	)

	var b strings.Builder
	copied := 0 // s[:copied] has already been emitted into b
	for i, r := range s {
		if r < fullwidthFirst || r > fullwidthLast {
			continue
		}
		if copied == 0 {
			// First replacement: the output is never longer than the input.
			b.Grow(len(s))
		}
		// Flush the untouched span verbatim so unrelated bytes are never
		// re-encoded, then emit the ASCII replacement.
		b.WriteString(s[copied:i])
		b.WriteByte(byte(r - fullwidthShift))
		copied = i + utf8.RuneLen(r)
	}
	if copied == 0 {
		return s
	}
	b.WriteString(s[copied:])
	return b.String()
}
// hasNonASCII reports whether s contains any content outside the 7-bit ASCII
// range. A byte scan is sufficient: every non-ASCII rune, and every invalid
// UTF-8 byte, involves at least one byte with the high bit set.
func hasNonASCII(s string) bool {
	for i := 0; i < len(s); i++ {
		if s[i] >= 0x80 {
			return true
		}
	}
	return false
}
375375+321376// pathStrToFullPaths converts a list.bin path string (using ')' separators) into filesystem
322377// candidates. The original locale path is returned first; if the path contains ja or ko,
323378// an en locale fallback is appended (marked IsLocaleFallback so callers can skip MD5 validation).
379379+// For paths with non-ASCII characters, mojibake (double-encoded) and fullwidth-to-ASCII
380380+// variants are also tried.
324381func pathStrToFullPaths(revision, assetType, pathStr string) []pathCandidate {
325382 fsPath := strings.ReplaceAll(pathStr, ")", "/")
326383 if strings.Contains(fsPath, "..") || filepath.IsAbs(fsPath) || strings.HasPrefix(fsPath, "/") {
···335392 fallback bool
336393 }
337394 entries := []tagged{{pathStr, false}}
395395+ if hasNonASCII(pathStr) {
396396+ entries = append(entries, tagged{utf8ToMojibake(pathStr), false})
397397+ entries = append(entries, tagged{normalizeFullwidth(pathStr), false})
398398+ }
338399 if strings.Contains(pathStr, ")ja)") {
339400 entries = append(entries, tagged{strings.ReplaceAll(pathStr, ")ja)", ")en)"), true})
340401 }