A virtual jailed shell environment for Go apps backed by an io/fs#FS.
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat(file): add magic-byte detection and -h/-i mapping

Implements GNU coreutils compatibility for file:

- Magic-byte detection for ELF, PE32, gzip, zip, JPEG,
PNG, PDF, GIF, bzip2, xz, RAR, Java class, WebAssembly,
and Ogg before falling back to extension and
http.DetectContentType
- -h/--no-dereference: don't follow symlinks
- -L re-enables symlink dereference (overrides -h when both are given)
- -i (MIME mode) maps magic descriptions to specific MIME
types via the magicMIME table

Custom magic file flags (-M, -m) are documented as
deferred since they require a libmagic-style DSL.

Refs: docs/posix2018/CONFORMANCE.md
Assisted-by: Claude Opus 4.7 via Claude Code
Signed-off-by: Xe Iaso <me@xeiaso.net>

Xe Iaso 7b206c72 f77d3d3b

+281 -15
+166 -6
command/internal/file/file.go
··· 1 1 package file 2 2 3 3 import ( 4 + "bytes" 4 5 "context" 5 6 "errors" 6 7 "fmt" 7 8 "io" 8 9 "mime" 9 10 "net/http" 11 + "os" 10 12 "path" 11 13 "strings" 12 14 15 + "github.com/go-git/go-billy/v5" 13 16 "github.com/pborman/getopt/v2" 14 17 "mvdan.cc/sh/v3/interp" 15 18 "tangled.org/xeiaso.net/kefka/command" ··· 34 37 return "." 35 38 } 36 39 return joined 40 + } 41 + 42 + // statPath stats p, optionally without following the final symlink. When 43 + // follow is false and the underlying filesystem implements 44 + // billy.Symlink, Lstat is used so that the symlink itself is reported. 45 + // When the filesystem doesn't expose Lstat the call gracefully degrades 46 + // to Stat (links are always followed); this matches kefka's behavior in 47 + // other commands like cp. 48 + func statPath(fsys billy.Filesystem, p string, follow bool) (os.FileInfo, error) { 49 + if !follow { 50 + if sl, ok := fsys.(billy.Symlink); ok { 51 + return sl.Lstat(p) 52 + } 53 + } 54 + return fsys.Stat(p) 55 + } 56 + 57 + // readlink returns the target of a symlink at p, or an empty string if 58 + // the underlying filesystem doesn't support it or the call fails. 59 + func readlink(fsys billy.Filesystem, p string) string { 60 + sl, ok := fsys.(billy.Symlink) 61 + if !ok { 62 + return "" 63 + } 64 + target, err := sl.Readlink(p) 65 + if err != nil { 66 + return "" 67 + } 68 + return target 37 69 } 38 70 39 71 func getExtension(filename string) string { ··· 143 175 return "ASCII text" + lineEnding 144 176 } 145 177 178 + // detectMagic inspects the leading bytes of data and returns a 179 + // human-readable description of well-known binary formats. The empty 180 + // string means no magic-byte signature matched. 181 + // 182 + // This is a deliberately small subset of libmagic; it covers the most 183 + // frequent formats encountered in practice. 
The descriptions are kept 184 + // close to BSD `file(1)` output where reasonable, but we do not try to 185 + // extract version numbers or sub-format details that would require a 186 + // proper parser. 187 + func detectMagic(data []byte) string { 188 + switch { 189 + case len(data) >= 4 && bytes.Equal(data[:4], []byte{0x7f, 'E', 'L', 'F'}): 190 + // ELFCLASS at offset 4: 1 = 32-bit, 2 = 64-bit. 191 + bits := "32-bit" 192 + if len(data) > 4 && data[4] == 2 { 193 + bits = "64-bit" 194 + } 195 + // EI_DATA at offset 5: 1 = LSB, 2 = MSB. 196 + endian := "LSB" 197 + if len(data) > 5 && data[5] == 2 { 198 + endian = "MSB" 199 + } 200 + return "ELF " + bits + " " + endian + " executable" 201 + case len(data) >= 2 && data[0] == 'M' && data[1] == 'Z': 202 + return "PE32 executable (MS-DOS)" 203 + case len(data) >= 3 && bytes.Equal(data[:3], []byte{0x1f, 0x8b, 0x08}): 204 + return "gzip compressed data" 205 + case len(data) >= 4 && bytes.Equal(data[:4], []byte("PK\x03\x04")): 206 + return "Zip archive data" 207 + case len(data) >= 4 && bytes.Equal(data[:4], []byte("PK\x05\x06")): 208 + return "Zip archive data (empty)" 209 + case len(data) >= 3 && bytes.Equal(data[:3], []byte{0xff, 0xd8, 0xff}): 210 + return "JPEG image data" 211 + case len(data) >= 8 && bytes.Equal(data[:8], []byte("\x89PNG\r\n\x1a\n")): 212 + return "PNG image data" 213 + case len(data) >= 4 && bytes.Equal(data[:4], []byte("%PDF")): 214 + return "PDF document" 215 + case len(data) >= 6 && (bytes.Equal(data[:6], []byte("GIF87a")) || bytes.Equal(data[:6], []byte("GIF89a"))): 216 + return "GIF image data" 217 + case len(data) >= 2 && bytes.Equal(data[:2], []byte("BZ")) && len(data) >= 3 && data[2] == 'h': 218 + return "bzip2 compressed data" 219 + case len(data) >= 6 && bytes.Equal(data[:6], []byte{0xfd, '7', 'z', 'X', 'Z', 0x00}): 220 + return "XZ compressed data" 221 + case len(data) >= 4 && bytes.Equal(data[:4], []byte("Rar!")): 222 + return "RAR archive data" 223 + case len(data) >= 4 && 
bytes.Equal(data[:4], []byte("\x7fELF")): 224 + // Covered by first case, but kept here so the reader sees it. 225 + return "ELF executable" 226 + case len(data) >= 4 && bytes.Equal(data[:4], []byte{0xca, 0xfe, 0xba, 0xbe}): 227 + return "Java class data" 228 + case len(data) >= 4 && bytes.Equal(data[:4], []byte{0x00, 0x61, 0x73, 0x6d}): 229 + return "WebAssembly (wasm) binary module" 230 + case len(data) >= 4 && bytes.Equal(data[:4], []byte("OggS")): 231 + return "Ogg data" 232 + } 233 + return "" 234 + } 235 + 236 + // magicMIME maps the human-readable magic descriptions returned by 237 + // detectMagic to MIME types. Keys are matched as prefixes so that 238 + // suffixes like ", with extra info" do not break the mapping. 239 + var magicMIME = []struct { 240 + prefix string 241 + mime string 242 + }{ 243 + {"ELF", "application/x-executable"}, 244 + {"PE32 executable", "application/vnd.microsoft.portable-executable"}, 245 + {"gzip compressed", "application/gzip"}, 246 + {"Zip archive", "application/zip"}, 247 + {"JPEG image", "image/jpeg"}, 248 + {"PNG image", "image/png"}, 249 + {"GIF image", "image/gif"}, 250 + {"PDF document", "application/pdf"}, 251 + {"bzip2 compressed", "application/x-bzip2"}, 252 + {"XZ compressed", "application/x-xz"}, 253 + {"RAR archive", "application/vnd.rar"}, 254 + {"Java class", "application/x-java-applet"}, 255 + {"WebAssembly", "application/wasm"}, 256 + {"Ogg data", "application/ogg"}, 257 + } 258 + 146 259 func detectFileType(filename string, data []byte) string { 147 260 if len(data) == 0 { 148 261 return "empty" 262 + } 263 + 264 + // Magic-byte detection takes precedence: it is more reliable than 265 + // extension hints for common binary formats and catches files where 266 + // http.DetectContentType returns "application/octet-stream". 
267 + if magic := detectMagic(data); magic != "" { 268 + return magic 149 269 } 150 270 151 271 mimeType := http.DetectContentType(data) ··· 209 329 usage := func() { 210 330 fmt.Fprint(stderr, "Usage: file [OPTION]... FILE...\n") 211 331 fmt.Fprint(stderr, "Determine file type.\n\n") 212 - fmt.Fprint(stderr, " -b, --brief do not prepend filenames to output\n") 213 - fmt.Fprint(stderr, " -i, --mime output MIME type strings\n") 214 - fmt.Fprint(stderr, " -L, --dereference follow symlinks\n") 215 - fmt.Fprint(stderr, " --help display this help and exit\n") 332 + fmt.Fprint(stderr, " -b, --brief do not prepend filenames to output\n") 333 + fmt.Fprint(stderr, " -i, --mime output MIME type strings\n") 334 + fmt.Fprint(stderr, " -h, --no-dereference do not follow symlinks (default)\n") 335 + fmt.Fprint(stderr, " -L, --dereference follow symlinks\n") 336 + fmt.Fprint(stderr, " --help display this help and exit\n") 216 337 } 217 338 set.SetUsage(usage) 218 339 219 340 brief := set.BoolLong("brief", 'b', "do not prepend filenames to output") 220 341 mimeMode := set.BoolLong("mime", 'i', "output MIME type strings") 221 - _ = set.BoolLong("dereference", 'L', "follow symlinks") 342 + noDeref := set.BoolLong("no-dereference", 'h', "do not follow symlinks") 343 + deref := set.BoolLong("dereference", 'L', "follow symlinks") 222 344 help := set.BoolLong("help", 0, "display this help and exit") 223 345 224 346 if err := set.Getopt(append([]string{"file"}, args...), nil); err != nil { ··· 237 359 return interp.ExitStatus(1) 238 360 } 239 361 362 + // `-L` forces dereferencing; `-h` disables it. When both are supplied 363 + // the last-wins behavior matches BSD `file(1)`. By default we follow 364 + // symlinks, matching the BSD convention. 
365 + follow := true 366 + if *noDeref { 367 + follow = false 368 + } 369 + if *deref { 370 + follow = true 371 + } 372 + 240 373 exitCode := 0 241 374 242 375 for _, name := range files { 243 376 p := resolvePath(ec, name) 244 377 245 - info, err := ec.FS.Stat(p) 378 + info, err := statPath(ec.FS, p, follow) 246 379 if err != nil { 247 380 if *brief { 248 381 fmt.Fprintln(stdout, "cannot open") ··· 253 386 continue 254 387 } 255 388 389 + // Symlink that we are not dereferencing. 390 + if info.Mode()&os.ModeSymlink != 0 { 391 + target := readlink(ec.FS, p) 392 + result := "symbolic link" 393 + if target != "" { 394 + result = "symbolic link to " + target 395 + } 396 + if *mimeMode { 397 + result = "inode/symlink" 398 + } 399 + if *brief { 400 + fmt.Fprintln(stdout, result) 401 + } else { 402 + fmt.Fprintf(stdout, "%s: %s\n", name, result) 403 + } 404 + continue 405 + } 406 + 256 407 if info.IsDir() { 257 408 result := "directory" 258 409 if *mimeMode { ··· 313 464 return "inode/x-empty" 314 465 case "directory": 315 466 return "inode/directory" 467 + case "symbolic link": 468 + return "inode/symlink" 469 + } 470 + 471 + // Magic-byte descriptions map to specific MIME types. 472 + for _, m := range magicMIME { 473 + if strings.HasPrefix(desc, m.prefix) { 474 + return m.mime 475 + } 316 476 } 317 477 318 478 // If detectFileType already returned a MIME type via
+115 -9
command/internal/file/file_test.go
··· 36 36 write("binary.bin", []byte{0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, 0x00, 0x00}) // PNG header 37 37 write("subdir/inner.txt", []byte("inner")) 38 38 39 + // ELF: \x7fELF, ELFCLASS64, EI_DATA LSB. Padded to 16 bytes (e_ident). 40 + write("hello.elf", []byte{ 41 + 0x7f, 'E', 'L', 'F', 42 + 2, 1, 1, 0, 43 + 0, 0, 0, 0, 0, 0, 0, 0, 44 + }) 45 + // gzip magic + minimal header. 46 + write("data.gz", []byte{ 47 + 0x1f, 0x8b, 0x08, 0x00, 48 + 0, 0, 0, 0, 0, 0, 49 + }) 50 + // PNG bytes saved to a name with no extension hint, to verify magic 51 + // detection runs even when extensions don't help. 52 + write("unknown.bin", []byte{ 53 + 0x89, 'P', 'N', 'G', 0x0d, 0x0a, 0x1a, 0x0a, 54 + 0, 0, 0, 0, 55 + }) 56 + // PDF. 57 + write("doc.pdf", []byte("%PDF-1.4\n")) 58 + // JPEG. 59 + write("photo.jpg", []byte{0xff, 0xd8, 0xff, 0xe0, 0, 0x10, 'J', 'F', 'I', 'F', 0}) 60 + // Zip. 61 + write("archive.zip", []byte{'P', 'K', 0x03, 0x04, 0, 0, 0, 0}) 62 + 39 63 // Create directory manually 40 64 fs.MkdirAll("mydir", 0o755) 65 + 66 + // Symlink (best-effort: memfs implements billy.Symlink). 67 + if sl, ok := fs.(billy.Symlink); ok { 68 + _ = sl.Symlink("hello.txt", "link.txt") 69 + } 41 70 return fs 42 71 } 43 72 ··· 102 131 { 103 132 name: "png binary", 104 133 args: []string{"binary.bin"}, 105 - wantStdout: "binary.bin: image/png\n", 134 + wantStdout: "binary.bin: PNG image data\n", 106 135 }, 107 136 { 108 137 name: "directory", ··· 172 201 args: []string{"--help"}, 173 202 wantStderr: "Usage: file [OPTION]... 
FILE...\n" + 174 203 "Determine file type.\n\n" + 175 - " -b, --brief do not prepend filenames to output\n" + 176 - " -i, --mime output MIME type strings\n" + 177 - " -L, --dereference follow symlinks\n" + 178 - " --help display this help and exit\n", 204 + " -b, --brief do not prepend filenames to output\n" + 205 + " -i, --mime output MIME type strings\n" + 206 + " -h, --no-dereference do not follow symlinks (default)\n" + 207 + " -L, --dereference follow symlinks\n" + 208 + " --help display this help and exit\n", 179 209 }, 180 210 { 181 211 name: "unknown flag", ··· 184 214 wantStderr: "file: unknown option: --nope\n" + 185 215 "Usage: file [OPTION]... FILE...\n" + 186 216 "Determine file type.\n\n" + 187 - " -b, --brief do not prepend filenames to output\n" + 188 - " -i, --mime output MIME type strings\n" + 189 - " -L, --dereference follow symlinks\n" + 190 - " --help display this help and exit\n", 217 + " -b, --brief do not prepend filenames to output\n" + 218 + " -i, --mime output MIME type strings\n" + 219 + " -h, --no-dereference do not follow symlinks (default)\n" + 220 + " -L, --dereference follow symlinks\n" + 221 + " --help display this help and exit\n", 191 222 }, 192 223 { 193 224 name: "subdirectory file", ··· 198 229 name: "dereference flag accepted", 199 230 args: []string{"-L", "hello.txt"}, 200 231 wantStdout: "hello.txt: ASCII text\n", 232 + }, 233 + { 234 + name: "elf magic", 235 + args: []string{"hello.elf"}, 236 + wantStdout: "hello.elf: ELF 64-bit LSB executable\n", 237 + }, 238 + { 239 + name: "gzip magic", 240 + args: []string{"data.gz"}, 241 + wantStdout: "data.gz: gzip compressed data\n", 242 + }, 243 + { 244 + name: "pdf magic", 245 + args: []string{"doc.pdf"}, 246 + wantStdout: "doc.pdf: PDF document\n", 247 + }, 248 + { 249 + name: "jpeg magic", 250 + args: []string{"photo.jpg"}, 251 + wantStdout: "photo.jpg: JPEG image data\n", 252 + }, 253 + { 254 + name: "zip magic", 255 + args: []string{"archive.zip"}, 256 + wantStdout: 
"archive.zip: Zip archive data\n", 257 + }, 258 + { 259 + name: "binary content no extension", 260 + args: []string{"unknown.bin"}, 261 + wantStdout: "unknown.bin: PNG image data\n", 262 + }, 263 + { 264 + name: "mime mode elf", 265 + args: []string{"-i", "hello.elf"}, 266 + wantStdout: "hello.elf: application/x-executable\n", 267 + }, 268 + { 269 + name: "mime mode gzip", 270 + args: []string{"-i", "data.gz"}, 271 + wantStdout: "data.gz: application/gzip\n", 272 + }, 273 + { 274 + name: "mime mode png unknown extension", 275 + args: []string{"-i", "unknown.bin"}, 276 + wantStdout: "unknown.bin: image/png\n", 277 + }, 278 + { 279 + name: "brief mime gzip", 280 + args: []string{"-bi", "data.gz"}, 281 + wantStdout: "application/gzip\n", 282 + }, 283 + { 284 + name: "symlink no-dereference", 285 + args: []string{"-h", "link.txt"}, 286 + wantStdout: "link.txt: symbolic link to hello.txt\n", 287 + }, 288 + { 289 + name: "symlink no-dereference brief", 290 + args: []string{"-bh", "link.txt"}, 291 + wantStdout: "symbolic link to hello.txt\n", 292 + }, 293 + { 294 + name: "symlink no-dereference mime", 295 + args: []string{"-hi", "link.txt"}, 296 + wantStdout: "link.txt: inode/symlink\n", 297 + }, 298 + { 299 + name: "symlink follows by default", 300 + args: []string{"link.txt"}, 301 + wantStdout: "link.txt: ASCII text\n", 302 + }, 303 + { 304 + name: "L overrides h when both given", 305 + args: []string{"-h", "-L", "link.txt"}, 306 + wantStdout: "link.txt: ASCII text\n", 201 307 }, 202 308 } 203 309