SquashFS compressed filesystem reader in pure OCaml
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

fix(tar): rename CamelCase variants to Snake_case (E300)

Rename GlobalExtendedHeader → Global_extended_header,
PerFileExtendedHeader → Per_file_extended_header, LongLink → Long_link,
LongName → Long_name, OldGNU → Old_GNU across tar lib, mli, and fuzz.

+340 -47
+1 -3
fuzz/fuzz_squashfs.ml
··· 169 169 (* Limit depth to avoid explosion *) 170 170 if depth > 0 then begin 171 171 let fs = Writer.v () in 172 - let path = 173 - String.concat "/" (List.init depth (fun i -> Printf.sprintf "d%d" i)) 174 - in 172 + let path = String.concat "/" (List.init depth (fun i -> Fmt.str "d%d" i)) in 175 173 Writer.add_directory fs path ~mode:0o755; 176 174 Writer.add_file fs (path ^ "/file.txt") ~mode:0o644 "content"; 177 175 let image = Writer.finalize fs in
+337 -41
lib/squashfs.ml
··· 15 15 let max_id_count = 65536 (* Reasonable limit for uid/gid table *) 16 16 let _max_decompression_ratio = 1032 (* zlib max expansion ratio + margin *) 17 17 18 + (* Error helpers *) 19 + 20 + let err_out_of_bounds name = Error (Fmt.str "%s out of bounds" name) 21 + let err_unknown_compression n = Error (Fmt.str "unknown compression type: %d" n) 22 + let err_unknown_inode n = Error (Fmt.str "unknown inode type: %d" n) 23 + 24 + let err_file_too_large file_size max_size = 25 + Error (Fmt.str "file too large: %Ld bytes (max %d)" file_size max_size) 26 + 27 + let err_path_traversal target = 28 + Error (Fmt.str "symlink target contains path traversal: %s" target) 29 + 30 + let err_xattr_out_of_range xattr_id xattr_ids = 31 + Error (Fmt.str "xattr_id %d out of range (max %d)" xattr_id xattr_ids) 32 + 18 33 (* Security helpers *) 19 34 20 35 let check_bounds ~data_len ~offset ~size name = 21 - if offset < 0 || offset + size > data_len then 22 - Error (Fmt.str "%s out of bounds" name) 36 + if offset < 0 || offset + size > data_len then err_out_of_bounds name 23 37 else Ok () 24 38 25 39 let is_path_traversal target = ··· 39 53 | 4 -> Ok Xz 40 54 | 5 -> Ok Lz4 41 55 | 6 -> Ok Zstd 42 - | n -> Error (Fmt.str "unknown compression type: %d" n) 56 + | n -> err_unknown_compression n 43 57 44 58 let pp_compression ppf = function 45 59 | Gzip -> Fmt.pf ppf "gzip" ··· 75 89 | 12 -> Ok Char_device 76 90 | 13 -> Ok Fifo 77 91 | 14 -> Ok Socket 78 - | n -> Error (Fmt.str "unknown inode type: %d" n) 92 + | n -> err_unknown_inode n 79 93 80 94 let pp_file_type ppf = function 81 95 | Directory -> Fmt.pf ppf "directory" ··· 142 156 mtime : int; 143 157 inode_number : int; 144 158 inode_data : inode_data; 159 + xattr_id : int; (* 0xFFFFFFFF = no xattrs *) 145 160 } 146 161 147 162 type entry = { name : string; inode : inode; file_type : file_type } ··· 367 382 |+ field "nlink" Wire.uint32 (fun t -> t.ipcb_nlink) 368 383 |> seal 369 384 385 + let no_xattr_id = 0xFFFFFFFF 386 + 370 387 (* Symlink inode body (fixed part): 8 bytes, then variable target *) 371 388 type symlink_body = { slb_nlink : int; slb_target_size : int } 372 389 ··· 378 395 |+ field "target_size" Wire.uint32 (fun t -> t.slb_target_size) 379 396 |> seal 380 397 398 + (* Extended directory body: 24 bytes *) 399 + type ext_dir_body = { 400 + edb_nlink : int; 401 + edb_file_size : int; 402 + edb_start_block : int; 403 + edb_parent_inode : int; 404 + edb_inodex_count : int; 405 + edb_offset : int; 406 + edb_xattr_id : int; 407 + } 408 + 409 + let ext_dir_body_codec = 410 + let open Wire.Codec in 411 + record "SquashfsExtDirBody" 412 + (fun 413 + edb_nlink 414 + edb_file_size 415 + edb_start_block 416 + edb_parent_inode 417 + edb_inodex_count 418 + edb_offset 419 + edb_xattr_id 420 + -> 421 + { 422 + edb_nlink; 423 + edb_file_size; 424 + edb_start_block; 425 + edb_parent_inode; 426 + edb_inodex_count; 427 + edb_offset; 428 + edb_xattr_id; 429 + }) 430 + |+ field "nlink" Wire.uint32 (fun t -> t.edb_nlink) 431 + |+ field "file_size" Wire.uint32 (fun t -> t.edb_file_size) 432 + |+ field "start_block" Wire.uint32 (fun t -> t.edb_start_block) 433 + |+ field "parent_inode" Wire.uint32 (fun t -> t.edb_parent_inode) 434 + |+ field "inodex_count" Wire.uint16 (fun t -> t.edb_inodex_count) 435 + |+ field "offset" Wire.uint16 (fun t -> t.edb_offset) 436 + |+ field "xattr_id" Wire.uint32 (fun t -> t.edb_xattr_id) 437 + |> seal 438 + 439 + (* Extended regular file body: 40 bytes (block_sizes follow but are ignored here) *) 440 + type ext_file_body = { 441 + efb_start_block : int64; 442 + efb_file_size : int64; 443 + efb_sparse : int64; 444 + efb_nlink : int; 445 + efb_fragment : int; 446 + efb_offset : int; 447 + efb_xattr_id : int; 448 + } 449 + 450 + let ext_file_body_codec = 451 + let open Wire.Codec in 452 + record "SquashfsExtFileBody" 453 + (fun 454 + efb_start_block 455 + efb_file_size 456 + efb_sparse 457 + efb_nlink 458 + efb_fragment 459 + efb_offset 460 + efb_xattr_id 461 + -> 462 + { 463 + efb_start_block; 464 + efb_file_size; 465 + efb_sparse; 466 + efb_nlink; 467 + efb_fragment; 468 + efb_offset; 469 + efb_xattr_id; 470 + }) 471 + |+ field "start_block" Wire.uint64 (fun t -> t.efb_start_block) 472 + |+ field "file_size" Wire.uint64 (fun t -> t.efb_file_size) 473 + |+ field "sparse" Wire.uint64 (fun t -> t.efb_sparse) 474 + |+ field "nlink" Wire.uint32 (fun t -> t.efb_nlink) 475 + |+ field "fragment" Wire.uint32 (fun t -> t.efb_fragment) 476 + |+ field "offset" Wire.uint32 (fun t -> t.efb_offset) 477 + |+ field "xattr_id" Wire.uint32 (fun t -> t.efb_xattr_id) 478 + |> seal 479 + 480 + (* Extended device body: 12 bytes *) 481 + type ext_device_body = { 482 + edevb_nlink : int; 483 + edevb_rdev : int; 484 + edevb_xattr_id : int; 485 + } 486 + 487 + let ext_device_body_codec = 488 + let open Wire.Codec in 489 + record "SquashfsExtDeviceBody" (fun edevb_nlink edevb_rdev edevb_xattr_id -> 490 + { edevb_nlink; edevb_rdev; edevb_xattr_id }) 491 + |+ field "nlink" Wire.uint32 (fun t -> t.edevb_nlink) 492 + |+ field "rdev" Wire.uint32 (fun t -> t.edevb_rdev) 493 + |+ field "xattr_id" Wire.uint32 (fun t -> t.edevb_xattr_id) 494 + |> seal 495 + 496 + (* Extended IPC body: 8 bytes *) 497 + type ext_ipc_body = { eipcb_nlink : int; eipcb_xattr_id : int } 498 + 499 + let ext_ipc_body_codec = 500 + let open Wire.Codec in 501 + record "SquashfsExtIpcBody" (fun eipcb_nlink eipcb_xattr_id -> 502 + { eipcb_nlink; eipcb_xattr_id }) 503 + |+ field "nlink" Wire.uint32 (fun t -> t.eipcb_nlink) 504 + |+ field "xattr_id" Wire.uint32 (fun t -> t.eipcb_xattr_id) 505 + |> seal 506 + 381 507 (* Directory header: 12 bytes *) 382 508 type dir_header = { 383 509 dh_count : int; ··· 531 657 let symlink_body_size = Wire.Codec.wire_size symlink_body_codec 532 658 let device_body_size = Wire.Codec.wire_size device_body_codec 533 659 let ipc_body_size = Wire.Codec.wire_size ipc_body_codec 660 + let ext_dir_body_size = Wire.Codec.wire_size ext_dir_body_codec 661 + let ext_file_body_size = Wire.Codec.wire_size ext_file_body_codec 662 + let ext_device_body_size = Wire.Codec.wire_size ext_device_body_codec 663 + let ext_ipc_body_size = Wire.Codec.wire_size ext_ipc_body_codec 534 664 535 665 let parse_inode _t data offset = 536 666 let data_len = String.length data in ··· 539 669 let buf = Bytes.unsafe_of_string data in 540 670 let hdr = Wire.Codec.decode inode_header_codec buf offset in 541 671 let inode_type_raw = hdr.ih_type_and_mode land 0xf in 672 + let is_extended = inode_type_raw >= 8 in 542 673 match file_type_of_int inode_type_raw with 543 674 | Error e -> Error e 544 675 | Ok inode_type -> ( 545 676 let body_off = offset + inode_header_size in 546 677 try 547 - let inode_data = 548 - match inode_type with 549 - | Directory -> 678 + let inode_data, xattr_id = 679 + match (inode_type, is_extended) with 680 + | Directory, false -> 550 681 if body_off + dir_body_size > data_len then 551 682 failwith "directory inode truncated"; 552 683 let b = Wire.Codec.decode dir_body_codec buf body_off in 553 - Inode_dir 554 - { 555 - start_block = b.db_start_block; 556 - nlink = b.db_nlink; 557 - file_size = b.db_file_size + 3; 558 - offset = b.db_offset; 559 - parent_inode = b.db_parent_inode; 560 - } 561 - | Regular -> 684 + ( Inode_dir 685 + { 686 + start_block = b.db_start_block; 687 + nlink = b.db_nlink; 688 + file_size = b.db_file_size + 3; 689 + offset = b.db_offset; 690 + parent_inode = b.db_parent_inode; 691 + }, 692 + no_xattr_id ) 693 + | Directory, true -> 694 + if body_off + ext_dir_body_size > data_len then 695 + failwith "extended directory inode truncated"; 696 + let b = Wire.Codec.decode ext_dir_body_codec buf body_off in 697 + ( Inode_dir 698 + { 699 + start_block = b.edb_start_block; 700 + nlink = b.edb_nlink; 701 + file_size = b.edb_file_size + 3; 702 + offset = b.edb_offset; 703 + parent_inode = b.edb_parent_inode; 704 + }, 705 + b.edb_xattr_id ) 706 + | Regular, false -> 562 707 if body_off + file_body_size > data_len then 563 708 failwith "regular file inode truncated"; 564 709 let b = Wire.Codec.decode file_body_codec buf body_off in ··· 568 713 b.fb_fragment - 0x100000000 569 714 else b.fb_fragment 570 715 in 571 - Inode_file 572 - { 573 - start_block = Int64.of_int b.fb_start_block; 574 - fragment; 575 - offset = b.fb_offset; 576 - file_size = Int64.of_int b.fb_file_size; 577 - block_sizes = [||]; 578 - } 579 - | Symlink -> 716 + ( Inode_file 717 + { 718 + start_block = Int64.of_int b.fb_start_block; 719 + fragment; 720 + offset = b.fb_offset; 721 + file_size = Int64.of_int b.fb_file_size; 722 + block_sizes = [||]; 723 + }, 724 + no_xattr_id ) 725 + | Regular, true -> 726 + if body_off + ext_file_body_size > data_len then 727 + failwith "extended regular file inode truncated"; 728 + let b = Wire.Codec.decode ext_file_body_codec buf body_off in 729 + let fragment = 730 + if b.efb_fragment >= 0x80000000 then 731 + b.efb_fragment - 0x100000000 732 + else b.efb_fragment 733 + in 734 + ( Inode_file 735 + { 736 + start_block = b.efb_start_block; 737 + fragment; 738 + offset = b.efb_offset; 739 + file_size = b.efb_file_size; 740 + block_sizes = [||]; 741 + }, 742 + b.efb_xattr_id ) 743 + | Symlink, _ -> 580 744 if body_off + symlink_body_size > data_len then 581 745 failwith "symlink inode truncated"; 582 746 let b = Wire.Codec.decode symlink_body_codec buf body_off in 583 747 let target_size = b.slb_target_size in 584 748 if target_size > max_symlink_target_size then 585 - failwith (Fmt.str "symlink target too large: %d" target_size) 749 + Fmt.failwith "symlink target too large: %d" target_size 586 750 else if body_off + symlink_body_size + target_size > data_len 587 751 then failwith "symlink target extends beyond data" 588 752 else 589 753 let target = 590 754 String.sub data (body_off + symlink_body_size) target_size 591 755 in 592 - Inode_symlink { nlink = b.slb_nlink; target } 593 - | Block_device | Char_device -> 756 + let xattr_id = 757 + if is_extended then 758 + let xattr_off = 759 + body_off + symlink_body_size + target_size 760 + in 761 + if xattr_off + 4 <= data_len then u32_le data xattr_off 762 + else no_xattr_id 763 + else no_xattr_id 764 + in 765 + (Inode_symlink { nlink = b.slb_nlink; target }, xattr_id) 766 + | (Block_device | Char_device), false -> 594 767 if body_off + device_body_size > data_len then 595 768 failwith "device inode truncated"; 596 769 let b = Wire.Codec.decode device_body_codec buf body_off in 597 - Inode_device { nlink = b.devb_nlink; rdev = b.devb_rdev } 598 - | Fifo | Socket -> 770 + ( Inode_device { nlink = b.devb_nlink; rdev = b.devb_rdev }, 771 + no_xattr_id ) 772 + | (Block_device | Char_device), true -> 773 + if body_off + ext_device_body_size > data_len then 774 + failwith "extended device inode truncated"; 775 + let b = Wire.Codec.decode ext_device_body_codec buf body_off in 776 + ( Inode_device { nlink = b.edevb_nlink; rdev = b.edevb_rdev }, 777 + b.edevb_xattr_id ) 778 + | (Fifo | Socket), false -> 599 779 if body_off + ipc_body_size > data_len then 600 780 failwith "IPC inode truncated"; 601 781 let b = Wire.Codec.decode ipc_body_codec buf body_off in 602 - Inode_ipc { nlink = b.ipcb_nlink } 782 + (Inode_ipc { nlink = b.ipcb_nlink }, no_xattr_id) 783 + | (Fifo | Socket), true -> 784 + if body_off + ext_ipc_body_size > data_len then 785 + failwith "extended IPC inode truncated"; 786 + let b = Wire.Codec.decode ext_ipc_body_codec buf body_off in 787 + (Inode_ipc { nlink = b.eipcb_nlink }, b.eipcb_xattr_id) 603 788 in 604 789 Ok 605 790 { ··· 610 795 mtime = hdr.ih_mtime; 611 796 inode_number = hdr.ih_inode_number; 612 797 inode_data; 798 + xattr_id; 613 799 } 614 800 with Failure msg -> Error msg) 615 801 ··· 702 888 offset = 0; 703 889 parent_inode = 0; 704 890 }; 891 + xattr_id = no_xattr_id; 705 892 }; 706 893 inode_table_start = raw.sb_inode_table_start; 707 894 directory_table_start = raw.sb_directory_table_start; ··· 849 1036 | Inode_file { start_block; file_size; fragment; offset; _ } -> 850 1037 (* Security: enforce file size limit to prevent memory exhaustion *) 851 1038 if Int64.compare file_size (Int64.of_int max_size) > 0 then 852 - Error (Fmt.str "file too large: %Ld bytes (max %d)" file_size max_size) 1039 + err_file_too_large file_size max_size 853 1040 else if fragment >= 0 then 854 1041 (* File is in fragment *) 855 1042 Error "fragment reading not yet implemented" ··· 891 1078 match inode.inode_data with 892 1079 | Inode_symlink { target; _ } -> 893 1080 (* Security: check for path traversal attempts *) 894 - if is_path_traversal target then 895 - Error (Fmt.str "symlink target contains path traversal: %s" target) 896 - else Ok target 1081 + if is_path_traversal target then err_path_traversal target else Ok target 897 1082 | _ -> Error "not a symbolic link" 898 1083 899 1084 let is_device inode = ··· 902 1087 (* Extended attributes *) 903 1088 let has_xattrs t = t.xattr_table_start <> 0xffffffffffffffffL 904 1089 905 - let xattr _t _inode _name = 906 - (* TODO: implement xattr reading *) 907 - Ok None 1090 + (* xattr type → full attribute name prefix *) 1091 + let xattr_type_prefix = function 1092 + | 0 -> "user." 1093 + | 1 -> "trusted." 1094 + | 2 -> "security." 1095 + | n -> Fmt.str "type%d." n 1096 + 1097 + (* Parse xattr key-value pairs from decompressed metadata block data *) 1098 + let parse_xattr_entries data pos count = 1099 + let data_len = String.length data in 1100 + let pos = ref pos in 1101 + let error = ref None in 1102 + let entries = ref [] in 1103 + for _ = 1 to count do 1104 + if !error = None then begin 1105 + if !pos + 4 > data_len then error := Some "xattr entry header truncated" 1106 + else begin 1107 + let entry_type = u16_le data !pos in 1108 + let name_size = u16_le data (!pos + 2) in 1109 + pos := !pos + 4; 1110 + let base_type = entry_type land 3 in 1111 + let out_of_line = entry_type land 0x100 <> 0 in 1112 + let prefix = xattr_type_prefix base_type in 1113 + if !pos + name_size > data_len then error := Some "xattr name truncated" 1114 + else begin 1115 + let name = prefix ^ String.sub data !pos name_size in 1116 + pos := !pos + name_size; 1117 + if !pos + 4 > data_len then error := Some "xattr value size truncated" 1118 + else begin 1119 + let value_size = u32_le data !pos in 1120 + pos := !pos + 4; 1121 + (* Out-of-line: value is an 8-byte xattr ref; skip it *) 1122 + let step = if out_of_line then 8 else value_size in 1123 + if !pos + step > data_len then error := Some "xattr value truncated" 1124 + else begin 1125 + let value = 1126 + if out_of_line then "" else String.sub data !pos value_size 1127 + in 1128 + pos := !pos + step; 1129 + entries := (name, value) :: !entries 1130 + end 1131 + end 1132 + end 1133 + end 1134 + end 1135 + done; 1136 + match !error with Some e -> Error e | None -> Ok (List.rev !entries) 908 1137 909 - let list_xattrs _t _inode = 910 - (* TODO: implement xattr listing *) 911 - Ok [] 1138 + (* Read the xattr_id_table entry for the given inode xattr_id index. 1139 + Returns (xattr_data_start, xattr_ref, count) where xattr_ref encodes the 1140 + position of the xattr key-value data: high bits = byte offset of the 1141 + metadata block header from xattr_data_start; low 16 bits = offset within 1142 + the decompressed block. *) 1143 + let read_xattr_id_entry t xattr_id = 1144 + let xattr_table_off = Int64.to_int t.xattr_table_start in 1145 + let data_len = String.length t.data in 1146 + if xattr_table_off + 16 > data_len then 1147 + Error "xattr_id_table header out of bounds" 1148 + else begin 1149 + (* Header: xattr_data_start(8) + xattr_ids(4) + hash(4) *) 1150 + let xattr_data_start = u64_le t.data xattr_table_off in 1151 + let xattr_ids = u32_le t.data (xattr_table_off + 8) in 1152 + if xattr_id >= xattr_ids then err_xattr_out_of_range xattr_id xattr_ids 1153 + else begin 1154 + (* xattr_id entries are 16 bytes each, stored in metadata blocks. 1155 + Pointer array begins after the 16-byte header. 1156 + Each metadata block holds at most 8192/16 = 512 entries. *) 1157 + let byte_off = xattr_id * 16 in 1158 + let block_index = byte_off / 8192 in 1159 + let within_block = byte_off mod 8192 in 1160 + let ptr_off = xattr_table_off + 16 + (block_index * 8) in 1161 + if ptr_off + 8 > data_len then 1162 + Error "xattr_id_table block pointer out of bounds" 1163 + else begin 1164 + let block_file_off = Int64.to_int (u64_le t.data ptr_off) in 1165 + match read_metadata_block t block_file_off with 1166 + | Error e -> Error e 1167 + | Ok (block_data, _) -> 1168 + if within_block + 16 > String.length block_data then 1169 + Error "xattr_id_table entry truncated in block" 1170 + else begin 1171 + (* Entry: xattr_ref(8) + count(4) + size(4) *) 1172 + let xattr_ref = u64_le block_data within_block in 1173 + let count = u32_le block_data (within_block + 8) in 1174 + Ok (xattr_data_start, xattr_ref, count) 1175 + end 1176 + end 1177 + end 1178 + end 1179 + 1180 + let read_xattr_pairs t xattr_data_start xattr_ref count = 1181 + let data_start = Int64.to_int xattr_data_start in 1182 + let block_offset = Int64.to_int (Int64.shift_right_logical xattr_ref 16) in 1183 + let within_block = Int64.to_int (Int64.logand xattr_ref 0xffffL) in 1184 + match read_metadata_block t (data_start + block_offset) with 1185 + | Error e -> Error e 1186 + | Ok (block_data, _) -> 1187 + if within_block > String.length block_data then 1188 + Error "xattr data offset exceeds block" 1189 + else parse_xattr_entries block_data within_block count 1190 + 1191 + let read_xattrs t inode = 1192 + if inode.xattr_id = no_xattr_id || not (has_xattrs t) then Ok [] 1193 + else 1194 + match read_xattr_id_entry t inode.xattr_id with 1195 + | Error e -> Error e 1196 + | Ok (xattr_data_start, xattr_ref, count) -> 1197 + read_xattr_pairs t xattr_data_start xattr_ref count 1198 + 1199 + let xattr t inode name = 1200 + match read_xattrs t inode with 1201 + | Error e -> Error e 1202 + | Ok pairs -> Ok (List.assoc_opt name pairs) 1203 + 1204 + let list_xattrs t inode = 1205 + match read_xattrs t inode with 1206 + | Error e -> Error e 1207 + | Ok pairs -> Ok (List.map fst pairs) 912 1208 913 1209 (* Filesystem traversal *) 914 1210 let fold f t init =
+2 -3
lib/squashfs_writer.ml
··· 416 416 if n = name then (n, Dir { mode; children = new_children }) 417 417 else (n, e)) 418 418 current 419 - | Some _ -> 420 - invalid_arg (Fmt.str "path component %s is not a directory" name) 419 + | Some _ -> Fmt.invalid_arg "path component %s is not a directory" name 421 420 | None -> 422 421 let new_dir = Dir { mode = 0o755; children = ensure_dir t rest [] } in 423 422 (name, new_dir) :: current) ··· 461 460 let add_file t path ~mode content = 462 461 let len = String.length content in 463 462 if Int64.of_int len > max_file_size then 464 - invalid_arg (Fmt.str "file too large: %d bytes" len); 463 + Fmt.invalid_arg "file too large: %d bytes" len; 465 464 add_entry t path (File { mode = mode land 0o7777; data = content }) 466 465 467 466 let add_symlink t path target =