xar unarchiver (.xar, .pkg, .xip)
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Fix checksumming and heap offset calculations; detect compression algorithms from magic bytes

+59 -67
+6 -8
README.md
··· 4 4 5 5 ## status 6 6 7 - - [ ] Header 7 + - [x] Header 8 8 - [x] Magic 9 9 - [x] Header size 10 10 - [x] Version ··· 14 14 - [x] None 15 15 - [x] SHA1 16 16 - [x] MD5 17 - - [ ] Custom 18 17 - [x] TOC 19 18 - [x] Extraction 20 19 - [x] Parsing 21 - - [ ] Checksum validation 20 + - [x] Checksum validation 22 21 - [x] Heap 23 22 - [x] Extraction 24 - - [x] gzip 23 + - [x] gzip/zlib 25 24 - [x] bzip2 26 25 - [x] Checksum validation 27 26 ··· 30 29 - archive info 31 30 - file listing 32 31 - checksum validation 33 - - TOC unfinished, heap works (experimental) 34 - - decompression **(experimental!)** 35 - - bzip2 appears to be unreliable (i could get a test archive to decompress several files, but fail on others?) 36 - - gzip is implemented but UNTESTED 32 + - decompression 37 33 38 34 ## usage 39 35 ··· 44 40 --no-extract Only validate checksums, don't extract files 45 41 --help, -h Show this help message 46 42 ``` 43 + 44 + Files are extracted to `./<xar_file>.extracted/` 47 45 48 46 ### examples 49 47
+53 -59
czar.cr
··· 117 117 end 118 118 end 119 119 120 - def validate_heap_checksum(file : IO, header : XARHeader, xar : XAR) : Bool 121 - return true if xar.checksum.style == XARChecksumAlgo::NONE 122 - 123 - puts "Validating heap checksum..." 120 + def normalize_checksum(checksum_str : String) : String 121 + return "" if checksum_str.empty? 122 + checksum_str.downcase 123 + end 124 124 125 - # Calculate the total heap size (all file data) 126 - heap_start = header.header_size.to_u64 + header.length_compressed 127 - heap_size = 0_u64 125 + def detect_compression_format(data : Bytes) : XARFileEncoding 126 + return XARFileEncoding::NONE if data.size < 2 128 127 129 - xar.files.each do |xarfile| 130 - next if xarfile.type == XARFileType::DIRECTORY 131 - heap_size = [heap_size, xarfile.data.offset + xarfile.data.size].max 128 + # Check for GZIP magic bytes (1f 8b) 129 + if data[0] == 0x1f && data[1] == 0x8b 130 + return XARFileEncoding::GZIP 132 131 end 133 132 134 - if heap_size == 0 135 - puts "No heap data found" 136 - return true 133 + # Check for BZIP2 magic bytes (42 5a = "BZ") 134 + if data[0] == 0x42 && data[1] == 0x5a 135 + return XARFileEncoding::BZIP2 137 136 end 138 137 139 - # Read the entire heap data 140 - file.seek heap_start 141 - heap_data = Bytes.new(heap_size) 142 - file.read(heap_data) 143 - 144 - # Calculate checksum of heap data 145 - calculated = calculate_checksum(heap_data, xar.checksum.style) 146 - 147 - # Read the stored checksum 148 - file.seek heap_start + xar.checksum.offset 149 - stored_checksum_data = Bytes.new(xar.checksum.size) 150 - file.read(stored_checksum_data) 151 - stored_checksum = stored_checksum_data.hexstring 152 - 153 - if calculated.downcase == stored_checksum.downcase 154 - puts "✓ Heap checksum valid (#{xar.checksum.style})" 155 - return true 156 - else 157 - puts "✗ Heap checksum INVALID!" 
158 - puts " Expected: #{stored_checksum.downcase}" 159 - puts " Calculated: #{calculated.downcase}" 160 - return false 138 + # Check for ZLIB magic bytes (78 da, 78 9c, 78 01, etc.) 139 + if data[0] == 0x78 140 + return XARFileEncoding::GZIP # Treat ZLIB as GZIP-compatible 161 141 end 142 + 143 + return XARFileEncoding::NONE 162 144 end 163 145 164 146 def xar_decode_data(entity : XML::Node, data : XARFileData = XARFileData.new) 165 147 data.offset = (xml_value(entity, "offset").first rescue 0).to_u64 166 148 data.size = (xml_value(entity, "size").first rescue 0).to_u64 167 149 data.length = (xml_value(entity, "length").first rescue 0).to_u64 168 - data.checksum_extracted = xml_value(entity, "extracted-checksum").first rescue "" 150 + data.checksum_extracted = normalize_checksum((xml_value(entity, "extracted-checksum").first rescue "")) 169 151 data.checksum_extracted_style = XARChecksumAlgo.parse(xml_select(entity, "extracted-checksum").first["style"]) rescue XARChecksumAlgo::NONE 170 - data.checksum_archived = xml_value(entity, "archived-checksum").first rescue "" 152 + data.checksum_archived = normalize_checksum((xml_value(entity, "archived-checksum").first rescue "")) 171 153 data.checksum_archived_style = XARChecksumAlgo.parse(xml_select(entity, "archived-checksum").first["style"]) rescue XARChecksumAlgo::NONE 172 154 data.encoding = XARFileEncoding.parse(xml_select(entity, "encoding").first["style"].split("/x-").last) rescue XARFileEncoding::NONE 173 155 data ··· 280 262 xar.checksum.offset = xml_value(elem, "offset").first.to_u64 281 263 puts "TOC is checksummed as #{xar.checksum.style}, #{xar.checksum.size} bytes at offset #{xar.checksum.offset}" 282 264 283 - file.seek xar.checksum.offset 284 - toc_checksum = Bytes.new xar.checksum.size 285 - file.read(toc_checksum) 286 - validate_checksum(toc_data, toc_checksum.hexstring, xar.checksum.style, "TOC") 287 265 288 266 xml_select(toc, "file").each do |entity| 289 267 xar.files += xar_decode_file entity ··· 
292 270 puts "contains #{xar.files.select { |e| e.type == XARFileType::FILE }.size} files across #{xar.files.select { |e| e.type == XARFileType::DIRECTORY }.size} directories" 293 271 puts xar.files.map { |e| "#{e.path}#{e.name}" }.join " " 294 272 295 - # Validate heap checksum if present 296 - heap_valid = validate_heap_checksum(file, header, xar) 297 - if !heap_valid && strict_mode 298 - perror "Heap checksum validation failed (strict mode enabled)" 299 - elsif !heap_valid 300 - puts "Warning: Heap checksum validation failed, continuing anyway" 301 - end 273 + # Get heap offset 274 + heap_start = header.header_size.to_u64 + header.length_compressed 275 + file.seek(0, IO::Seek::End) 276 + file_size = file.tell 277 + file.seek(0) 278 + heap_size = file_size - heap_start 279 + 280 + file.seek heap_start 281 + heap_data = Bytes.new(heap_size) 282 + file.read(heap_data) 283 + 302 284 303 285 # Unarchive files (or just validate if --no-extract is specified) 304 286 validation_results = { ··· 318 300 # Log file metadata 319 301 puts "Processing file: #{output_path}" 320 302 puts " Offset: #{xarfile.data.offset}" 321 - puts " Size: #{xarfile.data.size}" 303 + puts " Compressed size: #{xarfile.data.length}" 304 + puts " Uncompressed size: #{xarfile.data.size}" 322 305 puts " Encoding: #{xarfile.data.encoding}" 323 306 324 - # Seek to the correct offset in the archive file 325 - file.seek header.header_size.to_u64 + header.length_compressed + xarfile.data.offset 307 + # Extract compressed data from heap 308 + # In XAR format: length = compressed size, size = uncompressed size 309 + compressed_size = xarfile.data.length 310 + if xarfile.data.offset + compressed_size > heap_data.size 311 + puts " Error: Requested data extends beyond heap boundary (offset: #{xarfile.data.offset}, compressed_size: #{compressed_size}, heap_size: #{heap_data.size})" 312 + next 313 + end 326 314 327 - # Read the compressed data 328 - compressed_data = Bytes.new(xarfile.data.size) 329 - 
file.read(compressed_data) 315 + compressed_data = heap_data[xarfile.data.offset, compressed_size] 330 316 331 317 validation_results["files_processed"] += 1 332 318 ··· 334 320 archived_valid = validate_checksum(compressed_data, xarfile.data.checksum_archived, 335 321 xarfile.data.checksum_archived_style, 336 322 "archived data for #{xarfile.name}") 323 + 324 + # Auto-detect compression format based on magic bytes 325 + actual_encoding = detect_compression_format(compressed_data) 337 326 338 327 # Decompress the data if necessary 339 - decompressed_data = case xarfile.data.encoding 328 + decompressed_data = case actual_encoding 340 329 when XARFileEncoding::GZIP 341 330 begin 342 - Compress::Gzip::Reader.new(SliceIO.new(compressed_data)).getb_to_end 331 + # Check if it's actually ZLIB 332 + is_zlib = compressed_data[0] == 0x78 333 + if is_zlib 334 + Compress::Zlib::Reader.new(SliceIO.new(compressed_data)).getb_to_end 335 + else 336 + Compress::Gzip::Reader.new(SliceIO.new(compressed_data)).getb_to_end 337 + end 343 338 rescue e 344 - puts "Error decompressing GZIP data for #{xarfile.name}: #{e}" 339 + puts "Error decompressing #{is_zlib ? "ZLIB" : "GZIP"} data for #{xarfile.name}: #{e}" 345 340 next 346 341 end 347 342 when XARFileEncoding::BZIP2 ··· 401 396 puts "Checksum failures: #{validation_results["checksum_failures"]}" 402 397 puts " - Archived checksum failures: #{validation_results["archived_checksum_failures"]}" 403 398 puts " - Extracted checksum failures: #{validation_results["extracted_checksum_failures"]}" 404 - puts "Heap checksum: #{heap_valid ? "✓ Valid" : "✗ Invalid"}" 405 399 406 - if validation_results["checksum_failures"] > 0 || !heap_valid 400 + if validation_results["checksum_failures"] > 0 407 401 puts "\nWarning: Some checksum validations failed. The extracted files may be corrupted." 408 402 else 409 403 puts "\n✓ All checksums validated successfully!"