xar unarchiver (.xar, .pkg, .xip)
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at master 405 lines 13 kB view raw
#!/usr/bin/env crystal
#
# xar unarchiver (.xar, .pkg, .xip)
#
# Reads a XAR archive, inflates its zlib-compressed XML table of contents,
# validates the checksums recorded in the TOC (the TOC digest itself plus the
# per-file archived/extracted digests), and — unless --no-extract is given —
# writes the decompressed files into "<archive>.extracted/".

require "binary_parser"
require "compress/zlib"
require "compress/gzip"
require "xml"
require "digest/md5"
require "digest/sha1"

require "./sliceio"
require "./bzip2"

# Print a message (with trailing newline) to stderr and exit with status 1.
def perror(msg : String)
  STDERR.puts msg
  exit 1
end

# Return the child elements of `xml` whose name equals `node`.
def xml_select(xml : XML::Node, node : String)
  xml.children.select { |e| e.name == node }
end

# Return the text content of each child element of `xml` named `name`.
def xml_value(xml : XML::Node, name : String)
  xml.children.select { |e| e.name == name }.map { |e| e.content }
end

# Digest algorithm used for TOC and per-file checksums.
enum XARChecksumAlgo
  NONE
  SHA1
  MD5
end

enum XARFileType
  FILE
  DIRECTORY
end

# Compression applied to a file's data region inside the heap.
enum XARFileEncoding
  NONE
  GZIP
  BZIP2
end

# Fixed-size big-endian header at the start of every XAR archive.
class XARHeader < BinaryParser
  endian :big
  string :magic, {count: 4}
  uint16 :header_size
  uint16 :version
  uint64 :length_compressed
  uint64 :length_uncompressed
  uint32 :checksum_algo
end

# Location and checksums of one data region in the heap.
# `offset` is relative to the heap start; `length` is the stored
# (compressed) byte count, `size` the uncompressed byte count.
class XARFileData
  property offset : UInt64 = 0
  property size : UInt64 = 0
  property length : UInt64 = 0
  property checksum_extracted : String = ""
  property checksum_extracted_style : XARChecksumAlgo = XARChecksumAlgo::NONE
  property checksum_archived : String = ""
  property checksum_archived_style : XARChecksumAlgo = XARChecksumAlgo::NONE
  property encoding : XARFileEncoding = XARFileEncoding::NONE
end

# Extended-attribute payload: a named data region.
class XARFileEAttrs < XARFileData
  property name : String = ""
end

# Where the TOC digest lives in the heap, and which algorithm produced it.
class XARChecksum
  property style : XARChecksumAlgo = XARChecksumAlgo::NONE
  property size : UInt64 = 0
  property offset : UInt64 = 0
end

# One TOC <file> entry, flattened with its path prefix.
class XARFile
  property path : String = ""
  property name : String = ""
  property type : XARFileType = XARFileType::FILE
  property mode : Array(UInt8) = [0_u8, 0_u8, 0_u8, 0_u8]
  property uid : UInt64 = 0
  property gid : UInt64 = 0
  property user : String = ""
  property group : String = ""
  property size : UInt64 = 0
  property data : XARFileData = XARFileData.new
  property ea : XARFileEAttrs = XARFileEAttrs.new
end

class XAR
  property checksum : XARChecksum = XARChecksum.new
  property files : Array(XARFile) = [] of XARFile
end

# Hex digest of `data` under `algo`; empty string when algo is NONE/unknown.
def calculate_checksum(data : Bytes, algo : XARChecksumAlgo) : String
  case algo
  when XARChecksumAlgo::MD5
    Digest::MD5.hexdigest(data)
  when XARChecksumAlgo::SHA1
    Digest::SHA1.hexdigest(data)
  else
    ""
  end
end

# Compare `data`'s digest against `expected` (case-insensitive), logging the
# outcome. Vacuously true when there is nothing to check.
def validate_checksum(data : Bytes, expected : String, algo : XARChecksumAlgo, description : String) : Bool
  return true if algo == XARChecksumAlgo::NONE || expected.empty?

  calculated = calculate_checksum(data, algo)
  if calculated.downcase == expected.downcase
    puts "#{description} checksum valid (#{algo})"
    return true
  else
    puts "#{description} checksum INVALID!"
    puts "  Expected:   #{expected.downcase}"
    puts "  Calculated: #{calculated.downcase}"
    return false
  end
end

# Lowercase a checksum string so comparisons are case-insensitive.
def normalize_checksum(checksum_str : String) : String
  return "" if checksum_str.empty?
  checksum_str.downcase
end

# Sniff the real compression format from magic bytes, since TOC <encoding>
# entries are not always trustworthy.
def detect_compression_format(data : Bytes) : XARFileEncoding
  return XARFileEncoding::NONE if data.size < 2

  # GZIP magic bytes (1f 8b)
  if data[0] == 0x1f && data[1] == 0x8b
    return XARFileEncoding::GZIP
  end

  # BZIP2 magic bytes (42 5a = "BZ")
  if data[0] == 0x42 && data[1] == 0x5a
    return XARFileEncoding::BZIP2
  end

  # ZLIB magic bytes (78 da, 78 9c, 78 01, ...) — handled via the GZIP path,
  # which dispatches to the zlib reader on this first byte.
  if data[0] == 0x78
    return XARFileEncoding::GZIP
  end

  return XARFileEncoding::NONE
end

# Populate `data` from a TOC <data>-style element (offset/size/length,
# archived & extracted checksums, encoding). Missing fields keep defaults.
def xar_decode_data(entity : XML::Node, data : XARFileData = XARFileData.new)
  data.offset = (xml_value(entity, "offset").first rescue 0).to_u64
  data.size = (xml_value(entity, "size").first rescue 0).to_u64
  data.length = (xml_value(entity, "length").first rescue 0).to_u64
  data.checksum_extracted = normalize_checksum((xml_value(entity, "extracted-checksum").first rescue ""))
  data.checksum_extracted_style = XARChecksumAlgo.parse(xml_select(entity, "extracted-checksum").first["style"]) rescue XARChecksumAlgo::NONE
  data.checksum_archived = normalize_checksum((xml_value(entity, "archived-checksum").first rescue ""))
  data.checksum_archived_style = XARChecksumAlgo.parse(xml_select(entity, "archived-checksum").first["style"]) rescue XARChecksumAlgo::NONE
  # encoding style looks like "application/x-gzip"; keep the part after "/x-"
  data.encoding = XARFileEncoding.parse(xml_select(entity, "encoding").first["style"].split("/x-").last) rescue XARFileEncoding::NONE
  data
end

# Populate an extended-attributes entry: a data region plus its name.
def xar_decode_ea(entity : XML::Node, ea : XARFileEAttrs = XARFileEAttrs.new)
  xar_decode_data entity, ea
  ea.name = xml_value(entity, "name").first rescue ""
  ea
end

# Recursively decode a TOC <file> element (and its children) into a flat
# array of XARFile, prefixing each child's path with its parent directory.
def xar_decode_file(entity : XML::Node, path : String = "./")
  file = XARFile.new
  file.path = path
  file.name = xml_value(entity, "name").first rescue ""
  file.type = XARFileType.parse(xml_value(entity, "type").first) rescue XARFileType::FILE
  file.mode = (xml_value(entity, "mode").first rescue "0000").split("").map { |p| p.to_u8 }
  file.uid = (xml_value(entity, "uid").first rescue 0).to_u64
  file.gid = (xml_value(entity, "gid").first rescue 0).to_u64
  file.user = xml_value(entity, "user").first rescue ""
  file.group = xml_value(entity, "group").first rescue ""
  file.size = (xml_value(entity, "size").first rescue 0).to_u64

  data = xml_select(entity, "data")
  unless data.empty?
    xar_decode_data data.first, file.data
  end
  ea = xml_select(entity, "ea")
  unless ea.empty?
    xar_decode_ea ea.first, file.ea
  end

  files = [file]
  children = xml_select(entity, "file")
  if children.size > 0
    if file.type != XARFileType::DIRECTORY
      puts "warn: found a #{file.type} with #{children.size} children"
    end
    children.each do |child|
      files += xar_decode_file child, "#{path}#{file.name}/"
    end
  end
  files
end

# --- Command line parsing ---------------------------------------------------
strict_mode = false
no_extract = false
filename = ""

i = 0
while i < ARGV.size
  case ARGV[i]
  when "--strict"
    strict_mode = true
  when "--no-extract"
    no_extract = true
  when "--help", "-h"
    puts "Usage: #{PROGRAM_NAME} [options] <xar_file>"
    puts "Options:"
    puts "  --strict      Fail extraction if any checksum validation fails"
    puts "  --no-extract  Only validate checksums, don't extract files"
    puts "  --help, -h    Show this help message"
    exit 0
  else
    if filename.empty?
      filename = ARGV[i]
    else
      perror "multiple filenames provided"
    end
  end
  i += 1
end

perror "no filename given" if filename.empty?

File.open(filename, "r") do |file|
  header = XARHeader.new
  header.load file

  perror "not a xar file" if header.magic != "xar!"

  puts "#{header.magic}"
  puts "header size #{header.header_size}"
  puts "format version #{header.version}"
  puts "TOC length (compressed) #{header.length_compressed}"
  puts "TOC length (uncompressed) #{header.length_uncompressed}"
  puts "checksum algo #{XARChecksumAlgo.new(header.checksum_algo.to_i32).to_s}"

  # Read the raw (still compressed) TOC first so its digest can be verified
  # later — the TOC checksum stored in the heap is computed over these bytes.
  toc_compressed = Bytes.new header.length_compressed
  file.seek header.header_size
  # read_fully: IO#read may legally return fewer bytes than requested.
  file.read_fully toc_compressed

  toc_data = Bytes.new header.length_uncompressed
  Compress::Zlib::Reader.open IO::Memory.new(toc_compressed) do |zfile|
    zfile.read_fully toc_data
  end

  xar_xml = XML.parse String.new(toc_data)
  xar_obj = xml_select(xar_xml, "xar")
  perror "empty xar object" if xar_obj.empty?

  tocs = xml_select(xar_obj.first, "toc")
  perror "empty TOC" if tocs.empty?

  toc = tocs.first
  puts "reading TOC"

  xar = XAR.new
  elem = xml_select(toc, "checksum").first
  xar.checksum.style = XARChecksumAlgo.parse elem["style"]
  xar.checksum.size = xml_value(elem, "size").first.to_u64
  xar.checksum.offset = xml_value(elem, "offset").first.to_u64
  puts "TOC is checksummed as #{xar.checksum.style}, #{xar.checksum.size} bytes at offset #{xar.checksum.offset}"

  xml_select(toc, "file").each do |entity|
    xar.files += xar_decode_file entity
  end

  puts "contains #{xar.files.select { |e| e.type == XARFileType::FILE }.size} files across #{xar.files.select { |e| e.type == XARFileType::DIRECTORY }.size} directories"
  puts xar.files.map { |e| "#{e.path}#{e.name}" }.join " "

  # The heap begins immediately after the compressed TOC; every data offset
  # in the TOC is relative to this position.
  heap_start = header.header_size.to_u64 + header.length_compressed
  heap_size = file.size - heap_start

  file.seek heap_start
  heap_data = Bytes.new(heap_size)
  file.read_fully(heap_data)

  # Verify the TOC digest: the binary digest of the compressed TOC is stored
  # in the heap at the offset/size announced by the <checksum> element.
  if xar.checksum.style != XARChecksumAlgo::NONE &&
     xar.checksum.offset + xar.checksum.size <= heap_data.size
    stored_toc_digest = heap_data[xar.checksum.offset, xar.checksum.size].hexstring
    toc_valid = validate_checksum(toc_compressed, stored_toc_digest, xar.checksum.style, "TOC")
    perror "TOC checksum validation failed (strict mode enabled)" if !toc_valid && strict_mode
  end

  # Unarchive files (or just validate if --no-extract is specified)
  validation_results = {
    "files_processed"             => 0,
    "files_extracted"             => 0,
    "checksum_failures"           => 0,
    "archived_checksum_failures"  => 0,
    "extracted_checksum_failures" => 0,
  }

  xar.files.each do |xarfile|
    next if xarfile.type == XARFileType::DIRECTORY

    output_path = File.join("#{filename}.extracted", xarfile.path, xarfile.name)
    # mkdir_p is idempotent; no need to check existence first.
    Dir.mkdir_p(File.dirname(output_path))

    # Log file metadata
    puts "Processing file: #{output_path}"
    puts "  Offset: #{xarfile.data.offset}"
    puts "  Compressed size: #{xarfile.data.length}"
    puts "  Uncompressed size: #{xarfile.data.size}"
    puts "  Encoding: #{xarfile.data.encoding}"

    # Extract compressed data from heap.
    # In XAR format: length = compressed size, size = uncompressed size
    compressed_size = xarfile.data.length
    if xarfile.data.offset + compressed_size > heap_data.size
      puts "  Error: Requested data extends beyond heap boundary (offset: #{xarfile.data.offset}, compressed_size: #{compressed_size}, heap_size: #{heap_data.size})"
      next
    end

    compressed_data = heap_data[xarfile.data.offset, compressed_size]

    validation_results["files_processed"] += 1

    # Validate archived checksum (on compressed data)
    archived_valid = validate_checksum(compressed_data, xarfile.data.checksum_archived,
      xarfile.data.checksum_archived_style,
      "archived data for #{xarfile.name}")

    # Auto-detect compression format based on magic bytes
    actual_encoding = detect_compression_format(compressed_data)

    # Decompress the data if necessary
    decompressed_data = case actual_encoding
                        when XARFileEncoding::GZIP
                          # Decide zlib vs gzip BEFORE the begin block so the
                          # rescue handler can safely reference it.
                          is_zlib = compressed_data[0] == 0x78
                          begin
                            if is_zlib
                              Compress::Zlib::Reader.new(SliceIO.new(compressed_data)).getb_to_end
                            else
                              Compress::Gzip::Reader.new(SliceIO.new(compressed_data)).getb_to_end
                            end
                          rescue e
                            puts "Error decompressing #{is_zlib ? "ZLIB" : "GZIP"} data for #{xarfile.name}: #{e}"
                            next
                          end
                        when XARFileEncoding::BZIP2
                          begin
                            Bzip2::Reader.new(SliceIO.new(compressed_data)).getb_to_end
                          rescue e
                            puts "Error decompressing BZIP2 data for #{xarfile.name}: #{e}"
                            next
                          end
                        else
                          compressed_data
                        end

    # Validate extracted checksum (on decompressed data)
    extracted_valid = validate_checksum(decompressed_data, xarfile.data.checksum_extracted,
      xarfile.data.checksum_extracted_style,
      "extracted data for #{xarfile.name}")

    # Track validation results
    unless archived_valid
      validation_results["archived_checksum_failures"] += 1
    end
    unless extracted_valid
      validation_results["extracted_checksum_failures"] += 1
    end

    # Handle checksum validation results
    checksum_failed = !archived_valid || !extracted_valid
    if checksum_failed
      validation_results["checksum_failures"] += 1
    end

    if checksum_failed && strict_mode
      perror "Checksum validation failed for #{output_path} (strict mode enabled)"
    elsif checksum_failed
      puts "Warning: Checksum validation failed for #{output_path}, extracting anyway"
    end

    # Write the file (unless --no-extract is specified)
    if no_extract
      puts "Validated: #{output_path} (not extracted)"
    else
      begin
        File.write(output_path, decompressed_data)
        puts "Extracted: #{output_path}"
        validation_results["files_extracted"] += 1
      rescue e
        perror "Error writing file #{output_path}: #{e}"
      end
    end
  end

  # Print validation summary
  puts "\n=== #{no_extract ? "Validation" : "Extraction"} Summary ==="
  puts "Files processed: #{validation_results["files_processed"]}"
  puts "Files extracted: #{validation_results["files_extracted"]}" unless no_extract
  puts "Checksum failures: #{validation_results["checksum_failures"]}"
  puts "  - Archived checksum failures: #{validation_results["archived_checksum_failures"]}"
  puts "  - Extracted checksum failures: #{validation_results["extracted_checksum_failures"]}"

  if validation_results["checksum_failures"] > 0
    puts "\nWarning: Some checksum validations failed. The extracted files may be corrupted."
  else
    puts "\n✓ All checksums validated successfully!"
  end
end