xar unarchiver (.xar, .pkg, .xip)
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

implement checksumming

+228 -20
+30 -3
README.md
··· 10 10 - [x] Version 11 11 - [x] TOC length (compressed) 12 12 - [x] TOC length (uncompressed) 13 - - [ ] Checksum algorithm 13 + - [x] Checksum algorithm 14 14 - [x] None 15 15 - [x] SHA1 16 16 - [x] MD5 ··· 18 18 - [x] TOC 19 19 - [x] Extraction 20 20 - [x] Parsing 21 - - [ ] Heap 22 - - [ ] Extraction 21 + - [ ] Checksum validation 22 + - [x] Heap 23 + - [x] Extraction 23 24 - [x] gzip 24 25 - [x] bzip2 26 + - [x] Checksum validation 25 27 26 28 ## current features 27 29 28 30 - archive info 29 31 - file listing 32 + - checksum validation 33 + - TOC unfinished, heap works (experimental) 30 34 - decompression **(experimental!)** 31 35 - bzip2 appears to be unreliable (i could get a test archive to decompress several files, but fail on others?) 32 36 - gzip is implemented but UNTESTED 37 + 38 + ## usage 39 + 40 + ``` 41 + Usage: czar [options] <xar_file> 42 + Options: 43 + --strict Fail extraction if any checksum validation fails 44 + --no-extract Only validate checksums, don't extract files 45 + --help, -h Show this help message 46 + ``` 47 + 48 + ### examples 49 + 50 + ```bash 51 + # Extract archive with checksum validation 52 + ./czar archive.xar 53 + 54 + # Validate checksums only (no extraction) 55 + ./czar --no-extract archive.xar 56 + 57 + # Strict mode - fail if any checksum is invalid 58 + ./czar --strict archive.xar 59 + ``` 33 60 34 61 --- 35 62
+198 -17
czar.cr
# czar.cr -- post-commit state of the regions visible in this diff view, with the
# diff gutter (old/new line numbers, +/- markers) removed. Regions the viewer
# collapsed are marked "not shown" and are intentionally left out, not guessed.

# (czar.cr lines 1-2 not shown)
require "compress/zlib"
require "compress/gzip"
require "xml"
require "digest/md5"
require "digest/sha1"

require "./sliceio"
require "./bzip2"

# (czar.cr lines 11-90 not shown)
  property files : Array(XARFile) = [] of XARFile
end

# Returns the lowercase hex digest of `data` for the given algorithm.
#
# Only MD5 and SHA1 are handled; any other algorithm yields an empty string,
# which callers will see as a mismatch against a non-empty expected value.
def calculate_checksum(data : Bytes, algo : XARChecksumAlgo) : String
  case algo
  when XARChecksumAlgo::MD5
    Digest::MD5.hexdigest(data)
  when XARChecksumAlgo::SHA1
    Digest::SHA1.hexdigest(data)
  else
    ""
  end
end

# Compares the digest of `data` against `expected` (case-insensitively) and
# prints the outcome, labelled with `description`.
#
# Returns true when the digests match, and also when there is nothing to check
# (algorithm is NONE or `expected` is empty). Returns false on mismatch.
def validate_checksum(data : Bytes, expected : String, algo : XARChecksumAlgo, description : String) : Bool
  return true if algo == XARChecksumAlgo::NONE || expected.empty?

  calculated = calculate_checksum(data, algo).downcase
  wanted = expected.downcase

  if calculated == wanted
    puts "✓ #{description} checksum valid (#{algo})"
    return true
  end

  puts "✗ #{description} checksum INVALID!"
  puts " Expected: #{wanted}"
  puts " Calculated: #{calculated}"
  false
end

# Validates the archive-level checksum that the TOC's checksum entry points at.
#
# The entry's offset/size locate a digest stored in the heap. Per the xar format
# that digest is the raw (binary) hash of the *compressed TOC*, not a hash of the
# heap contents, so this hashes the compressed TOC bytes and compares against the
# hex encoding of the stored bytes. Hashing the heap itself could never match,
# because the heap contains the stored digest.
#
# file   - the open archive; its position is moved by this method.
# header - supplies `header_size` (start of the TOC) and `length_compressed`.
# xar    - supplies `checksum.style`, `checksum.offset` and `checksum.size`.
#
# Returns true when the digest matches or there is nothing to validate, false on
# mismatch or when the archive ends before the TOC or the digest can be read.
def validate_heap_checksum(file : IO, header : XARHeader, xar : XAR) : Bool
  algo = xar.checksum.style
  return true if algo == XARChecksumAlgo::NONE

  puts "Validating heap checksum..."

  # No stored digest to compare against.
  if xar.checksum.size == 0
    puts "No heap data found"
    return true
  end

  toc_start = header.header_size.to_u64
  heap_start = toc_start + header.length_compressed

  # Read the compressed TOC exactly as it sits on disk.
  file.seek toc_start
  toc_bytes = Bytes.new(header.length_compressed)
  toc_complete = file.read_fully?(toc_bytes)

  # Read the stored digest; offsets in the TOC are relative to the heap start.
  file.seek heap_start + xar.checksum.offset
  stored_bytes = Bytes.new(xar.checksum.size)
  digest_complete = file.read_fully?(stored_bytes)

  # read_fully? returns nil on a short read, which means the archive is truncated.
  unless toc_complete && digest_complete
    puts "✗ Heap checksum INVALID!"
    puts " Archive is truncated: could not read the TOC or the stored checksum"
    return false
  end

  calculated = calculate_checksum(toc_bytes, algo)
  stored_checksum = stored_bytes.hexstring # raw digest bytes -> lowercase hex

  if calculated == stored_checksum
    puts "✓ Heap checksum valid (#{algo})"
    return true
  end

  puts "✗ Heap checksum INVALID!"
  puts " Expected: #{stored_checksum}"
  puts " Calculated: #{calculated}"
  false
end

def xar_decode_data(entity : XML::Node, data : XARFileData = XARFileData.new)
  data.offset = (xml_value(entity, "offset").first rescue 0).to_u64
  data.size = (xml_value(entity, "size").first rescue 0).to_u64
# (czar.cr lines 167-211 not shown)
  files
end

# Parse command line options
strict_mode = false
no_extract = false
filename = ""

ARGV.each do |arg|
  case arg
  when "--strict"
    strict_mode = true
  when "--no-extract"
    no_extract = true
  when "--help", "-h"
    puts "Usage: #{PROGRAM_NAME} [options] <xar_file>"
    puts "Options:"
    puts " --strict Fail extraction if any checksum validation fails"
    puts " --no-extract Only validate checksums, don't extract files"
    puts " --help, -h Show this help message"
    exit 0
  else
    # The first non-option argument is the archive; a second one is an error.
    if filename.empty?
      filename = arg
    else
      perror "multiple filenames provided"
    end
  end
end

perror "no filename given" if filename.empty?

File.open(filename, "r") do |file|
  header = XARHeader.new
  header.load file

# (czar.cr lines 250-262 not shown)
    zfile.read toc_data
  end

  # The TOC checksum is verified further down by validate_heap_checksum, which
  # reads the stored digest location from the parsed TOC.

  xar_xml = XML.parse String.new(toc_data)
  xar_obj = xml_select(xar_xml, "xar")
  perror "empty xar object" if xar_obj.empty?
# (czar.cr lines 271-286 not shown)
  end

  file_count = xar.files.count { |e| e.type == XARFileType::FILE }
  dir_count = xar.files.count { |e| e.type == XARFileType::DIRECTORY }
  puts "contains #{file_count} files across #{dir_count} directories"
  puts xar.files.map { |e| "#{e.path}#{e.name}" }.join " "

  # Validate heap checksum if present
  heap_valid = validate_heap_checksum(file, header, xar)
  if !heap_valid && strict_mode
    perror "Heap checksum validation failed (strict mode enabled)"
  elsif !heap_valid
    puts "Warning: Heap checksum validation failed, continuing anyway"
  end

  # Unarchive files (or just validate if --no-extract is specified)
  validation_results = {
    "files_processed"             => 0,
    "files_extracted"             => 0,
    "checksum_failures"           => 0,
    "archived_checksum_failures"  => 0,
    "extracted_checksum_failures" => 0,
  }

  # Everything is written beneath this directory. Entry paths come from the
  # archive (untrusted), so each one is checked against the resolved root below.
  extract_root = "#{filename}.extracted"
  extract_root_prefix = "#{File.expand_path(extract_root)}#{File::SEPARATOR}"

  xar.files.each do |xarfile|
    next if xarfile.type == XARFileType::DIRECTORY

    output_path = File.join(extract_root, xarfile.path, xarfile.name)

    # Log file metadata
# (czar.cr lines 316-324 not shown)
    compressed_data = Bytes.new(xarfile.data.size)
    # IO#read may stop short; read_fully? either fills the buffer or returns nil.
    unless file.read_fully?(compressed_data)
      puts "Error reading data for #{xarfile.name}: unexpected end of archive"
      next
    end

    validation_results["files_processed"] += 1

    # Validate archived checksum (on compressed data)
    archived_valid = validate_checksum(compressed_data, xarfile.data.checksum_archived,
      xarfile.data.checksum_archived_style,
      "archived data for #{xarfile.name}")

    # Decompress the data if necessary
    decompressed_data = case xarfile.data.encoding
                        when XARFileEncoding::GZIP
                          begin
                            # NOTE(review): xar's "gzip" entries are normally zlib
                            # streams; only use the gzip reader when the gzip magic
                            # (1f 8b) is actually present. Confirm against a real archive.
                            if compressed_data.size >= 2 && compressed_data[0] == 0x1f && compressed_data[1] == 0x8b
                              Compress::Gzip::Reader.new(SliceIO.new(compressed_data)).getb_to_end
                            else
                              Compress::Zlib::Reader.new(SliceIO.new(compressed_data)).getb_to_end
                            end
                          rescue e
                            puts "Error decompressing GZIP data for #{xarfile.name}: #{e}"
                            next
                          end
                        when XARFileEncoding::BZIP2
                          begin
                            Bzip2::Reader.new(SliceIO.new(compressed_data)).getb_to_end
                          rescue e
                            puts "Error decompressing BZIP2 data for #{xarfile.name}: #{e}"
                            next
                          end
                        else
                          compressed_data
                        end

    # Validate extracted checksum (on decompressed data)
    extracted_valid = validate_checksum(decompressed_data, xarfile.data.checksum_extracted,
      xarfile.data.checksum_extracted_style,
      "extracted data for #{xarfile.name}")

    # Track validation results
    validation_results["archived_checksum_failures"] += 1 unless archived_valid
    validation_results["extracted_checksum_failures"] += 1 unless extracted_valid

    # Handle checksum validation results
    checksum_failed = !(archived_valid && extracted_valid)
    validation_results["checksum_failures"] += 1 if checksum_failed

    if checksum_failed && strict_mode
      perror "Checksum validation failed for #{output_path} (strict mode enabled)"
    elsif checksum_failed
      puts "Warning: Checksum validation failed for #{output_path}, extracting anyway"
    end

    # Write the file (unless --no-extract is specified)
    if no_extract
      puts "Validated: #{output_path} (not extracted)"
    elsif !File.expand_path(output_path).starts_with?(extract_root_prefix)
      # An entry such as "../../etc/passwd" would otherwise land outside the root.
      puts "Warning: refusing to write #{output_path}: path escapes #{extract_root}"
    else
      begin
        # Create directories only when a file is really written, so that
        # --no-extract leaves the filesystem untouched.
        Dir.mkdir_p(File.dirname(output_path))
        File.write(output_path, decompressed_data)
        puts "Extracted: #{output_path}"
        validation_results["files_extracted"] += 1
      rescue e
        perror "Error writing file #{output_path}: #{e}"
      end
    end
  end

  # Print validation summary
  puts "\n=== #{no_extract ? "Validation" : "Extraction"} Summary ==="
  puts "Files processed: #{validation_results["files_processed"]}"
  puts "Files extracted: #{validation_results["files_extracted"]}" unless no_extract
  puts "Checksum failures: #{validation_results["checksum_failures"]}"
  puts " - Archived checksum failures: #{validation_results["archived_checksum_failures"]}"
  puts " - Extracted checksum failures: #{validation_results["extracted_checksum_failures"]}"
  puts "Heap checksum: #{heap_valid ? "✓ Valid" : "✗ Invalid"}"

  if validation_results["checksum_failures"] > 0 || !heap_valid
    puts "\nWarning: Some checksum validations failed. The extracted files may be corrupted."
  else
    puts "\n✓ All checksums validated successfully!"
  end
end