xar unarchiver (.xar, .pkg, .xip)
1#!/usr/bin/env crystal
2require "binary_parser"
3require "compress/zlib"
4require "compress/gzip"
5require "xml"
6require "digest/md5"
7require "digest/sha1"
8
9require "./sliceio"
10require "./bzip2"
11
# Writes `msg` followed by a newline to standard error, then terminates the
# process with exit status 1. Never returns.
#
# The message is written through the IO string API so that the full byte
# length is emitted even when `msg` contains multi-byte (non-ASCII) characters.
def perror(msg : String)
  STDERR << msg << '\n'
  exit 1
end
16
# Returns the direct children of `xml` whose element name equals `node`,
# in document order. The result is empty when no child matches.
#
# `xml` is restricted to `XML::Node`, so it can never be nil here and no
# nil guard is needed.
def xml_select(xml : XML::Node, node : String)
  xml.children.select { |child| child.name == node }
end
21
# Returns the text content of every direct child of `xml` whose element name
# equals `name`, in document order. The result is empty when no child matches.
#
# `xml` is restricted to `XML::Node`, so it can never be nil here and no
# nil guard is needed.
def xml_value(xml : XML::Node, name : String)
  xml.children.compact_map { |child| child.content if child.name == name }
end
26
# Checksum algorithms understood by this tool.
#
# The numeric values matter: the extractor builds a member directly from the
# integer `checksum_algo` field of the archive header.
enum XARChecksumAlgo
  NONE = 0
  SHA1 = 1
  MD5  = 2
end
32
# Kind of entry described by a TOC <file> element.
enum XARFileType
  FILE      = 0
  DIRECTORY = 1
end
37
# Payload encodings this tool can decode; NONE means the bytes are used as-is.
enum XARFileEncoding
  NONE  = 0
  GZIP  = 1
  BZIP2 = 2
end
43
# Binary layout of the header found at the start of an archive.
# All multi-byte fields are declared big-endian.
class XARHeader < BinaryParser
  endian(:big)
  # Four-byte signature; the extractor rejects anything other than "xar!".
  string(:magic, {count: 4})
  # The extractor seeks to this offset to find the compressed TOC.
  uint16(:header_size)
  uint16(:version)
  # Size in bytes of the compressed TOC; the heap follows immediately after it.
  uint64(:length_compressed)
  # Size in bytes of the TOC once decompressed.
  uint64(:length_uncompressed)
  # Numeric checksum algorithm id, mapped onto XARChecksumAlgo by the extractor.
  uint32(:checksum_algo)
end
53
# Location, sizes, checksums and encoding of one payload described by a TOC
# <data> (or <ea>) element. Every field has a neutral default so a missing
# element simply leaves it untouched.
class XARFileData
  # Value of the <offset> element; the extractor uses it as an index into the heap.
  property offset : UInt64 = 0_u64
  # Value of the <size> element; the extractor reports it as the uncompressed size.
  property size : UInt64 = 0_u64
  # Value of the <length> element; the extractor uses it as the stored (compressed) size.
  property length : UInt64 = 0_u64
  # Checksum text and algorithm from <extracted-checksum>.
  property checksum_extracted : String = ""
  property checksum_extracted_style : XARChecksumAlgo = XARChecksumAlgo::NONE
  # Checksum text and algorithm from <archived-checksum>.
  property checksum_archived : String = ""
  property checksum_archived_style : XARChecksumAlgo = XARChecksumAlgo::NONE
  # Encoding parsed from the style attribute of <encoding>.
  property encoding : XARFileEncoding = XARFileEncoding::NONE
end
64
# Payload description for an <ea> element: everything XARFileData carries,
# plus the value of the element's <name> child.
class XARFileEAttrs < XARFileData
  property name = ""
end
68
# The TOC-level <checksum> element: its algorithm (style attribute) and the
# <size> / <offset> values that accompany it.
class XARChecksum
  property style : XARChecksumAlgo = XARChecksumAlgo::NONE
  property size : UInt64 = 0_u64
  property offset : UInt64 = 0_u64
end
74
# One entry decoded from a TOC <file> element.
class XARFile
  # Directory prefix accumulated from the enclosing <file> elements (ends in "/").
  property path : String = ""
  # Value of the entry's own <name> element.
  property name : String = ""
  property type : XARFileType = XARFileType::FILE
  # One element per character of the <mode> text, each parsed as a number.
  property mode : Array(UInt8) = Array(UInt8).new(4, 0_u8)
  property uid : UInt64 = 0_u64
  property gid : UInt64 = 0_u64
  property user : String = ""
  property group : String = ""
  property size : UInt64 = 0_u64
  # Payload description from the <data> child, if any.
  property data : XARFileData = XARFileData.new
  # Payload description from the <ea> child, if any.
  property ea : XARFileEAttrs = XARFileEAttrs.new
end
88
# In-memory view of a parsed archive: the TOC checksum description and the
# flat list of every file and directory entry found in the TOC.
class XAR
  property checksum : XARChecksum = XARChecksum.new
  property files : Array(XARFile) = Array(XARFile).new
end
93
# Returns the hex digest of `data` computed with `algo`.
# For any algorithm other than MD5 or SHA1 the result is the empty string.
def calculate_checksum(data : Bytes, algo : XARChecksumAlgo) : String
  if algo == XARChecksumAlgo::MD5
    Digest::MD5.hexdigest(data)
  elsif algo == XARChecksumAlgo::SHA1
    Digest::SHA1.hexdigest(data)
  else
    ""
  end
end
104
# Compares the digest of `data` (computed with `algo`) against `expected`,
# ignoring letter case, and prints the outcome to standard output.
#
# Returns true when the digests match, or when there is nothing to check
# (`algo` is NONE or `expected` is empty); returns false on a mismatch.
# `description` is only used to label the printed messages.
def validate_checksum(data : Bytes, expected : String, algo : XARChecksumAlgo, description : String) : Bool
  # Nothing to verify: treat as valid without printing anything.
  return true if expected.empty? || algo == XARChecksumAlgo::NONE

  actual = calculate_checksum(data, algo).downcase
  wanted = expected.downcase

  unless actual == wanted
    puts "✗ #{description} checksum INVALID!"
    puts " Expected: #{wanted}"
    puts " Calculated: #{actual}"
    return false
  end

  puts "✓ #{description} checksum valid (#{algo})"
  true
end
119
# Canonicalises checksum text for comparison: an empty input yields "",
# anything else is returned lower-cased.
def normalize_checksum(checksum_str : String) : String
  checksum_str.empty? ? "" : checksum_str.downcase
end
124
# Guesses how `data` is encoded by inspecting its leading bytes.
#
# Returns:
# * GZIP  for a gzip stream (magic 1f 8b) or a zlib stream (callers tell the
#   two apart by checking whether the first byte is 0x78);
# * BZIP2 for a bzip2 stream (magic "BZh");
# * NONE  for anything else, including inputs shorter than two bytes.
#
# The zlib and bzip2 checks validate more than the first byte(s) so that
# ordinary uncompressed content that merely starts with the ASCII letter 'x'
# (0x78) or the letters "BZ" is not mistaken for a compressed stream.
def detect_compression_format(data : Bytes) : XARFileEncoding
  return XARFileEncoding::NONE if data.size < 2

  first = data[0]
  second = data[1]

  # GZIP magic bytes (1f 8b)
  return XARFileEncoding::GZIP if first == 0x1f && second == 0x8b

  # BZIP2 magic bytes (42 5a 68 = "BZh")
  if first == 0x42 && second == 0x5a && data.size >= 3 && data[2] == 0x68
    return XARFileEncoding::BZIP2
  end

  # ZLIB header (78 01, 78 5e, 78 9c, 78 da): the two header bytes, read as a
  # big-endian 16-bit number, must be a multiple of 31.
  if first == 0x78 && ((first.to_u16 << 8) | second) % 31 == 0
    return XARFileEncoding::GZIP # Treat ZLIB as GZIP-compatible
  end

  XARFileEncoding::NONE
end
145
# Fills `data` from the children of `entity` (a TOC <data> or <ea> element)
# and returns it.
#
# Missing elements fall back to neutral values: 0 for offset/size/length,
# "" for checksum text, NONE for checksum styles and encoding. A style or
# encoding name that is not a member of the corresponding enum also yields NONE.
def xar_decode_data(entity : XML::Node, data : XARFileData = XARFileData.new)
  data.offset = xml_value(entity, "offset").first?.try(&.to_u64) || 0_u64
  data.size = xml_value(entity, "size").first?.try(&.to_u64) || 0_u64
  data.length = xml_value(entity, "length").first?.try(&.to_u64) || 0_u64

  data.checksum_extracted = normalize_checksum(xml_value(entity, "extracted-checksum").first? || "")
  extracted_style = xml_select(entity, "extracted-checksum").first?.try { |elem| elem["style"]? }
  data.checksum_extracted_style =
    extracted_style.try { |style| XARChecksumAlgo.parse?(style) } || XARChecksumAlgo::NONE

  data.checksum_archived = normalize_checksum(xml_value(entity, "archived-checksum").first? || "")
  archived_style = xml_select(entity, "archived-checksum").first?.try { |elem| elem["style"]? }
  data.checksum_archived_style =
    archived_style.try { |style| XARChecksumAlgo.parse?(style) } || XARChecksumAlgo::NONE

  # Only the part of the style after "/x-" names the encoding
  # (e.g. "application/x-gzip" -> "gzip").
  encoding_style = xml_select(entity, "encoding").first?.try { |elem| elem["style"]? }
  encoding_name = encoding_style.try { |style| style.split("/x-").last? }
  data.encoding = encoding_name.try { |name| XARFileEncoding.parse?(name) } || XARFileEncoding::NONE

  data
end
157
# Fills `ea` from a TOC <ea> element: the shared payload fields are decoded
# by xar_decode_data, then the <name> child is stored ("" when absent).
# Returns `ea`.
def xar_decode_ea(entity : XML::Node, ea : XARFileEAttrs = XARFileEAttrs.new)
  xar_decode_data(entity, ea)
  ea.name = xml_value(entity, "name").first? || ""
  ea
end
163
# Decodes a TOC <file> element and, recursively, every nested <file> element.
#
# `path` is the directory prefix for this entry (it ends in "/"); nested
# entries receive "#{path}#{name}/". Missing metadata falls back to neutral
# values ("" for text, 0 for numbers, "0000" for mode, FILE for type).
#
# Returns a flat array with this entry first, followed by all of its
# descendants in document order. A warning is printed when a non-directory
# entry has nested <file> children; the children are decoded regardless.
def xar_decode_file(entity : XML::Node, path : String = "./") : Array(XARFile)
  file = XARFile.new
  file.path = path
  file.name = xml_value(entity, "name").first? || ""
  file.type = xml_value(entity, "type").first?.try { |t| XARFileType.parse?(t) } || XARFileType::FILE
  file.mode = (xml_value(entity, "mode").first? || "0000").split("").map { |digit| digit.to_u8 }
  file.uid = xml_value(entity, "uid").first?.try(&.to_u64) || 0_u64
  file.gid = xml_value(entity, "gid").first?.try(&.to_u64) || 0_u64
  file.user = xml_value(entity, "user").first? || ""
  file.group = xml_value(entity, "group").first? || ""
  file.size = xml_value(entity, "size").first?.try(&.to_u64) || 0_u64

  if data_node = xml_select(entity, "data").first?
    xar_decode_data data_node, file.data
  end
  if ea_node = xml_select(entity, "ea").first?
    xar_decode_ea ea_node, file.ea
  end

  files = [file]
  children = xml_select(entity, "file")
  return files if children.empty?

  if file.type != XARFileType::DIRECTORY
    puts "warn: found a #{file.type} with #{children.size} children"
  end

  # Append in place: rebuilding the array with `+=` for every child would
  # copy everything collected so far each time.
  child_path = "#{path}#{file.name}/"
  children.each do |child|
    files.concat(xar_decode_file(child, child_path))
  end
  files
end
196
# Parse command line options.
# Recognised flags toggle the booleans below; --help prints usage and exits 0.
# Any other argument is taken as the archive filename, and a second such
# argument is an error.
strict_mode = false
no_extract = false
filename = ""

ARGV.each do |arg|
  case arg
  when "--strict"
    strict_mode = true
  when "--no-extract"
    no_extract = true
  when "--help", "-h"
    puts "Usage: #{PROGRAM_NAME} [options] <xar_file>"
    puts "Options:"
    puts " --strict Fail extraction if any checksum validation fails"
    puts " --no-extract Only validate checksums, don't extract files"
    puts " --help, -h Show this help message"
    exit 0
  else
    perror "multiple filenames provided" unless filename.empty?
    filename = arg
  end
end

perror "no filename given" if filename.empty?
227
# Main extraction pass.
#
# Reads the header, inflates and parses the TOC, loads the heap into memory,
# then for every non-directory entry: slices its payload out of the heap,
# validates the archived checksum, decompresses, validates the extracted
# checksum and (unless --no-extract was given) writes the result below
# "<filename>.extracted". A summary is printed at the end.
File.open(filename, "r") do |file|
  header = XARHeader.new
  header.load file

  perror "not a xar file" if header.magic != "xar!"

  puts "#{header.magic}"
  puts "header size #{header.header_size}"
  puts "format version #{header.version}"
  puts "TOC length (compressed) #{header.length_compressed}"
  puts "TOC length (uncompressed) #{header.length_uncompressed}"
  puts "checksum algo #{XARChecksumAlgo.new(header.checksum_algo.to_i32).to_s}"

  toc_data = Bytes.new header.length_uncompressed
  file.seek header.header_size

  Compress::Zlib::Reader.open file do |zfile|
    # A single IO#read may return fewer bytes than requested; keep reading
    # until the whole TOC buffer is filled, and fail if the stream ends early.
    perror "truncated TOC" unless zfile.read_fully?(toc_data)
  end

  xar_xml = XML.parse String.new(toc_data)
  xar_obj = xml_select(xar_xml, "xar")
  perror "empty xar object" if xar_obj.empty?

  tocs = xml_select(xar_obj.first, "toc")
  perror "empty TOC" if tocs.empty?

  toc = tocs.first
  puts "reading TOC"

  xar = XAR.new
  elem = xml_select(toc, "checksum").first
  xar.checksum.style = XARChecksumAlgo.parse elem["style"]
  xar.checksum.size = xml_value(elem, "size").first.to_u64
  xar.checksum.offset = xml_value(elem, "offset").first.to_u64
  puts "TOC is checksummed as #{xar.checksum.style}, #{xar.checksum.size} bytes at offset #{xar.checksum.offset}"

  xml_select(toc, "file").each do |entity|
    xar.files.concat(xar_decode_file(entity))
  end

  file_count = xar.files.count { |e| e.type == XARFileType::FILE }
  directory_count = xar.files.count { |e| e.type == XARFileType::DIRECTORY }
  puts "contains #{file_count} files across #{directory_count} directories"
  puts xar.files.map { |e| "#{e.path}#{e.name}" }.join " "

  # The heap starts right after the header and the compressed TOC and runs
  # to the end of the file.
  heap_start = header.header_size.to_u64 + header.length_compressed
  file_size = file.size
  if heap_start > file_size
    perror "archive is truncated: heap offset #{heap_start} is past end of file (#{file_size} bytes)"
  end
  heap_size = file_size - heap_start

  file.seek heap_start
  heap_data = Bytes.new(heap_size)
  file.read_fully(heap_data)

  # Unarchive files (or just validate if --no-extract is specified)
  validation_results = {
    "files_processed"             => 0,
    "files_extracted"             => 0,
    "checksum_failures"           => 0,
    "archived_checksum_failures"  => 0,
    "extracted_checksum_failures" => 0,
  }

  # Absolute form of the extraction directory, used to reject entries whose
  # archive-supplied name would resolve outside of it.
  extract_root = File.expand_path("#{filename}.extracted")

  xar.files.each do |xarfile|
    next if xarfile.type == XARFileType::DIRECTORY

    output_path = File.join("#{filename}.extracted", xarfile.path, xarfile.name)

    # Entry names come from the archive and are untrusted: refuse anything
    # (for example names containing "..") that escapes the extraction root.
    unless File.expand_path(output_path).starts_with?("#{extract_root}#{File::SEPARATOR}")
      puts " Error: #{output_path} resolves outside the extraction directory, skipping"
      next
    end

    # Log file metadata
    puts "Processing file: #{output_path}"
    puts " Offset: #{xarfile.data.offset}"
    puts " Compressed size: #{xarfile.data.length}"
    puts " Uncompressed size: #{xarfile.data.size}"
    puts " Encoding: #{xarfile.data.encoding}"

    # Extract compressed data from heap
    # In XAR format: length = compressed size, size = uncompressed size
    compressed_size = xarfile.data.length
    if xarfile.data.offset + compressed_size > heap_data.size
      puts " Error: Requested data extends beyond heap boundary (offset: #{xarfile.data.offset}, compressed_size: #{compressed_size}, heap_size: #{heap_data.size})"
      next
    end

    compressed_data = heap_data[xarfile.data.offset, compressed_size]

    validation_results["files_processed"] += 1

    # Validate archived checksum (on compressed data)
    archived_valid = validate_checksum(compressed_data, xarfile.data.checksum_archived,
      xarfile.data.checksum_archived_style,
      "archived data for #{xarfile.name}")

    # When the TOC declares no known encoding and the stored and extracted
    # sizes are equal, the payload is stored as-is. It must not be sniffed:
    # a stored file may itself be a gzip/bzip2/zlib stream, and decompressing
    # it would corrupt the output and fail the extracted checksum.
    stored_raw = xarfile.data.encoding == XARFileEncoding::NONE &&
                 xarfile.data.length == xarfile.data.size

    # Otherwise auto-detect the compression format based on magic bytes
    actual_encoding = stored_raw ? XARFileEncoding::NONE : detect_compression_format(compressed_data)

    # Decompress the data if necessary
    decompressed_data = case actual_encoding
                        when XARFileEncoding::GZIP
                          # The detector reports ZLIB streams as GZIP; a first
                          # byte of 0x78 distinguishes them.
                          is_zlib = compressed_data[0] == 0x78
                          begin
                            if is_zlib
                              Compress::Zlib::Reader.new(SliceIO.new(compressed_data)).getb_to_end
                            else
                              Compress::Gzip::Reader.new(SliceIO.new(compressed_data)).getb_to_end
                            end
                          rescue e
                            puts "Error decompressing #{is_zlib ? "ZLIB" : "GZIP"} data for #{xarfile.name}: #{e}"
                            next
                          end
                        when XARFileEncoding::BZIP2
                          begin
                            Bzip2::Reader.new(SliceIO.new(compressed_data)).getb_to_end
                          rescue e
                            puts "Error decompressing BZIP2 data for #{xarfile.name}: #{e}"
                            next
                          end
                        else
                          compressed_data
                        end

    # Validate extracted checksum (on decompressed data)
    extracted_valid = validate_checksum(decompressed_data, xarfile.data.checksum_extracted,
      xarfile.data.checksum_extracted_style,
      "extracted data for #{xarfile.name}")

    # Track validation results
    validation_results["archived_checksum_failures"] += 1 unless archived_valid
    validation_results["extracted_checksum_failures"] += 1 unless extracted_valid

    # Handle checksum validation results
    checksum_failed = !archived_valid || !extracted_valid
    validation_results["checksum_failures"] += 1 if checksum_failed

    if checksum_failed && strict_mode
      perror "Checksum validation failed for #{output_path} (strict mode enabled)"
    elsif checksum_failed
      puts "Warning: Checksum validation failed for #{output_path}, extracting anyway"
    end

    # Write the file (unless --no-extract is specified)
    if no_extract
      puts "Validated: #{output_path} (not extracted)"
    else
      begin
        # Directories are only created when something is actually written,
        # so --no-extract leaves the filesystem untouched.
        Dir.mkdir_p(File.dirname(output_path))
        File.write(output_path, decompressed_data)
        puts "Extracted: #{output_path}"
        validation_results["files_extracted"] += 1
      rescue e
        perror "Error writing file #{output_path}: #{e}"
      end
    end
  end

  # Print validation summary
  puts "\n=== #{no_extract ? "Validation" : "Extraction"} Summary ==="
  puts "Files processed: #{validation_results["files_processed"]}"
  puts "Files extracted: #{validation_results["files_extracted"]}" unless no_extract
  puts "Checksum failures: #{validation_results["checksum_failures"]}"
  puts " - Archived checksum failures: #{validation_results["archived_checksum_failures"]}"
  puts " - Extracted checksum failures: #{validation_results["extracted_checksum_failures"]}"

  if validation_results["checksum_failures"] > 0
    puts "\nWarning: Some checksum validations failed. The extracted files may be corrupted."
  else
    puts "\n✓ All checksums validated successfully!"
  end
end
405end