MIRROR: javascript for 🐜's, a tiny runtime with big ambitions
1const std = @import("std");
2const builtin = @import("builtin");
3const linker = @import("linker.zig");
4
5const c = @cImport({
6 @cInclude("zlib.h");
7});
8
/// Every failure the tarball extraction code in this file can report.
pub const ExtractError = error{
    // Stream / archive level failures.
    DecompressionFailed,
    InvalidTarHeader,
    UnsupportedFormat,

    // Environment failures.
    IoError,
    OutOfMemory,

    // Entry path failures.
    PathTooLong,
    InvalidPath,
};
18
/// Rejects paths that are empty, longer than 4096 bytes, or that start with '/'.
inline fn validateBasic(path: []const u8) ExtractError!void {
    const max_path_len = 4096;
    // `and` short-circuits, so `path[0]` is only read once the length is known to be >= 1.
    const acceptable = path.len >= 1 and path.len <= max_path_len and path[0] != '/';
    if (!acceptable) return error.InvalidPath;
}
23
/// Rejects paths containing control bytes, backslashes, or a `..` segment.
///
/// Segments are the pieces between '/' separators; only a segment that is
/// exactly ".." is refused. Every failure is reported as `error.InvalidPath`.
inline fn validateBadCharsAndTraversal(path: []const u8) ExtractError!void {
    for (path) |byte| {
        // `byte < 0x20` already includes NUL.
        if (byte < 0x20 or byte == '\\') return error.InvalidPath;
    }

    var segments = std.mem.splitScalar(u8, path, '/');
    while (segments.next()) |segment| {
        if (std.mem.eql(u8, segment, "..")) return error.InvalidPath;
    }
}
44
/// Reports whether `name` is one of the listed device names, compared
/// case-insensitively, either on its own or followed by a '.' and anything else
/// (e.g. "con", "NUL.txt", "Lpt9.tar.gz").
inline fn isWindowsReserved(name: []const u8) bool {
    const device_names = [_][]const u8{
        "CON",  "PRN",  "AUX",  "NUL",
        "COM1", "COM2", "COM3", "COM4",
        "COM5", "COM6", "COM7", "COM8",
        "COM9", "LPT1", "LPT2", "LPT3",
        "LPT4", "LPT5", "LPT6", "LPT7",
        "LPT8", "LPT9",
    };

    // Everything before the first '.' (or the whole name when there is none).
    const stem_end = std.mem.indexOfScalar(u8, name, '.') orelse name.len;
    const stem = name[0..stem_end];

    for (device_names) |device| {
        if (std.ascii.eqlIgnoreCase(stem, device)) return true;
    }
    return false;
}
61
/// On Windows targets, rejects paths in which any component is a reserved
/// device name (see `isWindowsReserved`). On every other target this is a no-op.
///
/// Every '/'-separated component is inspected, not just the last one, because a
/// reserved name is just as unusable as a directory ("aux/index.js") as it is
/// as a file name. Empty components, produced by a trailing or doubled '/', are
/// skipped so that directory entries such as "lib/" are accepted on Windows the
/// same way they are elsewhere.
///
/// Returns `error.InvalidPath` for an empty path or a reserved component.
inline fn validateWindowsReserved(path: []const u8) ExtractError!void {
    if (comptime builtin.os.tag != .windows) return;
    if (path.len == 0) return error.InvalidPath;

    var components = std.mem.splitScalar(u8, path, '/');
    while (components.next()) |component| {
        if (component.len == 0) continue;
        if (isWindowsReserved(component)) return error.InvalidPath;
    }
}
73
/// Runs the three path checks in order: basic shape, forbidden bytes and `..`
/// segments, then the Windows reserved-name check. The first failure is returned.
fn validatePath(path: []const u8) ExtractError!void {
    try validateBasic(path);
    try validateBadCharsAndTraversal(path);
    return validateWindowsReserved(path);
}
79
/// One 512-byte tar header block, laid out field by field.
///
/// The struct is `extern` so the field order and sizes are exactly as declared;
/// a comptime assertion pins the total size to 512 bytes.
pub const TarHeader = extern struct {
    name: [100]u8,
    mode: [8]u8,
    uid: [8]u8,
    gid: [8]u8,
    size: [12]u8,
    mtime: [12]u8,
    checksum: [8]u8,
    typeflag: u8,
    linkname: [100]u8,
    magic: [6]u8,
    version: [2]u8,
    uname: [32]u8,
    gname: [32]u8,
    devmajor: [8]u8,
    devminor: [8]u8,
    prefix: [155]u8,
    _padding: [12]u8,

    comptime {
        std.debug.assert(@sizeOf(TarHeader) == 512);
    }

    /// Returns true when every byte of the header block is zero.
    pub fn isZero(self: *const TarHeader) bool {
        return std.mem.allEqual(u8, std.mem.asBytes(self), 0);
    }

    /// Returns the entry path.
    ///
    /// When the `prefix` field is non-empty the result is "prefix/name" written
    /// into `buf`; otherwise the result is a slice of the header's own `name`
    /// field and `buf` is left untouched. Both fields end at their first NUL, or
    /// span the whole field when there is none.
    ///
    /// Returns `error.InvalidPath` when `buf` is too small for the joined path.
    pub fn getName(self: *const TarHeader, buf: []u8) ![]const u8 {
        const prefix_len = std.mem.indexOfScalar(u8, &self.prefix, 0) orelse self.prefix.len;
        const name_len = std.mem.indexOfScalar(u8, &self.name, 0) orelse self.name.len;

        if (prefix_len == 0) return self.name[0..name_len];

        const total_len = prefix_len + 1 + name_len;
        if (total_len > buf.len) return error.InvalidPath;

        @memcpy(buf[0..prefix_len], self.prefix[0..prefix_len]);
        buf[prefix_len] = '/';
        @memcpy(buf[prefix_len + 1 ..][0..name_len], self.name[0..name_len]);
        return buf[0..total_len];
    }

    /// Parses an octal header field, ignoring NUL and space padding on both ends.
    ///
    /// Padding is stripped from the front as well as the back because some tar
    /// writers right-align the digits with leading spaces.
    fn parseOctalField(comptime T: type, field: []const u8) error{InvalidTarHeader}!T {
        const digits = std.mem.trim(u8, field, &[_]u8{ 0, ' ' });
        return std.fmt.parseInt(T, digits, 8) catch error.InvalidTarHeader;
    }

    /// Returns the `size` field parsed as octal, or `error.InvalidTarHeader`
    /// when the field does not parse.
    pub fn getSize(self: *const TarHeader) !u64 {
        return parseOctalField(u64, &self.size);
    }

    /// Returns the `mode` field parsed as octal, or `error.InvalidTarHeader`
    /// when the field does not parse.
    pub fn getMode(self: *const TarHeader) !u32 {
        return parseOctalField(u32, &self.mode);
    }

    /// True when the type flag is '0' or a NUL byte.
    pub fn isFile(self: *const TarHeader) bool {
        return self.typeflag == '0' or self.typeflag == 0;
    }

    /// True when the type flag is '5'.
    pub fn isDirectory(self: *const TarHeader) bool {
        return self.typeflag == '5';
    }

    /// True when the type flag is '2'.
    pub fn isSymlink(self: *const TarHeader) bool {
        return self.typeflag == '2';
    }
};
147
/// Streaming inflater built on zlib's `inflate`.
///
/// Instances are heap-allocated by `init` and must be released with `deinit`.
pub const GzipDecompressor = struct {
    stream: c.z_stream,
    initialized: bool,
    allocator: std.mem.Allocator,

    /// Allocates a decompressor and initialises its zlib stream.
    ///
    /// The window-bits argument `15 + 32` asks zlib for the largest window with
    /// automatic zlib/gzip header detection (per the zlib manual).
    ///
    /// Returns `error.DecompressionFailed` when zlib refuses to initialise, or
    /// the allocator's error when the allocation fails.
    pub fn init(allocator: std.mem.Allocator) !*GzipDecompressor {
        const self = try allocator.create(GzipDecompressor);
        // The errdefer is the only cleanup on the error paths below; destroying
        // `self` by hand as well would free it twice.
        errdefer allocator.destroy(self);

        self.* = .{
            .stream = std.mem.zeroes(c.z_stream),
            .initialized = false,
            .allocator = allocator,
        };

        if (c.inflateInit2(&self.stream, 15 + 32) != c.Z_OK) {
            return error.DecompressionFailed;
        }

        self.initialized = true;
        return self;
    }

    /// Ends the zlib stream (if it was initialised) and frees the decompressor.
    pub fn deinit(self: *GzipDecompressor) void {
        if (self.initialized) _ = c.inflateEnd(&self.stream);
        self.allocator.destroy(self);
    }

    /// Inflates `input`, handing each chunk of output to `output_fn` together
    /// with `user_data`.
    ///
    /// Returns true once zlib reports the end of the compressed stream and false
    /// when it needs more input. Errors from `output_fn` are propagated; any
    /// other zlib failure, or an `input` too large for zlib's length field,
    /// becomes `error.DecompressionFailed`.
    pub fn decompress(
        self: *GzipDecompressor,
        input: []const u8,
        output_fn: *const fn (data: []const u8, user_data: ?*anyopaque) anyerror!void,
        user_data: ?*anyopaque,
    ) !bool {
        var output_buf: [256 * 1024]u8 = undefined;

        self.stream.next_in = @constCast(input.ptr);
        // Checked conversion: an oversized slice is reported instead of being
        // truncated or tripping an unchecked cast.
        self.stream.avail_in = std.math.cast(@TypeOf(self.stream.avail_in), input.len) orelse
            return error.DecompressionFailed;

        while (true) {
            self.stream.next_out = &output_buf;
            self.stream.avail_out = output_buf.len;

            const ret = c.inflate(&self.stream, c.Z_NO_FLUSH);

            // Z_BUF_ERROR means no progress was possible (nothing left to read
            // and nothing pending); zlib documents it as non-fatal.
            if (ret == c.Z_BUF_ERROR) return false;
            if (ret != c.Z_OK and ret != c.Z_STREAM_END) return error.DecompressionFailed;

            const produced = output_buf.len - self.stream.avail_out;
            if (produced > 0) try output_fn(output_buf[0..produced], user_data);

            if (ret == c.Z_STREAM_END) return true;

            // Keep going while input remains OR the output buffer was filled:
            // a full buffer means zlib may still hold output it could not
            // deliver, even though all input has been consumed.
            if (self.stream.avail_in == 0 and self.stream.avail_out != 0) return false;
        }
    }
};
208
/// Incremental tar parser.
///
/// Callers push bytes with `feed` and receive one event per call: a new entry,
/// a slice of file data, end of archive, a request for more data, or an error.
/// The parser performs no I/O and no allocation.
pub const TarParser = struct {
    state: State,
    header: TarHeader,
    header_bytes_read: usize,
    current_file_remaining: u64,
    skip_bytes: usize,
    strip_prefix: [128]u8,
    strip_prefix_len: usize,
    prefix_detected: bool,
    path_buf: [256]u8,

    const State = enum {
        read_header,
        read_file_data,
        skip_padding,
    };

    /// Creates a parser that strips `default_prefix` from entry paths until a
    /// directory entry replaces it. At most 128 bytes of the prefix are kept.
    pub fn init(default_prefix: []const u8) TarParser {
        var prefix_buf: [128]u8 = undefined;
        const len = @min(default_prefix.len, 128);
        @memcpy(prefix_buf[0..len], default_prefix[0..len]);
        return .{
            .state = .read_header,
            .header = undefined,
            .header_bytes_read = 0,
            .current_file_remaining = 0,
            .skip_bytes = 0,
            .strip_prefix = prefix_buf,
            .strip_prefix_len = len,
            .prefix_detected = false,
            .path_buf = undefined,
        };
    }

    /// A parsed header. `path` borrows parser-owned storage (the header or
    /// `path_buf`) that is overwritten when the next header is read.
    pub const Entry = struct {
        path: []const u8,
        mode: u32,
        size: u64,
        entry_type: Type,

        pub const Type = enum {
            file,
            directory,
            symlink,
        };
    };

    /// Outcome of one `feed` call: what happened plus how many input bytes were
    /// used. The caller must drop `consumed` bytes before feeding again.
    pub const ParseResult = struct {
        kind: Kind,
        consumed: usize,

        pub const Kind = union(enum) {
            need_more_data,
            entry: Entry,
            file_data: []const u8,
            end_of_archive,
            err: ExtractError,
        };
    };

    /// Builds an error result.
    fn failure(err: ExtractError, consumed: usize) ParseResult {
        return .{ .kind = .{ .err = err }, .consumed = consumed };
    }

    /// True for records that carry metadata about other entries rather than
    /// content of their own: pax extended headers ('x', 'g') and GNU long
    /// name / long link records ('L', 'K').
    fn isMetadataRecord(typeflag: u8) bool {
        return switch (typeflag) {
            'x', 'g', 'L', 'K' => true,
            else => false,
        };
    }

    /// Advances the parser with `data` and reports the next event.
    ///
    /// `need_more_data` is only returned after all of `data` has been consumed.
    /// A `file_data` slice points into `data`.
    pub fn feed(self: *TarParser, data: []const u8) ParseResult {
        switch (self.state) {
            .read_header => {
                const needed = @sizeOf(TarHeader) - self.header_bytes_read;
                const to_copy = @min(needed, data.len);

                // Headers may arrive split across calls, so they are assembled
                // in `self.header` byte by byte.
                const header_bytes: *[512]u8 = @ptrCast(&self.header);
                @memcpy(header_bytes[self.header_bytes_read..][0..to_copy], data[0..to_copy]);
                self.header_bytes_read += to_copy;

                if (self.header_bytes_read < @sizeOf(TarHeader)) {
                    return .{ .kind = .need_more_data, .consumed = to_copy };
                }
                self.header_bytes_read = 0;

                if (self.header.isZero()) {
                    return .{ .kind = .end_of_archive, .consumed = to_copy };
                }

                // Metadata records are not real files. Skip the record's payload
                // (rounded up to whole 512-byte blocks) instead of reporting it
                // as a `.file` entry, which would be written to disk under names
                // like "pax_global_header".
                if (isMetadataRecord(self.header.typeflag)) {
                    const payload = self.header.getSize() catch
                        return failure(error.InvalidTarHeader, to_copy);
                    const padded = payload + (512 - (payload % 512)) % 512;
                    if (padded > 0) {
                        self.skip_bytes = std.math.cast(usize, padded) orelse
                            return failure(error.InvalidTarHeader, to_copy);
                        self.state = .skip_padding;
                    }
                    // `need_more_data` must only be reported once the input is
                    // used up, so keep parsing whatever is left.
                    if (data.len > to_copy) {
                        const next = self.feed(data[to_copy..]);
                        return .{ .kind = next.kind, .consumed = to_copy + next.consumed };
                    }
                    return .{ .kind = .need_more_data, .consumed = to_copy };
                }

                var path = self.header.getName(&self.path_buf) catch {
                    return failure(error.InvalidPath, to_copy);
                };

                // The first directory entry seen replaces the default prefix.
                if (!self.prefix_detected and self.header.isDirectory()) {
                    // Explicit `usize`: `@min` against a small constant may pick
                    // a narrower integer type, and the `+= 1` below must not
                    // overflow it.
                    var prefix_len: usize = @min(path.len, 127);
                    @memcpy(self.strip_prefix[0..prefix_len], path[0..prefix_len]);
                    if (prefix_len > 0 and self.strip_prefix[prefix_len - 1] != '/') {
                        self.strip_prefix[prefix_len] = '/';
                        prefix_len += 1;
                    }
                    self.strip_prefix_len = prefix_len;
                    self.prefix_detected = true;
                }

                const prefix = self.strip_prefix[0..self.strip_prefix_len];
                if (std.mem.startsWith(u8, path, prefix)) {
                    path = path[self.strip_prefix_len..];
                }

                // An empty path (the prefix directory itself) is passed through
                // unvalidated; the caller decides what to do with it.
                if (path.len > 0) validatePath(path) catch {
                    return failure(error.InvalidPath, to_copy);
                };

                const size = self.header.getSize() catch
                    return failure(error.InvalidTarHeader, to_copy);
                const mode = self.header.getMode() catch
                    return failure(error.InvalidTarHeader, to_copy);

                const entry_type: Entry.Type = if (self.header.isDirectory())
                    .directory
                else if (self.header.isSymlink())
                    .symlink
                else
                    .file;

                self.current_file_remaining = size;
                self.state = if (size > 0) .read_file_data else .read_header;

                return .{
                    .consumed = to_copy,
                    .kind = .{ .entry = .{
                        .path = path,
                        .mode = mode,
                        .size = size,
                        .entry_type = entry_type,
                    } },
                };
            },

            .read_file_data => {
                const to_read: usize = @min(self.current_file_remaining, data.len);
                self.current_file_remaining -= to_read;

                if (self.current_file_remaining == 0) {
                    // File payloads are padded up to a multiple of 512 bytes.
                    const size = self.header.getSize() catch
                        return failure(error.InvalidTarHeader, to_read);
                    const padding = (512 - (size % 512)) % 512;
                    if (padding > 0) {
                        self.skip_bytes = @intCast(padding);
                        self.state = .skip_padding;
                    } else {
                        self.state = .read_header;
                    }
                }

                return .{ .kind = .{ .file_data = data[0..to_read] }, .consumed = to_read };
            },

            .skip_padding => {
                const to_skip = @min(self.skip_bytes, data.len);
                self.skip_bytes -= to_skip;

                if (self.skip_bytes == 0) {
                    self.state = .read_header;
                }

                // Leftover input belongs to the next header; parse it right away
                // so the caller sees a real event instead of `need_more_data`.
                if (data.len > to_skip) {
                    const next = self.feed(data[to_skip..]);
                    return .{ .kind = next.kind, .consumed = to_skip + next.consumed };
                }
                return .{ .kind = .need_more_data, .consumed = to_skip };
            },
        }
    }

    /// Re-initialises the parser, keeping the current strip prefix as the new
    /// default prefix.
    pub fn reset(self: *TarParser) void {
        self.* = TarParser.init(self.strip_prefix[0..self.strip_prefix_len]);
    }
};
366
/// Extracts a gzip-compressed tar stream into a directory.
///
/// Compressed bytes go in through `feedCompressed` (or raw tar bytes through
/// `feedTar`); files, directories and symlinks are created beneath the output
/// directory as their entries are parsed. Create with `init`, release with
/// `deinit`.
pub const Extractor = struct {
    allocator: std.mem.Allocator,
    output_dir: std.fs.Dir,
    parser: TarParser,
    decompressor: *GzipDecompressor,
    current_file: ?std.fs.File,
    current_file_path: [256]u8,
    current_file_path_len: usize,
    current_file_mode: u32,
    files_extracted: u32,
    bytes_extracted: u64,

    /// Creates `output_path` (including parents), opens it, and sets up the
    /// decompressor and a tar parser whose default strip prefix is "package/".
    ///
    /// Returns `error.IoError` when the directory cannot be created; allocation,
    /// decompressor and `openDir` errors are propagated unchanged.
    pub fn init(allocator: std.mem.Allocator, output_path: []const u8) !*Extractor {
        const extractor = try allocator.create(Extractor);
        errdefer allocator.destroy(extractor);

        std.fs.cwd().makePath(output_path) catch |err| switch (err) {
            error.PathAlreadyExists => {},
            else => return error.IoError,
        };

        const decompressor = try GzipDecompressor.init(allocator);
        errdefer decompressor.deinit();

        const output_dir = try std.fs.cwd().openDir(output_path, .{});

        extractor.* = .{
            .allocator = allocator,
            .output_dir = output_dir,
            .parser = TarParser.init("package/"),
            .decompressor = decompressor,
            .current_file = null,
            .current_file_path = undefined,
            .current_file_path_len = 0,
            .current_file_mode = 0o644,
            .files_extracted = 0,
            .bytes_extracted = 0,
        };

        return extractor;
    }

    /// Closes any file still open, then releases the output directory, the
    /// decompressor, and the extractor itself.
    pub fn deinit(self: *Extractor) void {
        if (self.current_file) |f| {
            f.close();
            self.applyFileMode();
        }
        self.output_dir.close();
        self.decompressor.deinit();
        self.allocator.destroy(self);
    }

    /// Applies the recorded permission bits to the most recently written file.
    ///
    /// Does nothing when no path is recorded. On non-Windows targets the mode is
    /// only applied when at least one execute bit is set; the `fchmodat` result
    /// is ignored. The recorded path is cleared afterwards.
    fn applyFileMode(self: *Extractor) void {
        if (self.current_file_path_len == 0) return;

        if (comptime builtin.os.tag != .windows) {
            if (self.current_file_mode & 0o111 != 0) {
                const path = self.current_file_path[0..self.current_file_path_len];
                // One extra byte for the NUL terminator the C API requires.
                var path_buf: [257]u8 = undefined;
                @memcpy(path_buf[0..path.len], path);
                path_buf[path.len] = 0;
                const path_z: [*:0]const u8 = path_buf[0..path.len :0];
                _ = std.c.fchmodat(self.output_dir.fd, path_z, @intCast(self.current_file_mode & 0o777), 0);
            }
        }
        self.current_file_path_len = 0;
    }

    /// Inflates `data` and routes the resulting tar bytes into `feedTar`.
    pub fn feedCompressed(self: *Extractor, data: []const u8) !void {
        _ = try self.decompressor.decompress(data, handleDecompressed, self);
    }

    /// Decompressor callback: `user_data` is the `*Extractor` passed by
    /// `feedCompressed`.
    fn handleDecompressed(data: []const u8, user_data: ?*anyopaque) !void {
        const self: *Extractor = @ptrCast(@alignCast(user_data));
        try self.feedTar(data);
    }

    /// Runs uncompressed tar bytes through the parser and acts on every event
    /// until `data` is used up, the archive ends, or an error occurs.
    pub fn feedTar(self: *Extractor, data: []const u8) !void {
        var remaining = data;
        while (remaining.len > 0) {
            const result = self.parser.feed(remaining);
            remaining = remaining[result.consumed..];
            switch (result.kind) {
                .need_more_data => return,
                .entry => |entry| try self.handleEntry(entry),
                .file_data => |d| try self.writeFileData(d),
                .end_of_archive => return self.closeCurrentFile(),
                .err => |e| return e,
            }
        }
    }

    /// Dispatches a parsed entry by type. Entries with an empty path are
    /// skipped; directory and symlink creation errors are ignored.
    inline fn handleEntry(self: *Extractor, entry: TarParser.Entry) !void {
        // A new header always ends the previous file. Closing it here, before
        // any early return, guarantees that payload bytes belonging to a skipped
        // or non-file entry are never appended to the file that came before it.
        self.closeCurrentFile();

        if (entry.path.len == 0) return;
        switch (entry.entry_type) {
            .directory => self.output_dir.makePath(entry.path) catch {},
            .file => try self.createFile(entry),
            .symlink => self.createSymlink(entry) catch {},
        }
    }

    /// Creates the file for `entry` (and its parent directories) and makes it
    /// the target of subsequent `writeFileData` calls.
    inline fn createFile(self: *Extractor, entry: TarParser.Entry) !void {
        self.closeCurrentFile();
        if (std.fs.path.dirname(entry.path)) |dir| {
            try self.output_dir.makePath(dir);
        }
        self.current_file = try self.output_dir.createFile(entry.path, .{});

        // Remember the path for `applyFileMode`. A path that does not fit is not
        // recorded at all: a truncated path could name a different file.
        if (entry.path.len <= self.current_file_path.len) {
            @memcpy(self.current_file_path[0..entry.path.len], entry.path);
            self.current_file_path_len = entry.path.len;
        } else {
            self.current_file_path_len = 0;
        }
        self.current_file_mode = entry.mode;
        self.files_extracted += 1;
    }

    /// Creates the symlink described by `entry`, taking the target from the
    /// parser's current header `linkname` field.
    ///
    /// The target must pass `validatePath`. Any existing file at the link path
    /// is removed first.
    inline fn createSymlink(self: *Extractor, entry: TarParser.Entry) !void {
        const linkname = &self.parser.header.linkname;
        const linkname_len = std.mem.indexOfScalar(u8, linkname, 0) orelse linkname.len;
        const target = linkname[0..linkname_len];

        if (entry.path.len == 0 or target.len == 0) return;
        try validatePath(target);

        if (std.fs.path.dirname(entry.path)) |dir| {
            try self.output_dir.makePath(dir);
        }

        self.output_dir.deleteFile(entry.path) catch {};
        try linker.createSymlinkOrCopy(self.output_dir, target, entry.path);
    }

    /// Appends `data` to the currently open file; a no-op when none is open.
    inline fn writeFileData(self: *Extractor, data: []const u8) !void {
        if (self.current_file) |f| {
            try f.writeAll(data);
            self.bytes_extracted += data.len;
        }
    }

    /// Closes the currently open file, if any, and applies its recorded mode.
    inline fn closeCurrentFile(self: *Extractor) void {
        if (self.current_file) |f| {
            f.close();
            self.applyFileMode();
            self.current_file = null;
        }
    }

    /// Returns the number of files created and payload bytes written so far.
    pub fn stats(self: *const Extractor) struct { files: u32, bytes: u64 } {
        return .{
            .files = self.files_extracted,
            .bytes = self.bytes_extracted,
        };
    }
};