My own corner of monopam
2
fork

Configure Feed

Select the types of activity you want to include in your feed.

Refactor Pack to use bytesrw reader factories; add pread_reader

Pack files no longer require loading the entire file into a string.
Pack.t now stores a reader factory (int -> Reader.t) that creates
a bytesrw reader at any offset. of_string wraps strings for backward
compatibility; of_reader accepts any reader factory.

Added Bytesrw_eio.pread_reader: creates a bytesrw Reader from an
Eio file handle using pread for random-access reading. This is the
building block for file-backed pack reading (not wired in yet: the
switch lifetime still needs to be designed).

Pack.inflate is replaced by Pack.inflate_reader, which takes a Reader
directly instead of a string and an offset.

+149 -67
+28
ocaml-bytesrw-eio/src/bytesrw_eio.ml
··· 40 40 @param slice_length 41 41 Suggested slice length for upstream (default: 65536, which is 42 42 {!Bytes.Slice.unix_io_buffer_size}) *) 43 + (** Create a [Bytes.Reader.t] from an Eio file at [offset]. 44 + 45 + Uses pread for random-access reading without loading the whole file. 46 + 47 + @param slice_length Maximum bytes per slice (default: 65536). 48 + @param length Number of bytes to read (default: to end of file). *) 49 + let pread_reader ?(slice_length = Bytes.Slice.unix_io_buffer_size) 50 + ?length ~offset (file : _ Eio.File.ro) : Bytes.Reader.t = 51 + let stat = Eio.File.stat file in 52 + let file_length = Optint.Int63.to_int stat.size in 53 + let total = match length with Some n -> n | None -> file_length - offset in 54 + let pos = ref 0 in 55 + let buf_size = Bytes.Slice.check_length slice_length in 56 + let buf = Bytes.create buf_size in 57 + let read () = 58 + if !pos >= total then Bytes.Slice.eod 59 + else 60 + let len = min buf_size (total - !pos) in 61 + let cs = Cstruct.create len in 62 + Eio.File.pread_exact file 63 + ~file_offset:(Optint.Int63.of_int (offset + !pos)) 64 + [ cs ]; 65 + Cstruct.blit_to_bytes cs 0 buf 0 len; 66 + pos := !pos + len; 67 + Bytes.Slice.make buf ~first:0 ~length:len 68 + in 69 + Bytes.Reader.make ~slice_length read 70 + 43 71 let bytes_writer_of_flow ?(slice_length = Bytes.Slice.unix_io_buffer_size) 44 72 (flow : _ Eio.Flow.sink) : Bytes.Writer.t = 45 73 let rec write slice =
+15
ocaml-bytesrw-eio/src/bytesrw_eio.mli
··· 22 22 23 23 @param slice_length Maximum bytes per slice (default: 65536). *) 24 24 25 + (** {1 Random-access readers} *) 26 + 27 + val pread_reader : 28 + ?slice_length:int -> 29 + ?length:int -> 30 + offset:int -> 31 + _ Eio.File.ro -> 32 + Bytesrw.Bytes.Reader.t 33 + (** [pread_reader ~offset file] creates a reader from [file] starting at 34 + [offset] bytes. Reads via pread on demand — does not load the file into 35 + memory. 36 + 37 + @param slice_length Maximum bytes per slice (default: 65536). 38 + @param length Number of bytes to read (default: to end of file). *) 39 + 25 40 (** {1 Writers} *) 26 41 27 42 val bytes_writer_of_flow :
+90 -61
ocaml-git/lib/pack.ml
··· 98 98 We use bytesrw.zlib with [~leftover:true] to handle concatenated zlib 99 99 streams in pack files. This positions the reader after each stream ends. *) 100 100 101 - (** Create a reader that serves bytes from [data] starting at [first]. Uses 102 - {!Bytes.unsafe_of_string} to avoid copying the string. *) 103 - let reader_of_string_at ~first data = 104 - let bytes = Bytes.unsafe_of_string data in 105 - let length = String.length data - first in 106 - let pos = ref 0 in 107 - let slice_length = min length (64 * 1024) in 108 - let read () = 109 - if !pos >= length then Slice.eod 110 - else 111 - let len = min slice_length (length - !pos) in 112 - let s = Slice.make bytes ~first:(first + !pos) ~length:len in 113 - pos := !pos + len; 114 - s 115 - in 116 - Reader.make ~slice_length read 117 - 118 101 (** Shared buffer for decompression output. Avoids allocating a new {!Buffer.t} 119 102 per inflate call -- [Buffer.clear] resets the length without freeing the 120 103 backing storage, so after the first large decompression the buffer never 121 104 reallocates. *) 122 105 let inflate_buf = Buffer.create (64 * 1024) 123 106 124 - (** Decompress zlib-compressed data starting at [first] in [data]. Returns 125 - (decompressed_data, bytes_consumed). Zero-copy on input: reads directly from 126 - the string via {!reader_of_string_at}. Reuses a shared output buffer to 127 - avoid per-call allocation. *) 128 - let inflate_with_consumed ?(first = 0) data = 107 + (** Decompress zlib data from a bytesrw reader. Returns (decompressed, 108 + consumed). The reader is positioned after the compressed stream. 
*) 109 + let inflate_reader reader = 129 110 try 130 - let base_reader = reader_of_string_at ~first data in 131 111 let decompressed = 132 - Bytesrw_zlib.Zlib.decompress_reads ~leftover:true () base_reader 112 + Bytesrw_zlib.Zlib.decompress_reads ~leftover:true () reader 133 113 in 134 114 Buffer.clear inflate_buf; 135 115 Reader.add_to_buffer inflate_buf decompressed; 136 116 let output = Buffer.contents inflate_buf in 137 - Ok (output, Reader.pos base_reader) 117 + Ok (output, Reader.pos reader) 138 118 with exn -> 139 119 Error 140 120 (`Msg (Fmt.str "zlib decompression error: %s" (Printexc.to_string exn))) 141 - 142 - (** Decompress zlib-compressed data starting at [first] in [data]. *) 143 - let inflate ?first data = 144 - match inflate_with_consumed ?first data with 145 - | Ok (output, _consumed) -> Ok output 146 - | Error e -> Error e 147 121 148 122 (** {1 Delta decoding} 149 123 ··· 246 220 247 221 type t = { 248 222 header : header; 249 - data : string; (** Full pack file data for random access *) 223 + length : int; 224 + reader_at : int -> Reader.t; 225 + (** Create a bytesrw reader starting at the given offset. *) 250 226 } 251 227 (** A pack file opened for reading. *) 252 228 253 - (** Open a pack file from a string. *) 254 - let of_string data = 255 - if String.length data < 12 then Error (`Msg "Pack file too short") 229 + (** Read [n] bytes from a reader into a string. 
*) 230 + let read_bytes reader n = 231 + let buf = Buffer.create n in 232 + let remaining = ref n in 233 + while !remaining > 0 do 234 + let slice = Reader.read reader in 235 + if Slice.is_eod slice then remaining := 0 236 + else begin 237 + let len = min (Slice.length slice) !remaining in 238 + Buffer.add_subbytes buf (Slice.bytes slice) (Slice.first slice) len; 239 + remaining := !remaining - len; 240 + if len < Slice.length slice then 241 + Reader.push_back reader 242 + (Slice.make (Slice.bytes slice) 243 + ~first:(Slice.first slice + len) 244 + ~length:(Slice.length slice - len)) 245 + end 246 + done; 247 + Buffer.contents buf 248 + 249 + (** Read a single byte from a reader. *) 250 + let read_uint8 reader = 251 + let s = read_bytes reader 1 in 252 + Char.code s.[0] 253 + 254 + (** Open a pack file from a bytesrw reader factory. *) 255 + let of_reader ~length ~reader_at = 256 + if length < 12 then Error (`Msg "Pack file too short") 256 257 else 257 - let magic = String.sub data 0 4 in 258 + let r = reader_at 0 in 259 + let hdr = read_bytes r 12 in 260 + let magic = String.sub hdr 0 4 in 258 261 if magic <> "PACK" then Error (`Msg "Invalid pack file: bad magic") 259 262 else 260 - let version = int32_be data 4 in 263 + let version = int32_be hdr 4 in 261 264 if version <> 2 && version <> 3 then err_unsupported_version version 262 265 else 263 - let count = int32_be data 8 in 264 - Ok { header = { version; count }; data } 266 + let count = int32_be hdr 8 in 267 + Ok { header = { version; count }; length; reader_at } 268 + 269 + (** Create a reader over a substring without copying. Uses 270 + {!Bytes.unsafe_of_string} and {!Slice.make} for zero-copy access. 
*) 271 + let reader_of_string_at data ~offset = 272 + let bytes = Bytes.unsafe_of_string data in 273 + let total = String.length data - offset in 274 + let pos = ref 0 in 275 + let slice_length = min (max total 1) (64 * 1024) in 276 + let read () = 277 + if !pos >= total then Slice.eod 278 + else 279 + let len = min slice_length (total - !pos) in 280 + let s = Slice.make bytes ~first:(offset + !pos) ~length:len in 281 + pos := !pos + len; 282 + s 283 + in 284 + Reader.make ~slice_length read 285 + 286 + (** Open a pack file from a string (for tests). *) 287 + let of_string data = 288 + let length = String.length data in 289 + let reader_at offset = reader_of_string_at data ~offset in 290 + of_reader ~length ~reader_at 265 291 266 292 (** Read entry header at given offset. Returns (header, data_offset). *) 267 293 let read_entry_header_at t offset = 268 294 let open Result.Syntax in 269 - if offset >= String.length t.data then Error (`Msg "Offset beyond pack file") 295 + if offset >= t.length then Error (`Msg "Offset beyond pack file") 270 296 else 271 - let first = Char.code t.data.[offset] in 297 + let r = t.reader_at offset in 298 + let first = read_uint8 r in 272 299 let type_bits = (first lsr 4) land 0x07 in 273 300 let* obj_type = obj_type_of_int type_bits in 274 301 let size = first land 0x0F in 275 302 let continue = first land 0x80 <> 0 in 276 303 if not continue then Ok ({ obj_type; size }, offset + 1) 277 304 else 278 - let rec loop size shift off = 279 - let b = Char.code t.data.[off] in 305 + let rec loop size shift consumed = 306 + let b = read_uint8 r in 280 307 let size = size lor ((b land 0x7F) lsl shift) in 281 - if b land 0x80 = 0 then Ok (size, off + 1) 282 - else loop size (shift + 7) (off + 1) 308 + if b land 0x80 = 0 then Ok (size, offset + 1 + consumed + 1) 309 + else loop size (shift + 7) (consumed + 1) 283 310 in 284 - let* size, data_off = loop size 4 (offset + 1) in 311 + let* size, data_off = loop size 4 0 in 285 312 Ok ({ obj_type; size 
}, data_off) 286 313 287 314 (** Read OFS_DELTA offset at given position. Returns (offset, next_pos). *) 288 315 let read_ofs_offset_at t pos = 289 - let first = Char.code t.data.[pos] in 316 + let r = t.reader_at pos in 317 + let first = read_uint8 r in 290 318 let offset = first land 0x7F in 291 319 if first land 0x80 = 0 then (offset, pos + 1) 292 320 else 293 - let rec loop offset pos = 294 - let b = Char.code t.data.[pos] in 321 + let rec loop offset consumed = 322 + let b = read_uint8 r in 295 323 let offset = ((offset + 1) lsl 7) lor (b land 0x7F) in 296 - if b land 0x80 = 0 then (offset, pos + 1) else loop offset (pos + 1) 324 + if b land 0x80 = 0 then (offset, pos + 1 + consumed + 1) 325 + else loop offset (consumed + 1) 297 326 in 298 - loop offset (pos + 1) 327 + loop offset 1 299 328 300 329 (** Find the base type by following delta chain. *) 301 330 let rec base_type t off = ··· 315 344 let* header, data_off = read_entry_header_at t offset in 316 345 match header.obj_type with 317 346 | Commit | Tree | Blob | Tag -> 318 - let* data = inflate ~first:data_off t.data in 347 + let r = t.reader_at data_off in 348 + let* data, _consumed = inflate_reader r in 319 349 Ok (kind_of_obj_type header.obj_type, data) 320 350 | Ofs_delta -> 321 351 let rel_offset, delta_off = read_ofs_offset_at t data_off in 322 352 let source_offset = offset - rel_offset in 323 353 let* _kind, source = read_object_at t source_offset in 324 - let* delta = inflate ~first:delta_off t.data in 354 + let r = t.reader_at delta_off in 355 + let* delta, _consumed = inflate_reader r in 325 356 let* target = apply_delta ~source ~delta in 326 357 let* kind = base_type t source_offset in 327 358 Ok (kind, target) 328 - | Ref_delta -> 329 - (* REF_DELTA requires an index to resolve *) 330 - Error (`Msg "REF_DELTA requires pack index for resolution") 359 + | Ref_delta -> Error (`Msg "REF_DELTA requires pack index for resolution") 331 360 332 361 (** Read an object at the given offset, also 
returning the offset of the next 333 362 entry. This avoids re-decompressing just to find the next position. *) ··· 336 365 let* header, data_off = read_entry_header_at t offset in 337 366 match header.obj_type with 338 367 | Commit | Tree | Blob | Tag -> 339 - let* data, consumed = inflate_with_consumed ~first:data_off t.data in 368 + let r = t.reader_at data_off in 369 + let* data, consumed = inflate_reader r in 340 370 Ok (kind_of_obj_type header.obj_type, data, data_off + consumed) 341 371 | Ofs_delta -> 342 372 let rel_offset, delta_off = read_ofs_offset_at t data_off in 343 373 let source_offset = offset - rel_offset in 344 374 let* _kind, source = read_object_at t source_offset in 345 - let* delta, consumed = inflate_with_consumed ~first:delta_off t.data in 375 + let r = t.reader_at delta_off in 376 + let* delta, consumed = inflate_reader r in 346 377 let* target = apply_delta ~source ~delta in 347 378 let* kind = base_type t source_offset in 348 379 Ok (kind, target, delta_off + consumed) 349 - | Ref_delta -> 350 - (* REF_DELTA requires an index to resolve *) 351 - Error (`Msg "REF_DELTA requires pack index for resolution") 380 + | Ref_delta -> Error (`Msg "REF_DELTA requires pack index for resolution") 352 381 353 382 (** Get the number of objects in the pack. *) 354 383 let count t = t.header.count
+12 -4
ocaml-git/lib/pack.mli
··· 112 112 113 113 (** {1 Low-level operations} *) 114 114 115 - val inflate : ?first:int -> string -> (string, [ `Msg of string ]) result 116 - (** [inflate ?first s] decompresses zlib-compressed data starting at offset 117 - [first] (default [0]). Uses a zero-copy slice view to avoid copying the 118 - string tail. *) 115 + val inflate_reader : 116 + Bytesrw.Bytes.Reader.t -> (string * int, [ `Msg of string ]) result 117 + (** [inflate_reader r] decompresses zlib-compressed data from reader [r]. 118 + Returns [(data, consumed)] where [consumed] is the number of compressed 119 + bytes read. *) 120 + 121 + val of_reader : 122 + length:int -> 123 + reader_at:(int -> Bytesrw.Bytes.Reader.t) -> 124 + (t, [ `Msg of string ]) result 125 + (** [of_reader ~length ~reader_at] opens a pack from a reader factory. 126 + [reader_at offset] must return a fresh reader starting at [offset]. *) 119 127 120 128 val read_entry_header_at : 121 129 t -> int -> (entry_header * int, [ `Msg of string ]) result
+4 -2
ocaml-git/test/test_pack.ml
··· 57 57 let compressed_reader = Bytesrw_zlib.Zlib.compress_reads () reader in 58 58 Bytesrw.Bytes.Reader.to_string compressed_reader 59 59 in 60 - match Git.Pack.inflate compressed with 61 - | Ok decompressed -> Alcotest.(check string) "roundtrip" original decompressed 60 + let reader = Bytesrw.Bytes.Reader.of_string compressed in 61 + match Git.Pack.inflate_reader reader with 62 + | Ok (decompressed, _consumed) -> 63 + Alcotest.(check string) "roundtrip" original decompressed 62 64 | Error (`Msg m) -> Alcotest.fail m 63 65 64 66 let test_delta () =