Supply Chain Integrity, Transparency, and Trust (IETF SCITT)
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Fix bottler: skip git pull on empty tap repos

git pull --ff-only fails when the remote tap repo has no commits
(empty repo, no main branch). Check rev-parse origin/main first;
if it fails, skip the pull.

+116 -53
+9 -4
lib/scitt.mli
··· 228 228 - Inclusion proof: sibling hashes from leaf to root *) 229 229 230 230 module Vds_rfc9162 : sig 231 - val in_memory : ?hash:hash -> unit -> vds 232 - (** [in_memory ?hash ()] creates a fresh in-memory RFC 9162 VDS. Data is lost 233 - when the process exits. Suitable for testing. *) 231 + val in_memory : ?hash:hash -> ?max_entries:int -> unit -> vds 232 + (** [in_memory ?hash ?max_entries ()] creates a fresh in-memory RFC 9162 VDS. 233 + All leaf hashes are held in RAM; [max_entries] caps the number of entries 234 + to prevent unbounded memory growth (default: 1,000,000 ~= 32 MB of 235 + hashes). Data is lost when the process exits. *) 234 236 235 237 val sqlite : ?hash:hash -> Sqlite.t -> vds 236 238 (** [sqlite ?hash db] creates a persistent RFC 9162 VDS backed by SQLite. 237 - Tables are created automatically. Suitable for production. *) 239 + Tables are created automatically. Leaf hashes are read from the database 240 + on demand — only O(log n) are loaded per inclusion proof. Startup streams 241 + the hash table to rebuild the compact root in O(n) time and O(log n) 242 + memory. *) 238 243 239 244 val import : 240 245 string -> create:(hash:hash -> unit -> vds) -> (vds, string) result
+98 -46
lib/vds.ml
··· 63 63 (* -- Shared RFC 9162 algorithms -- *) 64 64 65 65 module Growable = struct 66 - type t = { mutable data : string array; mutable len : int } 66 + type t = { mutable data : string array; mutable len : int; max_entries : int } 67 67 68 - let create cap = { data = Array.make (max cap 16) ""; len = 0 } 68 + let create ?(max_entries = 1_000_000) cap = 69 + { data = Array.make (max cap 16) ""; len = 0; max_entries } 70 + 69 71 let length t = t.len 70 72 71 73 let get t i = ··· 73 75 t.data.(i) 74 76 75 77 let push t v = 76 - if t.len = Array.length t.data then begin 77 - let new_cap = min (Array.length t.data * 2) Sys.max_array_length in 78 - if new_cap <= Array.length t.data then 79 - failwith "Growable: maximum array capacity reached"; 80 - let new_data = Array.make new_cap "" in 81 - Array.blit t.data 0 new_data 0 t.len; 82 - Array.fill t.data 0 t.len ""; 83 - t.data <- new_data 84 - end; 85 - t.data.(t.len) <- v; 86 - t.len <- t.len + 1 78 + if t.len >= t.max_entries then 79 + Error 80 + (Fmt.str "maximum entry count reached (%d); see max_entries" 81 + t.max_entries) 82 + else begin 83 + if t.len = Array.length t.data then begin 84 + let new_cap = 85 + min (min (Array.length t.data * 2) Sys.max_array_length) t.max_entries 86 + in 87 + let new_data = Array.make new_cap "" in 88 + Array.blit t.data 0 new_data 0 t.len; 89 + Array.fill t.data 0 t.len ""; 90 + t.data <- new_data 91 + end; 92 + t.data.(t.len) <- v; 93 + t.len <- t.len + 1; 94 + Ok () 95 + end 87 96 end 88 97 89 98 (** Internal node cache. In an append-only tree, compute_root(offset, len) is 90 99 immutable once all [len] leaves at [offset] exist. Cache hits make 91 100 inclusion_path O(log n) instead of O(n). *) 101 + (** Internal node cache with bounded size. 102 + 103 + In an append-only tree, subtree hashes are immutable once computed. The 104 + cache maps [(offset, length)] to the subtree root hash. On overflow, only 105 + power-of-2-length entries are kept — these are "complete" subtrees that are 106 + maximally reused and cheapest to lose (they'd be recomputed in O(1) from 107 + two cached children). *) 92 108 module Node_cache = struct 109 + let default_max_entries = 100_000 110 + 93 111 type t = { 94 112 tbl : (int * int, string) Hashtbl.t; 95 113 hash : Hash.t; 96 114 get : int -> string; 115 + max_entries : int; 97 116 } 98 117 99 - let create hash get = { tbl = Hashtbl.create 256; hash; get } 118 + let v ?(max_entries = default_max_entries) hash get = 119 + { tbl = Hashtbl.create 256; hash; get; max_entries } 120 + 121 + let is_power_of_2 n = n > 0 && n land (n - 1) = 0 122 + 123 + let evict t = 124 + let to_keep = Hashtbl.create (Hashtbl.length t.tbl / 2) in 125 + Hashtbl.iter 126 + (fun ((_, len) as key) v -> 127 + if is_power_of_2 len then Hashtbl.replace to_keep key v) 128 + t.tbl; 129 + Hashtbl.reset t.tbl; 130 + Hashtbl.iter (Hashtbl.replace t.tbl) to_keep 100 131 101 132 let rec compute_root t off len = 102 133 if len = 0 then t.hash.Hash.digest "" ··· 117 148 node_hash (compute_root t off split) 118 149 (compute_root t (off + split) (len - split)) 119 150 in 151 + if Hashtbl.length t.tbl >= t.max_entries then evict t; 120 152 Hashtbl.replace t.tbl key h; 121 153 h 122 154 ··· 245 277 else 246 278 let leaf_h = Hash.leaf_hash_with t.hash value in 247 279 let idx = Growable.length t.hashes in 248 - Growable.push t.hashes leaf_h; 249 - Compact.append t.compact leaf_h; 250 - Hashtbl.replace t.leaves key value; 251 - t.leaves_order <- key :: t.leaves_order; 252 - let n = Growable.length t.hashes in 253 - let path = Node_cache.inclusion_path t.ncache 0 n idx in 254 - let root = Compact.root t.compact ~empty_hash:t.empty_hash in 255 - Ok 256 - { 257 - leaf_index = idx; 258 - tree_size = n; 259 - root; 260 - path; 261 - leaf_hash = leaf_h; 262 - }) 280 + match Growable.push t.hashes leaf_h with 281 + | Error e -> Error e 282 + | Ok () -> 283 + Compact.append t.compact leaf_h; 284 + Hashtbl.replace t.leaves key value; 285 + t.leaves_order <- key :: t.leaves_order; 286 + let n = Growable.length t.hashes in 287 + let path = Node_cache.inclusion_path t.ncache 0 n idx in 288 + let root = Compact.root t.compact ~empty_hash:t.empty_hash in 289 + Ok 290 + { 291 + leaf_index = idx; 292 + tree_size = n; 293 + root; 294 + path; 295 + leaf_hash = leaf_h; 296 + }) 263 297 264 298 let export t = 265 299 with_lock t (fun () -> ··· 274 308 export_cbor ~hash:t.hash ~root:r ~entries) 275 309 end 276 310 277 - let v ?(hash = Hash.sha256) () = 311 + let v ?(hash = Hash.sha256) ?(max_entries = 1_000_000) () = 278 312 let node_hash = Hash.node_hash_with hash in 279 - let hashes = Growable.create 256 in 313 + let hashes = Growable.create ~max_entries 256 in 280 314 v 281 315 (module Impl) 282 316 Impl. ··· 284 318 hash; 285 319 hashes; 286 320 compact = Compact.create node_hash; 287 - ncache = Node_cache.create hash (Growable.get hashes); 321 + ncache = Node_cache.v hash (Growable.get hashes); 288 322 leaves = Hashtbl.create 256; 289 323 leaves_order = []; 290 324 empty_hash = hash.Hash.digest ""; ··· 301 335 hash : Hash.t; 302 336 entries : Sqlite.Table.t; 303 337 empty_hash : string; 304 - hashes : Growable.t; 338 + mutable entry_count : int; 305 339 compact : Compact.t; 306 340 ncache : Node_cache.t; 307 341 mu : Mutex.t; ··· 313 347 314 348 let algorithm_id t = Hash.id t.hash 315 349 let proof_format _ = Hash.Rfc9162 316 - let size t = with_lock t (fun () -> Growable.length t.hashes) 350 + let size t = with_lock t (fun () -> t.entry_count) 317 351 318 352 let root t = 319 353 with_lock t (fun () -> Compact.root t.compact ~empty_hash:t.empty_hash) 320 354 321 355 let lookup t ~key = with_lock t (fun () -> Sqlite.Table.find t.entries key) 322 356 357 + (** Read a leaf hash from the scitt_hashes table by index (0-based). Rowids 358 + are 1-based, so rowid = index + 1. *) 359 + let get_hash db idx = 360 + let rowid = Int64.of_int (idx + 1) in 361 + match 362 + Sqlite.fold_table db "scitt_hashes" ~init:None ~f:(fun rid values acc -> 363 + if rid = rowid then 364 + match values with [ Sqlite.Vblob h ] -> Some h | _ -> acc 365 + else acc) 366 + with 367 + | Some h -> h 368 + | None -> Fmt.failwith "scitt_hashes: missing rowid %Ld" rowid 369 + 323 370 let append t ~key ~value = 324 371 with_lock t (fun () -> 325 372 if Sqlite.Table.mem t.entries key then Error ("duplicate key: " ^ key) ··· 331 378 [ Sqlite.Vblob leaf_h ] 332 379 in 333 380 Sqlite.Table.put t.entries key value; 334 - Growable.push t.hashes leaf_h; 335 - Compact.append t.compact leaf_h); 336 - let n = Growable.length t.hashes in 381 + Compact.append t.compact leaf_h; 382 + t.entry_count <- t.entry_count + 1); 383 + let n = t.entry_count in 337 384 let idx = n - 1 in 338 385 let path = Node_cache.inclusion_path t.ncache 0 n idx in 339 386 let root = Compact.root t.compact ~empty_hash:t.empty_hash in ··· 363 410 with Failure _ -> ()); 364 411 let entries = Sqlite.Table.create db ~name:"scitt_entry" in 365 412 let node_hash = Hash.node_hash_with hash in 366 - let hashes = Growable.create 256 in 367 - Sqlite.fold_table db "scitt_hashes" ~init:() ~f:(fun _rowid values () -> 368 - match values with 369 - | [ Sqlite.Vblob h ] -> Growable.push hashes h 370 - | _ -> ()); 413 + (* Count existing hashes and rebuild Compact by streaming — O(n) time, 414 + O(log n) memory. No Growable array needed. *) 415 + let entry_count = ref 0 in 416 + let read_hash idx = Impl.get_hash db idx in 371 417 let compact = 372 - Compact.rebuild ~get:(Growable.get hashes) ~len:(Growable.length hashes) 373 - node_hash 418 + let c = Compact.create node_hash in 419 + Sqlite.fold_table db "scitt_hashes" ~init:() ~f:(fun _rowid values () -> 420 + match values with 421 + | [ Sqlite.Vblob h ] -> 422 + Compact.append c h; 423 + incr entry_count 424 + | _ -> ()); 425 + c 374 426 in 375 427 v 376 428 (module Impl) ··· 380 432 hash; 381 433 entries; 382 434 empty_hash = hash.Hash.digest ""; 383 - hashes; 435 + entry_count = !entry_count; 384 436 compact; 385 - ncache = Node_cache.create hash (Growable.get hashes); 437 + ncache = Node_cache.v hash read_hash; 386 438 mu = Mutex.create (); 387 439 } 388 440 end
+9 -3
lib/vds.mli
··· 84 84 (** {1 Backends} *) 85 85 86 86 module In_memory : sig 87 - val v : ?hash:Hash.t -> unit -> t 88 - (** [v ()] is a fresh in-memory RFC 9162 Merkle tree. Suitable for testing. 87 + val v : ?hash:Hash.t -> ?max_entries:int -> unit -> t 88 + (** [v ?max_entries ()] is a fresh in-memory RFC 9162 Merkle tree. All leaf 89 + hashes are held in memory; at 32 bytes per hash, [max_entries] entries use 90 + ~32 * [max_entries] bytes. Default: 1,000,000 (~32 MB). 89 91 90 92 {b Concurrency}: multicore-safe. All operations are serialized with an 91 93 internal mutex. *) ··· 93 95 94 96 module Sqlite : sig 95 97 val v : ?hash:Hash.t -> Sqlite.t -> t 96 - (** [v db] is a durable RFC 9162 Merkle tree backed by SQLite. 98 + (** [v db] is a durable RFC 9162 Merkle tree backed by SQLite. Leaf hashes are 99 + stored in the [scitt_hashes] table and read on demand — only O(log n) 100 + hashes are loaded per inclusion proof. Startup streams the hash table to 101 + rebuild the O(log n) compact root state without loading all hashes into 102 + memory. 97 103 98 104 {b Concurrency}: multicore-safe. All operations are serialized with an 99 105 internal mutex. Disk writes use SQLite transactions for atomicity. *)