Bloom filter for probabilistic membership testing
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

fix(lint): resolve E105, E331, E605, E610 across multiple packages

Add individual Alcotest test files for ocaml-atp (tid, handle, did, nsid,
record_key, at_uri, mst, car, block_map, blockstore, dagcbor, cid, blob_ref,
eio_error, lex, repo_key, varint), hermest (codegen_jsont, emitter,
lexicon_types, scc), standard-site, tangled, xrpc, xrpc-auth, and bloom.
Fix catch-all exception handlers (E105) and redundant function prefixes (E331),
and expose parse functions in ocaml-arp for testability.

+105 -102
+1 -102
test/main.ml
··· 1 - let () = Random.self_init () 2 - let random_char () = char_of_int (Random.int 256) 3 - let random_string n = String.init n (fun _i -> random_char ()) 4 - 5 - module StringSet = Set.Make (String) 6 - 7 - let expected_error_rate = 0.001 8 - 9 - let create_and_fill size = 10 - let bf = Bloom.create ~error_rate:expected_error_rate size in 11 - let rec loop i acc = 12 - if i = 0 then acc 13 - else 14 - let r = random_string 1024 in 15 - let () = Bloom.add bf r in 16 - loop (i - 1) (StringSet.add r acc) 17 - in 18 - let elts = loop size StringSet.empty in 19 - (bf, elts) 20 - 21 - let test_mem () = 22 - let sizes = [ 1_000; 10_000; 100_000 ] in 23 - List.iter 24 - (fun i -> 25 - let bf, elts = create_and_fill i in 26 - StringSet.iter 27 - (fun r -> Alcotest.(check bool) "mem (empty)" true (Bloom.mem bf r)) 28 - elts) 29 - sizes 30 - 31 - let test_errors () = 32 - let sizes = [ 1_000; 10_000; 100_000 ] in 33 - List.iter 34 - (fun i -> 35 - let bf, elts = create_and_fill i in 36 - let attempts = 100_000 in 37 - let rec loop i count = 38 - if i = 0 then count 39 - else 40 - let r = random_string 1024 in 41 - if StringSet.mem r elts then loop i count 42 - else loop (i - 1) (if Bloom.mem bf r then count + 1 else count) 43 - in 44 - let count = loop attempts 0 in 45 - let error_rate = float_of_int count /. float_of_int attempts in 46 - if error_rate > 1.15 *. expected_error_rate then 47 - Alcotest.failf "error_rate: expecting@\n%f, got@\n%f" 48 - expected_error_rate error_rate 49 - else ()) 50 - sizes 51 - 52 - let test_size () = 53 - let sizes = [ 1_000; 10_000; 100_000 ] in 54 - List.iter 55 - (fun i -> 56 - let bf, _ = create_and_fill i in 57 - let len = Bloom.size_estimate bf in 58 - if abs (len - i) > int_of_float (0.15 *. 
float_of_int i) then 59 - Alcotest.failf "size_estimate: expecting@\n%d, got@\n%d" i len) 60 - sizes 61 - 62 - let test_op msg bop sop = 63 - let sizes = [ 1_000; 10_000; 100_000 ] in 64 - List.iter 65 - (fun i -> 66 - let bf1, elts1 = create_and_fill i in 67 - let bf2, elts2 = create_and_fill i in 68 - let bf3 = bop bf1 bf2 in 69 - let elts3 = sop elts1 elts2 in 70 - StringSet.iter 71 - (fun r -> Alcotest.(check bool) msg true (Bloom.mem bf3 r)) 72 - elts3) 73 - sizes 74 - 75 - let test_union () = test_op "union" Bloom.union StringSet.union 76 - let test_inter () = test_op "intersection" Bloom.inter StringSet.inter 77 - 78 - let test_bytes () = 79 - let sizes = [ 1_000; 10_000; 100_000 ] in 80 - List.iter 81 - (fun i -> 82 - let bf1, _ = create_and_fill i in 83 - match Bloom.to_bytes bf1 |> Bloom.of_bytes with 84 - | Ok bf2 -> 85 - Alcotest.(check bool) 86 - "serialisation / deserialisation" true (bf1 = bf2) 87 - | Error _ -> Alcotest.failf "deserialisation failed") 88 - sizes 89 - 90 - let suite = 91 - [ 92 - ("Mem returns true when element was added", `Quick, test_mem); 93 - ( "False positive rate is as specified (15% error allowed)", 94 - `Slow, 95 - test_errors ); 96 - ("Size estimate is correct", `Slow, test_size); 97 - ("Union", `Quick, test_union); 98 - ("Intersection", `Quick, test_inter); 99 - ("Serialisation", `Quick, test_bytes); 100 - ] 101 - 102 - let () = Alcotest.run "Bloom" [ ("bloom", suite) ] 1 + let () = Test_bloom.run ()
+102
test/test_bloom.ml
··· 1 + let () = Random.self_init () 2 + let random_char () = char_of_int (Random.int 256) 3 + let random_string n = String.init n (fun _i -> random_char ()) 4 + 5 + module StringSet = Set.Make (String) 6 + 7 + let expected_error_rate = 0.001 8 + 9 + let create_and_fill size = 10 + let bf = Bloom.create ~error_rate:expected_error_rate size in 11 + let rec loop i acc = 12 + if i = 0 then acc 13 + else 14 + let r = random_string 1024 in 15 + let () = Bloom.add bf r in 16 + loop (i - 1) (StringSet.add r acc) 17 + in 18 + let elts = loop size StringSet.empty in 19 + (bf, elts) 20 + 21 + let test_mem () = 22 + let sizes = [ 1_000; 10_000; 100_000 ] in 23 + List.iter 24 + (fun i -> 25 + let bf, elts = create_and_fill i in 26 + StringSet.iter 27 + (fun r -> Alcotest.(check bool) "mem (empty)" true (Bloom.mem bf r)) 28 + elts) 29 + sizes 30 + 31 + let test_errors () = 32 + let sizes = [ 1_000; 10_000; 100_000 ] in 33 + List.iter 34 + (fun i -> 35 + let bf, elts = create_and_fill i in 36 + let attempts = 100_000 in 37 + let rec loop i count = 38 + if i = 0 then count 39 + else 40 + let r = random_string 1024 in 41 + if StringSet.mem r elts then loop i count 42 + else loop (i - 1) (if Bloom.mem bf r then count + 1 else count) 43 + in 44 + let count = loop attempts 0 in 45 + let error_rate = float_of_int count /. float_of_int attempts in 46 + if error_rate > 1.15 *. expected_error_rate then 47 + Alcotest.failf "error_rate: expecting@\n%f, got@\n%f" 48 + expected_error_rate error_rate 49 + else ()) 50 + sizes 51 + 52 + let test_size () = 53 + let sizes = [ 1_000; 10_000; 100_000 ] in 54 + List.iter 55 + (fun i -> 56 + let bf, _ = create_and_fill i in 57 + let len = Bloom.size_estimate bf in 58 + if abs (len - i) > int_of_float (0.15 *. 
float_of_int i) then 59 + Alcotest.failf "size_estimate: expecting@\n%d, got@\n%d" i len) 60 + sizes 61 + 62 + let test_op msg bop sop = 63 + let sizes = [ 1_000; 10_000; 100_000 ] in 64 + List.iter 65 + (fun i -> 66 + let bf1, elts1 = create_and_fill i in 67 + let bf2, elts2 = create_and_fill i in 68 + let bf3 = bop bf1 bf2 in 69 + let elts3 = sop elts1 elts2 in 70 + StringSet.iter 71 + (fun r -> Alcotest.(check bool) msg true (Bloom.mem bf3 r)) 72 + elts3) 73 + sizes 74 + 75 + let test_union () = test_op "union" Bloom.union StringSet.union 76 + let test_inter () = test_op "intersection" Bloom.inter StringSet.inter 77 + 78 + let test_bytes () = 79 + let sizes = [ 1_000; 10_000; 100_000 ] in 80 + List.iter 81 + (fun i -> 82 + let bf1, _ = create_and_fill i in 83 + match Bloom.to_bytes bf1 |> Bloom.of_bytes with 84 + | Ok bf2 -> 85 + Alcotest.(check bool) 86 + "serialisation / deserialisation" true (bf1 = bf2) 87 + | Error _ -> Alcotest.failf "deserialisation failed") 88 + sizes 89 + 90 + let suite = 91 + [ 92 + ("Mem returns true when element was added", `Quick, test_mem); 93 + ( "False positive rate is as specified (15% error allowed)", 94 + `Slow, 95 + test_errors ); 96 + ("Size estimate is correct", `Slow, test_size); 97 + ("Union", `Quick, test_union); 98 + ("Intersection", `Quick, test_inter); 99 + ("Serialisation", `Quick, test_bytes); 100 + ] 101 + 102 + let run () = Alcotest.run "Bloom" [ ("bloom", suite) ]
+2
test/test_bloom.mli
··· 1 + val run : unit -> unit 2 + (** Bloom filter test suite. *)