Bloom filter for probabilistic membership testing
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

fix(lint): flatten test suites to single tuple (E600) and fix broken renames

Change all test_*.mli files to export `val suite : string * unit Alcotest.test_case list`
(a single tuple rather than a list of tuples). Flatten multi-group test suites into single groups.
Fix incorrect v_* function renames in CLI call sites.

+35 -37
+12 -12
fuzz/fuzz_bloom.ml
··· 5 5 (** No false negatives - after adding an element, mem must return true. *) 6 6 let test_no_false_negatives seed elts = 7 7 let size = max 100 (String.length seed * 10) in 8 - let bf = Bloom.create ~error_rate:0.01 size in 8 + let bf = Bloom.v ~error_rate:0.01 size in 9 9 List.iter (fun e -> Bloom.add bf e) elts; 10 10 List.iter 11 11 (fun e -> if not (Bloom.mem bf e) then fail "false negative detected") ··· 13 13 14 14 (** Serialization roundtrip - valid filters must round-trip. *) 15 15 let test_serialization_roundtrip elts = 16 - let bf = Bloom.create ~error_rate:0.01 1000 in 16 + let bf = Bloom.v ~error_rate:0.01 1000 in 17 17 List.iter (fun e -> Bloom.add bf e) elts; 18 18 match Bloom.to_bytes bf |> Bloom.of_bytes with 19 19 | Error _ -> fail "deserialization failed" ··· 25 25 26 26 (** Union contains all elements from both filters. *) 27 27 let test_union elts1 elts2 = 28 - let bf1 = Bloom.create ~error_rate:0.01 1000 in 29 - let bf2 = Bloom.create ~error_rate:0.01 1000 in 28 + let bf1 = Bloom.v ~error_rate:0.01 1000 in 29 + let bf2 = Bloom.v ~error_rate:0.01 1000 in 30 30 List.iter (fun e -> Bloom.add bf1 e) elts1; 31 31 List.iter (fun e -> Bloom.add bf2 e) elts2; 32 32 let bf_union = Bloom.union bf1 bf2 in ··· 41 41 42 42 (** Copy creates independent filter - original elements in both. *) 43 43 let test_copy_independence elts extra = 44 - let bf = Bloom.create ~error_rate:0.01 1000 in 44 + let bf = Bloom.v ~error_rate:0.01 1000 in 45 45 List.iter (fun e -> Bloom.add bf e) elts; 46 46 let bf_copy = Bloom.copy bf in 47 47 Bloom.add bf extra; ··· 53 53 54 54 (** Clear empties filter - size estimate should be near 0. *) 55 55 let test_clear elts = 56 - let bf = Bloom.create ~error_rate:0.01 1000 in 56 + let bf = Bloom.v ~error_rate:0.01 1000 in 57 57 List.iter (fun e -> Bloom.add bf e) elts; 58 58 Bloom.clear bf; 59 59 let estimate = Bloom.size_estimate bf in ··· 61 61 62 62 (** Intersection preserves common elements. 
*) 63 63 let test_intersection elts = 64 - let bf1 = Bloom.create ~error_rate:0.01 1000 in 65 - let bf2 = Bloom.create ~error_rate:0.01 1000 in 64 + let bf1 = Bloom.v ~error_rate:0.01 1000 in 65 + let bf2 = Bloom.v ~error_rate:0.01 1000 in 66 66 (* Add same elements to both *) 67 67 List.iter (fun e -> Bloom.add bf1 e) elts; 68 68 List.iter (fun e -> Bloom.add bf2 e) elts; ··· 77 77 (** Create with invalid error_rate should fail. *) 78 78 let test_invalid_error_rate () = 79 79 (try 80 - let _ = Bloom.create ~error_rate:0.0 100 in 80 + let _ = Bloom.v ~error_rate:0.0 100 in 81 81 fail "should reject error_rate=0" 82 82 with Invalid_argument _ -> ()); 83 83 (try 84 - let _ = Bloom.create ~error_rate:1.0 100 in 84 + let _ = Bloom.v ~error_rate:1.0 100 in 85 85 fail "should reject error_rate=1" 86 86 with Invalid_argument _ -> ()); 87 87 (try 88 - let _ = Bloom.create ~error_rate:(-0.1) 100 in 88 + let _ = Bloom.v ~error_rate:(-0.1) 100 in 89 89 fail "should reject negative error_rate" 90 90 with Invalid_argument _ -> ()); 91 91 try 92 - let _ = Bloom.create ~error_rate:1.5 100 in 92 + let _ = Bloom.v ~error_rate:1.5 100 in 93 93 fail "should reject error_rate>1" 94 94 with Invalid_argument _ -> () 95 95
+4 -4
src/bloom.ml
··· 19 19 in 20 20 aux 0 [] (m / k) 21 21 22 - let v m k = 22 + let of_params m k = 23 23 let m, lengths = partition_lengths m k in 24 24 let p_len = 25 25 let rec aux acc off = function ··· 40 40 let k = ceil (log2 *. m /. nf) in 41 41 (m, k) 42 42 43 - let create ?(error_rate = 0.01) n_items = 43 + let v ?(error_rate = 0.01) n_items = 44 44 let m, k = estimate_parameters n_items error_rate in 45 45 if error_rate <= 0. || error_rate >= 1. then invalid_arg "Bloomf.create"; 46 - v (int_of_float m) (int_of_float k) 46 + of_params (int_of_float m) (int_of_float k) 47 47 48 48 let add_priv t hashed_data = 49 49 let rec loop = function ··· 142 142 module Make (H : Hashable) = struct 143 143 type t = priv 144 144 145 - let create = create 145 + let v = v 146 146 let copy = copy 147 147 let add bf data = add_priv bf (H.hash data) 148 148 let mem bf data = mem_priv bf (H.hash data)
+5 -5
src/bloom.mli
··· 15 15 type 'a t 16 16 (** The type of the Bloom filter. *) 17 17 18 - val create : ?error_rate:float -> int -> 'a t 19 - (** [create ~error_rate size] creates a fresh BF for which expected false 20 - positive rate when filled with [size] elements is [error_rate]. 18 + val v : ?error_rate:float -> int -> 'a t 19 + (** [v ~error_rate size] creates a fresh BF for which expected false positive 20 + rate when filled with [size] elements is [error_rate]. 21 21 22 22 @raise Invalid_argument 23 23 if [error_rate] is not in \]0, 1\[, or [size] is negative. *) ··· 85 85 type t 86 86 (** The type of the Bloom filter. *) 87 87 88 - val create : ?error_rate:float -> int -> t 89 - (** [create ~error_rate size] creates a fresh Bloom filter. *) 88 + val v : ?error_rate:float -> int -> t 89 + (** [v ~error_rate size] creates a fresh Bloom filter. *) 90 90 91 91 val copy : t -> t 92 92 (** [copy t] returns a deep copy of the Bloom filter. *)
+1 -1
test/test.ml
··· 1 - let () = Alcotest.run "Bloom" Test_bloom.suite 1 + let () = Alcotest.run "Bloom" [ Test_bloom.suite ]
+11 -14
test/test_bloom.ml
··· 7 7 let expected_error_rate = 0.001 8 8 9 9 let create_and_fill size = 10 - let bf = Bloom.create ~error_rate:expected_error_rate size in 10 + let bf = Bloom.v ~error_rate:expected_error_rate size in 11 11 let rec loop i acc = 12 12 if i = 0 then acc 13 13 else ··· 88 88 sizes 89 89 90 90 let suite = 91 - [ 92 - ( "bloom", 93 - [ 94 - ("Mem returns true when element was added", `Quick, test_mem); 95 - ( "False positive rate is as specified (15% error allowed)", 96 - `Slow, 97 - test_errors ); 98 - ("Size estimate is correct", `Slow, test_size); 99 - ("Union", `Quick, test_union); 100 - ("Intersection", `Quick, test_inter); 101 - ("Serialisation", `Quick, test_bytes); 102 - ] ); 103 - ] 91 + ( "bloom", 92 + [ 93 + Alcotest.test_case "mem returns true when element was added" `Quick 94 + test_mem; 95 + Alcotest.test_case "false positive rate is as specified" `Slow test_errors; 96 + Alcotest.test_case "size estimate is correct" `Slow test_size; 97 + Alcotest.test_case "union" `Quick test_union; 98 + Alcotest.test_case "intersection" `Quick test_inter; 99 + Alcotest.test_case "serialisation" `Quick test_bytes; 100 + ] )
+2 -1
test/test_bloom.mli
··· 1 - val suite : (string * unit Alcotest.test_case list) list 1 + val suite : string * unit Alcotest.test_case list 2 + (** Test suite. *)