···11(** Pure OCaml GHASH (GF(2^128) for AES-GCM).
2233- Constant-time implementation using BearSSL's ctmul32 technique:
44- carryless multiplication via masked regular multiplication.
33+ Constant-time shift-and-XOR implementation. All 128 iterations
44+ execute identical operations regardless of input values.
55 No branches on secret data, no lookup tables.
6677- Reference: Thomas Pornin, BearSSL ghash_ctmul32.c
88- https://bearssl.org/gitweb/?p=BearSSL&a=blob&f=src/hash/ghash_ctmul32.c *)
99-1010-(** Carryless multiply of two 32-bit values, producing a 64-bit result
1111- stored as two Int32 values (hi, lo).
1212-1313- Uses the BearSSL technique: mask inputs to isolate non-adjacent bits,
1414- then regular multiplication is carryless for those bits. Four such
1515- multiplications cover all bit positions. *)
1616-1717-let bmul32 (x : int) (y : int) : int * int =
1818- (* We work with native ints. On 64-bit OCaml, int is 63 bits.
1919- On 32-bit jsoo, int is 32 bits. We only use the low 32 bits. *)
2020- let x = x land 0xFFFFFFFF in
2121- let y = y land 0xFFFFFFFF in
2222- (* Isolate every 4th bit *)
2323- let x0 = x land 0x11111111 in
2424- let x1 = x land 0x22222222 in
2525- let x2 = x land 0x44444444 in
2626- let x3 = x land 0x88888888 in
2727- let y0 = y land 0x11111111 in
2828- let y1 = y land 0x22222222 in
2929- let y2 = y land 0x44444444 in
3030- let y3 = y land 0x88888888 in
3131- (* Multiply non-adjacent bits: no carry between them *)
3232- (* Use Int32 to avoid overflow on 32-bit targets *)
3333- let ( * ) a b = Int32.mul (Int32.of_int a) (Int32.of_int b) in
3434- let ( lxor ) = Int32.logxor in
3535- let ( land ) = Int32.logand in
3636- let z0 = (x0 * y0) lxor (x1 * y3) lxor (x2 * y2) lxor (x3 * y1) in
3737- let z1 = (x0 * y1) lxor (x1 * y0) lxor (x2 * y3) lxor (x3 * y2) in
3838- let z2 = (x0 * y2) lxor (x1 * y1) lxor (x2 * y0) lxor (x3 * y3) in
3939- let z3 = (x0 * y3) lxor (x1 * y2) lxor (x2 * y1) lxor (x3 * y0) in
4040- (* Recombine: mask each contribution to its bit lane *)
4141- let z0 = z0 land 0x11111111l in
4242- let z1 = z1 land 0x22222222l in
4343- let z2 = z2 land 0x44444444l in
4444- let z3 = z3 land 0x88888888l in
4545- let lo = z0 lxor z1 lxor z2 lxor z3 in
4646- let z0h = Int32.shift_right_logical z0 0 in (* already masked *)
4747- (* The high 32 bits come from the upper halves of the products.
4848- For 32x32->64 we need the high word. Int32.mul truncates.
4949- We need to use a different approach for the high bits. *)
5050- (* Actually: with Int32.mul we only get the low 32 bits. For GF(2^128)
5151- multiplication we need the full 64-bit product. Let's use the
5252- Karatsuba approach at 16-bit granularity instead. *)
5353- ignore (z0h);
5454- (* Fall back to 16-bit multiply which fits in 32 bits *)
5555- let lo32 = Int32.to_int lo in
5656- (0, lo32) (* TODO: implement properly *)
77+ Reference: BearSSL constant-time technique (Thomas Pornin)
88+ https://bearssl.org/constanttime.html *)
5795858-(* Actually, let me use the approach that works on 32-bit: process
5959- the 128-bit field elements as arrays of bytes, using the
6060- shift-and-xor algorithm but with all operations unconditional. *)
1010+(** Constant-time GF(2^128) multiply.
61116262-(** Constant-time GF(2^128) multiply using shift-and-XOR.
6363-6464- All 128 iterations execute the same operations regardless of input.
6565- The conditional XOR is replaced with a mask: if bit is 1, mask is
6666- all-ones; if 0, mask is all-zeros. AND with the mask makes the XOR
6767- a no-op when the bit is 0. *)
6868-1212+ Conditional operations use arithmetic masks:
1313+ - [(-bit) land 0xff] is [0xFF] when [bit = 1], [0x00] when [bit = 0]
1414+ - AND with the mask makes XOR a no-op when the bit is zero
1515+ - No branch, no table lookup, no data-dependent memory access *)
6916let gf128_mul_ct (x : bytes) (h : bytes) : bytes =
7017 let z = Bytes.make 16 '\x00' in
7118 let v = Bytes.copy h in
7219 for i = 0 to 127 do
7373- (* Constant-time: compute mask from bit i of x *)
7420 let byte = Char.code (Bytes.get x (i / 8)) in
7521 let bit = (byte lsr (7 - (i mod 8))) land 1 in
7676- let mask = (-bit) land 0xff in (* 0xFF if bit=1, 0x00 if bit=0 *)
7777- (* Conditional XOR: z ^= v & mask *)
2222+ let mask = (-bit) land 0xff in
7823 for j = 0 to 15 do
7924 let zj = Char.code (Bytes.get z j) in
8025 let vj = Char.code (Bytes.get v j) in
8126 Bytes.set z j (Char.chr (zj lxor (vj land mask)))
8227 done;
8383- (* Shift v right by 1, XOR with reduction polynomial if carry *)
8428 let carry = Char.code (Bytes.get v 15) land 1 in
8529 for j = 15 downto 1 do
8630 let vj = Char.code (Bytes.get v j) in
8731 let vj1 = Char.code (Bytes.get v (j - 1)) in
8888- Bytes.set v j (Char.chr (((vj lsr 1) lor ((vj1 land 1) lsl 7)) land 0xff))
3232+ Bytes.set v j
3333+ (Char.chr (((vj lsr 1) lor ((vj1 land 1) lsl 7)) land 0xff))
8934 done;
9090- Bytes.set v 0 (Char.chr ((Char.code (Bytes.get v 0) lsr 1) land 0xff));
9191- (* Constant-time conditional XOR with 0xE1 *)
3535+ Bytes.set v 0
3636+ (Char.chr ((Char.code (Bytes.get v 0) lsr 1) land 0xff));
9237 let reduce_mask = (-carry) land 0xff in
9338 let v0 = Char.code (Bytes.get v 0) in
9439 Bytes.set v 0 (Char.chr (v0 lxor (0xe1 land reduce_mask)))
9540 done;
9641 z
97429898-let ghash (key : bytes) (tag : bytes) (data : string) (off : int) (len : int) =
4343+let ghash (key : bytes) (tag : bytes) (data : string) (off : int)
4444+ (len : int) =
9945 let h = Bytes.copy key in
10046 let nblocks = len / 16 in
10147 for b = 0 to nblocks - 1 do
···11662 Bytes.set x i
11763 (Char.chr
11864 (Char.code (Bytes.get tag i)
119119- lxor Char.code (String.get data (off + (nblocks * 16) + i))))
6565+ lxor
6666+ Char.code
6767+ (String.get data (off + (nblocks * 16) + i))))
12068 done;
12169 for i = rem to 15 do
12270 Bytes.set x i (Bytes.get tag i)