lib: add mul_u64_add_u64_div_u64() and mul_u64_u64_div_u64_roundup()

The existing mul_u64_u64_div_u64() rounds down.  A 'rounding up' variant
needs 'divisor - 1' added in between the multiply and the divide, so it
cannot easily be done by a caller.

Add mul_u64_add_u64_div_u64(a, b, c, d) that calculates (a * b + c)/d and
implement the 'round down' and 'round up' variants using it.

Update the x86-64 asm to optimise for 'c' being a constant zero.

Add kernel-doc comments for all three functions.

Link: https://lkml.kernel.org/r/20251105201035.64043-5-david.laight.linux@gmail.com
Signed-off-by: David Laight <david.laight.linux@gmail.com>
Reviewed-by: Nicolas Pitre <npitre@baylibre.com>
Cc: Biju Das <biju.das.jz@bp.renesas.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Li RongQing <lirongqing@baidu.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Uwe Kleine-König <u.kleine-koenig@baylibre.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

David Laight and committed by

Andrew Morton 6 months ago 6480241f d91f891d

+67 -15

3 changed files

expand all

arch

x86

include

asm

div64.h

include

linux

math64.h

lib

math

div64.c

+12 -8

arch/x86/include/asm/div64.h

··· 84 84 * Will generate an #DE when the result doesn't fit u64, could fix with an 85 85 * __ex_table[] entry when it becomes an issue. 86 86 */ 87 - static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) 87 + static inline u64 mul_u64_add_u64_div_u64(u64 rax, u64 mul, u64 add, u64 div) 88 88 { 89 - u64 q; 89 + u64 rdx; 90 90 91 - asm ("mulq %2; divq %3" : "=a" (q) 92 - : "a" (a), "rm" (mul), "rm" (div) 93 - : "rdx"); 91 + asm ("mulq %[mul]" : "+a" (rax), "=d" (rdx) : [mul] "rm" (mul)); 94 92 95 - return q; 93 + if (!statically_true(!add)) 94 + asm ("addq %[add], %[lo]; adcq $0, %[hi]" : 95 + [lo] "+r" (rax), [hi] "+r" (rdx) : [add] "irm" (add)); 96 + 97 + asm ("divq %[div]" : "+a" (rax), "+d" (rdx) : [div] "rm" (div)); 98 + 99 + return rax; 96 100 } 97 - #define mul_u64_u64_div_u64 mul_u64_u64_div_u64 101 + #define mul_u64_add_u64_div_u64 mul_u64_add_u64_div_u64 98 102 99 103 static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div) 100 104 { 101 - return mul_u64_u64_div_u64(a, mul, div); 105 + return mul_u64_add_u64_div_u64(a, mul, 0, div); 102 106 } 103 107 #define mul_u64_u32_div mul_u64_u32_div 104 108

+47 -1

include/linux/math64.h

··· 282 282 } 283 283 #endif /* mul_u64_u32_div */ 284 284 285 - u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div); 285 + /** 286 + * mul_u64_add_u64_div_u64 - unsigned 64bit multiply, add, and divide 287 + * @a: first unsigned 64bit multiplicand 288 + * @b: second unsigned 64bit multiplicand 289 + * @c: unsigned 64bit addend 290 + * @d: unsigned 64bit divisor 291 + * 292 + * Multiply two 64bit values together to generate a 128bit product 293 + * add a third value and then divide by a fourth. 294 + * The Generic code divides by 0 if @d is zero and returns ~0 on overflow. 295 + * Architecture specific code may trap on zero or overflow. 296 + * 297 + * Return: (@a * @b + @c) / @d 298 + */ 299 + u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d); 300 + 301 + /** 302 + * mul_u64_u64_div_u64 - unsigned 64bit multiply and divide 303 + * @a: first unsigned 64bit multiplicand 304 + * @b: second unsigned 64bit multiplicand 305 + * @d: unsigned 64bit divisor 306 + * 307 + * Multiply two 64bit values together to generate a 128bit product 308 + * and then divide by a third value. 309 + * The Generic code divides by 0 if @d is zero and returns ~0 on overflow. 310 + * Architecture specific code may trap on zero or overflow. 311 + * 312 + * Return: @a * @b / @d 313 + */ 314 + #define mul_u64_u64_div_u64(a, b, d) mul_u64_add_u64_div_u64(a, b, 0, d) 315 + 316 + /** 317 + * mul_u64_u64_div_u64_roundup - unsigned 64bit multiply and divide rounded up 318 + * @a: first unsigned 64bit multiplicand 319 + * @b: second unsigned 64bit multiplicand 320 + * @d: unsigned 64bit divisor 321 + * 322 + * Multiply two 64bit values together to generate a 128bit product 323 + * and then divide and round up. 324 + * The Generic code divides by 0 if @d is zero and returns ~0 on overflow. 325 + * Architecture specific code may trap on zero or overflow. 
326 + * 327 + * Return: (@a * @b + @d - 1) / @d 328 + */ 329 + #define mul_u64_u64_div_u64_roundup(a, b, d) \ 330 + ({ u64 _tmp = (d); mul_u64_add_u64_div_u64(a, b, _tmp - 1, _tmp); }) 331 + 286 332 287 333 /** 288 334 * DIV64_U64_ROUND_UP - unsigned 64bit divide with 64bit divisor rounded up

+8 -6

lib/math/div64.c

··· 183 183 } 184 184 EXPORT_SYMBOL(iter_div_u64_rem); 185 185 186 - #ifndef mul_u64_u64_div_u64 187 - u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 d) 186 + #ifndef mul_u64_add_u64_div_u64 187 + u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d) 188 188 { 189 189 #if defined(__SIZEOF_INT128__) 190 190 191 191 /* native 64x64=128 bits multiplication */ 192 - u128 prod = (u128)a * b; 192 + u128 prod = (u128)a * b + c; 193 193 u64 n_lo = prod, n_hi = prod >> 64; 194 194 195 195 #else ··· 198 198 u32 a_lo = a, a_hi = a >> 32, b_lo = b, b_hi = b >> 32; 199 199 u64 x, y, z; 200 200 201 - x = (u64)a_lo * b_lo; 202 - y = (u64)a_lo * b_hi + (u32)(x >> 32); 201 + /* Since (x-1)(x-1) + 2(x-1) == x.x - 1 two u32 can be added to a u64 */ 202 + x = (u64)a_lo * b_lo + (u32)c; 203 + y = (u64)a_lo * b_hi + (u32)(c >> 32); 204 + y += (u32)(x >> 32); 203 205 z = (u64)a_hi * b_hi + (u32)(y >> 32); 204 206 y = (u64)a_hi * b_lo + (u32)y; 205 207 z += (u32)(y >> 32); ··· 267 265 268 266 return res; 269 267 } 270 - EXPORT_SYMBOL(mul_u64_u64_div_u64); 268 + EXPORT_SYMBOL(mul_u64_add_u64_div_u64); 271 269 #endif

Configure Feed

Configure Feed