Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

dm-delay: don't busy-wait in kthread

When using a kthread to delay the IOs, dm-delay would continuously loop,
checking if IOs were ready to submit. It had a cond_resched() call in
the loop, but might still loop hundreds of millions of times waiting for
an IO that was scheduled to be submitted tens of milliseconds in the future. With
the change to make dm-delay over zoned devices always use kthreads
regardless of the length of the delay, this wasted work only gets worse.

To solve this and still keep roughly the same precision for very short
delays, dm-delay now calls fsleep() for 1/8th of the smallest non-zero
delay it will place on IOs, or 1 ms, whichever is smaller. The reason
that dm-delay doesn't just use the actual expiration time of the next
delayed IO to calculate the sleep time is that delay_dtr() must wait
for the kthread to finish before deleting the table. If a zoned device
with a long delay queued an IO shortly before being suspended and
removed, the IO would be flushed in delay_presuspend(), but removing
the device would still have to wait for the remainder of the long delay.
This time is now capped at 1 ms.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Tested-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

authored by

Benjamin Marzinski and committed by
Mikulas Patocka
33304b75 ad320ae2

+14 -3
+14 -3
drivers/md/dm-delay.c
··· 14 14 #include <linux/bio.h> 15 15 #include <linux/slab.h> 16 16 #include <linux/kthread.h> 17 + #include <linux/delay.h> 17 18 18 19 #include <linux/device-mapper.h> 19 20 20 21 #define DM_MSG_PREFIX "delay" 22 + 23 + #define SLEEP_SHIFT 3 21 24 22 25 struct delay_class { 23 26 struct dm_dev *dev; ··· 37 34 struct work_struct flush_expired_bios; 38 35 struct list_head delayed_bios; 39 36 struct task_struct *worker; 37 + unsigned int worker_sleep_us; 40 38 bool may_delay; 41 39 42 40 struct delay_class read; ··· 140 136 schedule(); 141 137 } else { 142 138 spin_unlock(&dc->delayed_bios_lock); 139 + fsleep(dc->worker_sleep_us); 143 140 cond_resched(); 144 141 } 145 142 } ··· 217 212 { 218 213 struct delay_c *dc; 219 214 int ret; 220 - unsigned int max_delay; 215 + unsigned int max_delay, min_delay; 221 216 222 217 if (argc != 3 && argc != 6 && argc != 9) { 223 218 ti->error = "Requires exactly 3, 6 or 9 arguments"; ··· 240 235 ret = delay_class_ctr(ti, &dc->read, argv); 241 236 if (ret) 242 237 goto bad; 243 - max_delay = dc->read.delay; 238 + min_delay = max_delay = dc->read.delay; 244 239 245 240 if (argc == 3) { 246 241 ret = delay_class_ctr(ti, &dc->write, argv); ··· 256 251 if (ret) 257 252 goto bad; 258 253 max_delay = max(max_delay, dc->write.delay); 254 + min_delay = min_not_zero(min_delay, dc->write.delay); 259 255 260 256 if (argc == 6) { 261 257 ret = delay_class_ctr(ti, &dc->flush, argv + 3); ··· 269 263 if (ret) 270 264 goto bad; 271 265 max_delay = max(max_delay, dc->flush.delay); 266 + min_delay = min_not_zero(min_delay, dc->flush.delay); 272 267 273 268 out: 274 269 if (max_delay < 50) { 270 + if (min_delay >> SLEEP_SHIFT) 271 + dc->worker_sleep_us = 1000; 272 + else 273 + dc->worker_sleep_us = (min_delay * 1000) >> SLEEP_SHIFT; 275 274 /* 276 275 * In case of small requested delays, use kthread instead of 277 276 * timers and workqueue to achieve better latency. 
··· 449 438 450 439 static struct target_type delay_target = { 451 440 .name = "delay", 452 - .version = {1, 4, 0}, 441 + .version = {1, 5, 0}, 453 442 .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_ZONED_HM, 454 443 .module = THIS_MODULE, 455 444 .ctr = delay_ctr,