Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mm/damon/lru_sort: detect and use fresh enabled and kdamond_pid values

DAMON_LRU_SORT updates 'enabled' and 'kdamond_pid' parameter values, which
represents the running status of its kdamond, when the user explicitly
requests start/stop of the kdamond. The kdamond can, however, be stopped
in events other than the explicit user request in the following three
events.

1. ctx->regions_score_histogram allocation failure at beginning of the
execution,
2. damon_commit_ctx() failure due to invalid user input, and
3. damon_commit_ctx() failure due to its internal allocation failures.

Hence, if the kdamond is stopped by the above three events, the values of
the status parameters can be stale. Users could show the stale values and
be confused. This is already bad, but the real consequence is worse.
DAMON_LRU_SORT avoids unnecessary damon_start() and damon_stop() calls
based on the 'enabled' parameter value. And the update of 'enabled'
parameter value depends on the damon_start() and damon_stop() call
results. Hence, once the kdamond has stopped by the unintentional events,
the user cannot restart the kdamond before the system reboot. For
example, the issue can be reproduced via below steps.

# cd /sys/module/damon_lru_sort/parameters
#
# # start DAMON_LRU_SORT
# echo Y > enabled
# ps -ef | grep kdamond
root 806 2 0 17:53 ? 00:00:00 [kdamond.0]
root 808 803 0 17:53 pts/4 00:00:00 grep kdamond
#
# # commit wrong input to stop kdamond withou explicit stop request
# echo 3 > addr_unit
# echo Y > commit_inputs
bash: echo: write error: Invalid argument
#
# # confirm kdamond is stopped
# ps -ef | grep kdamond
root 811 803 0 17:53 pts/4 00:00:00 grep kdamond
#
# # users casn now show stable status
# cat enabled
Y
# cat kdamond_pid
806
#
# # even after fixing the wrong parameter,
# # kdamond cannot be restarted.
# echo 1 > addr_unit
# echo Y > enabled
# ps -ef | grep kdamond
root 815 803 0 17:54 pts/4 00:00:00 grep kdamond

The problem will only rarely happen in real and common setups for the
following reasons. The allocation failures are unlikely in such setups
since those allocations are arguably too small to fail. Also sane users
on real production environments may not commit wrong input parameters.
But once it happens, the consequence is quite bad. And the bug is a bug.

The issue stems from the fact that there are multiple events that can
change the status, and following all the events is challenging.
Dynamically detect and use the fresh status for the parameters when those
are requested.

Link: https://lore.kernel.org/20260419161003.79176-3-sj@kernel.org
Fixes: 40e983cca927 ("mm/damon: introduce DAMON-based LRU-lists Sorting")
Co-developed-by: Liew Rui Yan <aethernet65535@gmail.com>
Signed-off-by: Liew Rui Yan <aethernet65535@gmail.com>
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: <stable@vger.kernel.org> # 6.0.x
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

SeongJae Park and committed by
Andrew Morton
b98b7ff6 64a140af

+55 -30
+55 -30
mm/damon/lru_sort.c
··· 161 161 */ 162 162 static unsigned long addr_unit __read_mostly = 1; 163 163 164 - /* 165 - * PID of the DAMON thread 166 - * 167 - * If DAMON_LRU_SORT is enabled, this becomes the PID of the worker thread. 168 - * Else, -1. 169 - */ 170 - static int kdamond_pid __read_mostly = -1; 171 - module_param(kdamond_pid, int, 0400); 172 - 173 164 static struct damos_stat damon_lru_sort_hot_stat; 174 165 DEFINE_DAMON_MODULES_DAMOS_STATS_PARAMS(damon_lru_sort_hot_stat, 175 166 lru_sort_tried_hot_regions, lru_sorted_hot_regions, ··· 377 386 { 378 387 int err; 379 388 380 - if (!on) { 381 - err = damon_stop(&ctx, 1); 382 - if (!err) 383 - kdamond_pid = -1; 384 - return err; 385 - } 389 + if (!on) 390 + return damon_stop(&ctx, 1); 386 391 387 392 err = damon_lru_sort_apply_parameters(); 388 393 if (err) ··· 387 400 err = damon_start(&ctx, 1, true); 388 401 if (err) 389 402 return err; 390 - kdamond_pid = damon_kdamond_pid(ctx); 391 - if (kdamond_pid < 0) 392 - return kdamond_pid; 393 403 return damon_call(ctx, &call_control); 394 404 } 395 405 ··· 414 430 MODULE_PARM_DESC(addr_unit, 415 431 "Scale factor for DAMON_LRU_SORT to ops address conversion (default: 1)"); 416 432 433 + static bool damon_lru_sort_enabled(void) 434 + { 435 + if (!ctx) 436 + return false; 437 + return damon_is_running(ctx); 438 + } 439 + 417 440 static int damon_lru_sort_enabled_store(const char *val, 418 441 const struct kernel_param *kp) 419 442 { 420 - bool is_enabled = enabled; 421 - bool enable; 422 443 int err; 423 444 424 - err = kstrtobool(val, &enable); 445 + err = kstrtobool(val, &enabled); 425 446 if (err) 426 447 return err; 427 448 428 - if (is_enabled == enable) 449 + if (damon_lru_sort_enabled() == enabled) 429 450 return 0; 430 451 431 452 /* Called before init function. The function will handle this. */ 432 453 if (!damon_initialized()) 433 - goto set_param_out; 454 + return 0; 434 455 435 - err = damon_lru_sort_turn(enable); 436 - if (err) 437 - return err; 456 + return damon_lru_sort_turn(enabled); 457 + } 438 458 439 - set_param_out: 440 - enabled = enable; 441 - return err; 459 + static int damon_lru_sort_enabled_load(char *buffer, 460 + const struct kernel_param *kp) 461 + { 462 + return sprintf(buffer, "%c\n", damon_lru_sort_enabled() ? 'Y' : 'N'); 442 463 } 443 464 444 465 static const struct kernel_param_ops enabled_param_ops = { 445 466 .set = damon_lru_sort_enabled_store, 446 - .get = param_get_bool, 467 + .get = damon_lru_sort_enabled_load, 447 468 }; 448 469 449 470 module_param_cb(enabled, &enabled_param_ops, &enabled, 0600); 450 471 MODULE_PARM_DESC(enabled, 451 472 "Enable or disable DAMON_LRU_SORT (default: disabled)"); 473 + 474 + static int damon_lru_sort_kdamond_pid_store(const char *val, 475 + const struct kernel_param *kp) 476 + { 477 + /* 478 + * kdamond_pid is read-only, but kernel command line could write it. 479 + * Do nothing here. 480 + */ 481 + return 0; 482 + } 483 + 484 + static int damon_lru_sort_kdamond_pid_load(char *buffer, 485 + const struct kernel_param *kp) 486 + { 487 + int kdamond_pid = -1; 488 + 489 + if (ctx) { 490 + kdamond_pid = damon_kdamond_pid(ctx); 491 + if (kdamond_pid < 0) 492 + kdamond_pid = -1; 493 + } 494 + return sprintf(buffer, "%d\n", kdamond_pid); 495 + } 496 + 497 + static const struct kernel_param_ops kdamond_pid_param_ops = { 498 + .set = damon_lru_sort_kdamond_pid_store, 499 + .get = damon_lru_sort_kdamond_pid_load, 500 + }; 501 + 502 + /* 503 + * PID of the DAMON thread 504 + * 505 + * If DAMON_LRU_SORT is enabled, this becomes the PID of the worker thread. 506 + * Else, -1. 507 + */ 508 + module_param_cb(kdamond_pid, &kdamond_pid_param_ops, NULL, 0400); 452 509 453 510 static int __init damon_lru_sort_init(void) 454 511 {