Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mm/hwpoison: decouple hwpoison_filter from mm/memory-failure.c

mm/memory-failure.c defines and uses the hwpoison_filter_* parameters, but the
values of those parameters can only be modified via mm/hwpoison-inject.c
from userspace. They have potentially different lifetimes. Decouple
those parameters from mm/memory-failure.c to fix this broken layering.

Link: https://lkml.kernel.org/r/20250904062258.3336092-1-linmiaohe@huawei.com
Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Suggested-by: Michal Hocko <mhocko@suse.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Authored by Miaohe Lin and committed by Andrew Morton
5ce1dbfd a488ba31

+113 -96
+1
fs/proc/page.c
··· 256 256 257 257 return u; 258 258 } 259 + EXPORT_SYMBOL_GPL(stable_page_flags); 259 260 260 261 /* /proc/kpageflags - an array exposing page flags 261 262 *
+91
mm/hwpoison-inject.c
··· 7 7 #include <linux/swap.h> 8 8 #include <linux/pagemap.h> 9 9 #include <linux/hugetlb.h> 10 + #include <linux/page-flags.h> 11 + #include <linux/memcontrol.h> 10 12 #include "internal.h" 13 + 14 + static u32 hwpoison_filter_enable; 15 + static u32 hwpoison_filter_dev_major = ~0U; 16 + static u32 hwpoison_filter_dev_minor = ~0U; 17 + static u64 hwpoison_filter_flags_mask; 18 + static u64 hwpoison_filter_flags_value; 19 + 20 + static int hwpoison_filter_dev(struct page *p) 21 + { 22 + struct folio *folio = page_folio(p); 23 + struct address_space *mapping; 24 + dev_t dev; 25 + 26 + if (hwpoison_filter_dev_major == ~0U && 27 + hwpoison_filter_dev_minor == ~0U) 28 + return 0; 29 + 30 + mapping = folio_mapping(folio); 31 + if (mapping == NULL || mapping->host == NULL) 32 + return -EINVAL; 33 + 34 + dev = mapping->host->i_sb->s_dev; 35 + if (hwpoison_filter_dev_major != ~0U && 36 + hwpoison_filter_dev_major != MAJOR(dev)) 37 + return -EINVAL; 38 + if (hwpoison_filter_dev_minor != ~0U && 39 + hwpoison_filter_dev_minor != MINOR(dev)) 40 + return -EINVAL; 41 + 42 + return 0; 43 + } 44 + 45 + static int hwpoison_filter_flags(struct page *p) 46 + { 47 + if (!hwpoison_filter_flags_mask) 48 + return 0; 49 + 50 + if ((stable_page_flags(p) & hwpoison_filter_flags_mask) == 51 + hwpoison_filter_flags_value) 52 + return 0; 53 + else 54 + return -EINVAL; 55 + } 56 + 57 + /* 58 + * This allows stress tests to limit test scope to a collection of tasks 59 + * by putting them under some memcg. This prevents killing unrelated/important 60 + * processes such as /sbin/init. Note that the target task may share clean 61 + * pages with init (eg. libc text), which is harmless. If the target task 62 + * share _dirty_ pages with another task B, the test scheme must make sure B 63 + * is also included in the memcg. At last, due to race conditions this filter 64 + * can only guarantee that the page either belongs to the memcg tasks, or is 65 + * a freed page. 
66 + */ 67 + #ifdef CONFIG_MEMCG 68 + static u64 hwpoison_filter_memcg; 69 + static int hwpoison_filter_task(struct page *p) 70 + { 71 + if (!hwpoison_filter_memcg) 72 + return 0; 73 + 74 + if (page_cgroup_ino(p) != hwpoison_filter_memcg) 75 + return -EINVAL; 76 + 77 + return 0; 78 + } 79 + #else 80 + static int hwpoison_filter_task(struct page *p) { return 0; } 81 + #endif 82 + 83 + static int hwpoison_filter(struct page *p) 84 + { 85 + if (!hwpoison_filter_enable) 86 + return 0; 87 + 88 + if (hwpoison_filter_dev(p)) 89 + return -EINVAL; 90 + 91 + if (hwpoison_filter_flags(p)) 92 + return -EINVAL; 93 + 94 + if (hwpoison_filter_task(p)) 95 + return -EINVAL; 96 + 97 + return 0; 98 + } 11 99 12 100 static struct dentry *hwpoison_dir; 13 101 ··· 155 67 static void __exit pfn_inject_exit(void) 156 68 { 157 69 hwpoison_filter_enable = 0; 70 + hwpoison_filter_unregister(); 158 71 debugfs_remove_recursive(hwpoison_dir); 159 72 } 160 73 ··· 193 104 debugfs_create_u64("corrupt-filter-memcg", 0600, hwpoison_dir, 194 105 &hwpoison_filter_memcg); 195 106 #endif 107 + 108 + hwpoison_filter_register(hwpoison_filter); 196 109 197 110 return 0; 198 111 }
+3 -7
mm/internal.h
··· 1228 1228 #ifdef CONFIG_MEMORY_FAILURE 1229 1229 int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill); 1230 1230 void shake_folio(struct folio *folio); 1231 - extern int hwpoison_filter(struct page *p); 1231 + typedef int hwpoison_filter_func_t(struct page *p); 1232 + void hwpoison_filter_register(hwpoison_filter_func_t *filter); 1233 + void hwpoison_filter_unregister(void); 1232 1234 1233 - extern u32 hwpoison_filter_dev_major; 1234 - extern u32 hwpoison_filter_dev_minor; 1235 - extern u64 hwpoison_filter_flags_mask; 1236 - extern u64 hwpoison_filter_flags_value; 1237 - extern u64 hwpoison_filter_memcg; 1238 - extern u32 hwpoison_filter_enable; 1239 1235 #define MAGIC_HWPOISON 0x48575053U /* HWPS */ 1240 1236 void SetPageHWPoisonTakenOff(struct page *page); 1241 1237 void ClearPageHWPoisonTakenOff(struct page *page);
+1
mm/memcontrol.c
··· 287 287 rcu_read_unlock(); 288 288 return ino; 289 289 } 290 + EXPORT_SYMBOL_GPL(page_cgroup_ino); 290 291 291 292 /* Subset of node_stat_item for memcg stats */ 292 293 static const unsigned int memcg_node_stat_items[] = {
+17 -89
mm/memory-failure.c
··· 212 212 return true; 213 213 } 214 214 215 - #if IS_ENABLED(CONFIG_HWPOISON_INJECT) 215 + static hwpoison_filter_func_t __rcu *hwpoison_filter_func __read_mostly; 216 216 217 - u32 hwpoison_filter_enable = 0; 218 - u32 hwpoison_filter_dev_major = ~0U; 219 - u32 hwpoison_filter_dev_minor = ~0U; 220 - u64 hwpoison_filter_flags_mask; 221 - u64 hwpoison_filter_flags_value; 222 - EXPORT_SYMBOL_GPL(hwpoison_filter_enable); 223 - EXPORT_SYMBOL_GPL(hwpoison_filter_dev_major); 224 - EXPORT_SYMBOL_GPL(hwpoison_filter_dev_minor); 225 - EXPORT_SYMBOL_GPL(hwpoison_filter_flags_mask); 226 - EXPORT_SYMBOL_GPL(hwpoison_filter_flags_value); 227 - 228 - static int hwpoison_filter_dev(struct page *p) 217 + void hwpoison_filter_register(hwpoison_filter_func_t *filter) 229 218 { 230 - struct folio *folio = page_folio(p); 231 - struct address_space *mapping; 232 - dev_t dev; 233 - 234 - if (hwpoison_filter_dev_major == ~0U && 235 - hwpoison_filter_dev_minor == ~0U) 236 - return 0; 237 - 238 - mapping = folio_mapping(folio); 239 - if (mapping == NULL || mapping->host == NULL) 240 - return -EINVAL; 241 - 242 - dev = mapping->host->i_sb->s_dev; 243 - if (hwpoison_filter_dev_major != ~0U && 244 - hwpoison_filter_dev_major != MAJOR(dev)) 245 - return -EINVAL; 246 - if (hwpoison_filter_dev_minor != ~0U && 247 - hwpoison_filter_dev_minor != MINOR(dev)) 248 - return -EINVAL; 249 - 250 - return 0; 219 + rcu_assign_pointer(hwpoison_filter_func, filter); 251 220 } 221 + EXPORT_SYMBOL_GPL(hwpoison_filter_register); 252 222 253 - static int hwpoison_filter_flags(struct page *p) 223 + void hwpoison_filter_unregister(void) 254 224 { 255 - if (!hwpoison_filter_flags_mask) 256 - return 0; 257 - 258 - if ((stable_page_flags(p) & hwpoison_filter_flags_mask) == 259 - hwpoison_filter_flags_value) 260 - return 0; 261 - else 262 - return -EINVAL; 225 + RCU_INIT_POINTER(hwpoison_filter_func, NULL); 226 + synchronize_rcu(); 263 227 } 228 + EXPORT_SYMBOL_GPL(hwpoison_filter_unregister); 264 229 265 - /* 266 
- * This allows stress tests to limit test scope to a collection of tasks 267 - * by putting them under some memcg. This prevents killing unrelated/important 268 - * processes such as /sbin/init. Note that the target task may share clean 269 - * pages with init (eg. libc text), which is harmless. If the target task 270 - * share _dirty_ pages with another task B, the test scheme must make sure B 271 - * is also included in the memcg. At last, due to race conditions this filter 272 - * can only guarantee that the page either belongs to the memcg tasks, or is 273 - * a freed page. 274 - */ 275 - #ifdef CONFIG_MEMCG 276 - u64 hwpoison_filter_memcg; 277 - EXPORT_SYMBOL_GPL(hwpoison_filter_memcg); 278 - static int hwpoison_filter_task(struct page *p) 230 + static int hwpoison_filter(struct page *p) 279 231 { 280 - if (!hwpoison_filter_memcg) 281 - return 0; 232 + int ret = 0; 233 + hwpoison_filter_func_t *filter; 282 234 283 - if (page_cgroup_ino(p) != hwpoison_filter_memcg) 284 - return -EINVAL; 235 + rcu_read_lock(); 236 + filter = rcu_dereference(hwpoison_filter_func); 237 + if (filter) 238 + ret = filter(p); 239 + rcu_read_unlock(); 285 240 286 - return 0; 241 + return ret; 287 242 } 288 - #else 289 - static int hwpoison_filter_task(struct page *p) { return 0; } 290 - #endif 291 - 292 - int hwpoison_filter(struct page *p) 293 - { 294 - if (!hwpoison_filter_enable) 295 - return 0; 296 - 297 - if (hwpoison_filter_dev(p)) 298 - return -EINVAL; 299 - 300 - if (hwpoison_filter_flags(p)) 301 - return -EINVAL; 302 - 303 - if (hwpoison_filter_task(p)) 304 - return -EINVAL; 305 - 306 - return 0; 307 - } 308 - EXPORT_SYMBOL_GPL(hwpoison_filter); 309 - #else 310 - int hwpoison_filter(struct page *p) 311 - { 312 - return 0; 313 - } 314 - #endif 315 243 316 244 /* 317 245 * Kill all processes that have a poisoned page mapped and then isolate