Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'for-3.17-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup fixes from Tejun Heo:
"This is quite late but these need to be backported anyway.

This is the fix for a long-standing cpuset bug which existed from
2009. cpuset makes use of PF_SPREAD_{PAGE|SLAB} flags to modify the
task's memory allocation behavior according to the settings of the
cpuset it belongs to; unfortunately, when those flags have to be
changed, cpuset did so directly even while the target task is running,
which is obviously racy as task->flags may be modified by the task
itself at any time. This obscure bug manifested as corrupt
PF_USED_MATH flag leading to a weird crash.

The bug is fixed by moving the flag to task->atomic_flags. The first
two are preparatory ones to help define atomic_flags accessors and the
third one is the actual fix"

* 'for-3.17-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
cpuset: PF_SPREAD_PAGE and PF_SPREAD_SLAB should be atomic flags
sched: add macros to define bitops for task atomic flags
sched: fix confusing PFA_NO_NEW_PRIVS constant

+41 -22
+3 -3
Documentation/cgroups/cpusets.txt
··· 345 345 The implementation is simple. 346 346 347 347 Setting the flag 'cpuset.memory_spread_page' turns on a per-process flag 348 - PF_SPREAD_PAGE for each task that is in that cpuset or subsequently 348 + PFA_SPREAD_PAGE for each task that is in that cpuset or subsequently 349 349 joins that cpuset. The page allocation calls for the page cache 350 - is modified to perform an inline check for this PF_SPREAD_PAGE task 350 + is modified to perform an inline check for this PFA_SPREAD_PAGE task 351 351 flag, and if set, a call to a new routine cpuset_mem_spread_node() 352 352 returns the node to prefer for the allocation. 353 353 354 354 Similarly, setting 'cpuset.memory_spread_slab' turns on the flag 355 - PF_SPREAD_SLAB, and appropriately marked slab caches will allocate 355 + PFA_SPREAD_SLAB, and appropriately marked slab caches will allocate 356 356 pages from the node returned by cpuset_mem_spread_node(). 357 357 358 358 The cpuset_mem_spread_node() routine is also simple. It uses the
+2 -2
include/linux/cpuset.h
··· 93 93 94 94 static inline int cpuset_do_page_mem_spread(void) 95 95 { 96 - return current->flags & PF_SPREAD_PAGE; 96 + return task_spread_page(current); 97 97 } 98 98 99 99 static inline int cpuset_do_slab_mem_spread(void) 100 100 { 101 - return current->flags & PF_SPREAD_SLAB; 101 + return task_spread_slab(current); 102 102 } 103 103 104 104 extern int current_cpuset_is_being_rebound(void);
+23 -11
include/linux/sched.h
··· 1903 1903 #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ 1904 1904 #define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ 1905 1905 #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ 1906 - #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ 1907 - #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ 1908 1906 #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ 1909 1907 #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ 1910 1908 #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ ··· 1955 1957 } 1956 1958 1957 1959 /* Per-process atomic flags. */ 1958 - #define PFA_NO_NEW_PRIVS 0x00000001 /* May not gain new privileges. */ 1960 + #define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ 1961 + #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ 1962 + #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ 1959 1963 1960 - static inline bool task_no_new_privs(struct task_struct *p) 1961 - { 1962 - return test_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags); 1963 - } 1964 1964 1965 - static inline void task_set_no_new_privs(struct task_struct *p) 1966 - { 1967 - set_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags); 1968 - } 1965 + #define TASK_PFA_TEST(name, func) \ 1966 + static inline bool task_##func(struct task_struct *p) \ 1967 + { return test_bit(PFA_##name, &p->atomic_flags); } 1968 + #define TASK_PFA_SET(name, func) \ 1969 + static inline void task_set_##func(struct task_struct *p) \ 1970 + { set_bit(PFA_##name, &p->atomic_flags); } 1971 + #define TASK_PFA_CLEAR(name, func) \ 1972 + static inline void task_clear_##func(struct task_struct *p) \ 1973 + { clear_bit(PFA_##name, &p->atomic_flags); } 1974 + 1975 + TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs) 1976 + TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs) 1977 + 1978 + TASK_PFA_TEST(SPREAD_PAGE, spread_page) 1979 + 
TASK_PFA_SET(SPREAD_PAGE, spread_page) 1980 + TASK_PFA_CLEAR(SPREAD_PAGE, spread_page) 1981 + 1982 + TASK_PFA_TEST(SPREAD_SLAB, spread_slab) 1983 + TASK_PFA_SET(SPREAD_SLAB, spread_slab) 1984 + TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) 1969 1985 1970 1986 /* 1971 1987 * task->jobctl flags
+5 -4
kernel/cpuset.c
··· 365 365 struct task_struct *tsk) 366 366 { 367 367 if (is_spread_page(cs)) 368 - tsk->flags |= PF_SPREAD_PAGE; 368 + task_set_spread_page(tsk); 369 369 else 370 - tsk->flags &= ~PF_SPREAD_PAGE; 370 + task_clear_spread_page(tsk); 371 + 371 372 if (is_spread_slab(cs)) 372 - tsk->flags |= PF_SPREAD_SLAB; 373 + task_set_spread_slab(tsk); 373 374 else 374 - tsk->flags &= ~PF_SPREAD_SLAB; 375 + task_clear_spread_slab(tsk); 375 376 } 376 377 377 378 /*
+2 -2
mm/slab.c
··· 2987 2987 2988 2988 #ifdef CONFIG_NUMA 2989 2989 /* 2990 - * Try allocating on another node if PF_SPREAD_SLAB is a mempolicy is set. 2990 + * Try allocating on another node if PFA_SPREAD_SLAB is a mempolicy is set. 2991 2991 * 2992 2992 * If we are in_interrupt, then process context, including cpusets and 2993 2993 * mempolicy, may not apply and should not be used for allocation policy. ··· 3219 3219 { 3220 3220 void *objp; 3221 3221 3222 - if (current->mempolicy || unlikely(current->flags & PF_SPREAD_SLAB)) { 3222 + if (current->mempolicy || cpuset_do_slab_mem_spread()) { 3223 3223 objp = alternate_node_alloc(cache, flags); 3224 3224 if (objp) 3225 3225 goto out;
+6
scripts/tags.sh
··· 197 197 --regex-c++='/SETPCGFLAG\(([^,)]*).*/SetPageCgroup\1/' \ 198 198 --regex-c++='/CLEARPCGFLAG\(([^,)]*).*/ClearPageCgroup\1/' \ 199 199 --regex-c++='/TESTCLEARPCGFLAG\(([^,)]*).*/TestClearPageCgroup\1/' \ 200 + --regex-c++='/TASK_PFA_TEST\([^,]*,\s*([^)]*)\)/task_\1/' \ 201 + --regex-c++='/TASK_PFA_SET\([^,]*,\s*([^)]*)\)/task_set_\1/' \ 202 + --regex-c++='/TASK_PFA_CLEAR\([^,]*,\s*([^)]*)\)/task_clear_\1/'\ 200 203 --regex-c='/PCI_OP_READ\((\w*).*[1-4]\)/pci_bus_read_config_\1/' \ 201 204 --regex-c='/PCI_OP_WRITE\((\w*).*[1-4]\)/pci_bus_write_config_\1/' \ 202 205 --regex-c='/DEFINE_(MUTEX|SEMAPHORE|SPINLOCK)\((\w*)/\2/v/' \ ··· 263 260 --regex='/SETPCGFLAG\(([^,)]*).*/SetPageCgroup\1/' \ 264 261 --regex='/CLEARPCGFLAG\(([^,)]*).*/ClearPageCgroup\1/' \ 265 262 --regex='/TESTCLEARPCGFLAG\(([^,)]*).*/TestClearPageCgroup\1/' \ 263 + --regex='/TASK_PFA_TEST\([^,]*,\s*([^)]*)\)/task_\1/' \ 264 + --regex='/TASK_PFA_SET\([^,]*,\s*([^)]*)\)/task_set_\1/' \ 265 + --regex='/TASK_PFA_CLEAR\([^,]*,\s*([^)]*)\)/task_clear_\1/' \ 266 266 --regex='/_PE(\([^,)]*\).*/PEVENT_ERRNO__\1/' \ 267 267 --regex='/PCI_OP_READ(\([a-z]*[a-z]\).*[1-4])/pci_bus_read_config_\1/' \ 268 268 --regex='/PCI_OP_WRITE(\([a-z]*[a-z]\).*[1-4])/pci_bus_write_config_\1/'\