Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'xfs-6.3-fixes-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs percpu counter fixes from Darrick Wong:
"We discovered a filesystem summary counter corruption problem that was
traced to cpu hot-remove racing with the call to percpu_counter_sum
that sets the free block count in the superblock when writing it to
disk. The root cause is that percpu_counter_sum doesn't cull from
dying cpus and hence misses those counter values if the cpu shutdown
hooks have not yet run to merge the values.

I'm hoping this is a fairly painless fix to the problem, since the
dying cpu mask should generally be empty. It's been in for-next for a
week without any complaints from the bots.

- Fix a race in the percpu counters summation code where the
summation failed to add in the values for any CPUs that were dying
but not yet dead. This fixes some minor discrepancies and incorrect
assertions when running generic/650"

* tag 'xfs-6.3-fixes-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
pcpcntr: remove percpu_counter_sum_all()
fork: remove use of percpu_counter_sum_all
pcpcntrs: fix dying cpu summation race
cpumask: introduce for_each_cpu_or

+77 -34
+17
include/linux/cpumask.h
···
 351  351  	for_each_andnot_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits)
 352  352
 353  353  /**
      354 + * for_each_cpu_or - iterate over every cpu present in either mask
      355 + * @cpu: the (optionally unsigned) integer iterator
      356 + * @mask1: the first cpumask pointer
      357 + * @mask2: the second cpumask pointer
      358 + *
      359 + * This saves a temporary CPU mask in many places. It is equivalent to:
      360 + *	struct cpumask tmp;
      361 + *	cpumask_or(&tmp, &mask1, &mask2);
      362 + *	for_each_cpu(cpu, &tmp)
      363 + *		...
      364 + *
      365 + * After the loop, cpu is >= nr_cpu_ids.
      366 + */
      367 +#define for_each_cpu_or(cpu, mask1, mask2) \
      368 +	for_each_or_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits)
      369 +
      370 +/**
 354  371   * cpumask_any_but - return a "random" in a cpumask, but not this one.
 355  372   * @mask: the cpumask to search
 356  373   * @cpu: the cpu to ignore.
+37
include/linux/find.h
···
  14   14  					unsigned long nbits, unsigned long start);
  15   15  unsigned long _find_next_andnot_bit(const unsigned long *addr1, const unsigned long *addr2,
  16   16  					unsigned long nbits, unsigned long start);
       17 +unsigned long _find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2,
       18 +					unsigned long nbits, unsigned long start);
  17   19  unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits,
  18   20  					unsigned long start);
  19   21  extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size);
···
 126  124  	}
 127  125
 128  126  	return _find_next_andnot_bit(addr1, addr2, size, offset);
      127 +}
      128 +#endif
      129 +
      130 +#ifndef find_next_or_bit
      131 +/**
      132 + * find_next_or_bit - find the next set bit in either memory regions
      133 + * @addr1: The first address to base the search on
      134 + * @addr2: The second address to base the search on
      135 + * @size: The bitmap size in bits
      136 + * @offset: The bitnumber to start searching at
      137 + *
      138 + * Returns the bit number for the next set bit
      139 + * If no bits are set, returns @size.
      140 + */
      141 +static inline
      142 +unsigned long find_next_or_bit(const unsigned long *addr1,
      143 +		const unsigned long *addr2, unsigned long size,
      144 +		unsigned long offset)
      145 +{
      146 +	if (small_const_nbits(size)) {
      147 +		unsigned long val;
      148 +
      149 +		if (unlikely(offset >= size))
      150 +			return size;
      151 +
      152 +		val = (*addr1 | *addr2) & GENMASK(size - 1, offset);
      153 +		return val ? __ffs(val) : size;
      154 +	}
      155 +
      156 +	return _find_next_or_bit(addr1, addr2, size, offset);
 129  157  }
 130  158  #endif
 131  159
···
 566  534  #define for_each_andnot_bit(bit, addr1, addr2, size) \
 567  535  	for ((bit) = 0; \
 568  536  	     (bit) = find_next_andnot_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\
      537 +	     (bit)++)
      538 +
      539 +#define for_each_or_bit(bit, addr1, addr2, size) \
      540 +	for ((bit) = 0; \
      541 +	     (bit) = find_next_or_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\
 569  542  	     (bit)++)
 570  543
 571  544  /* same as for_each_set_bit() but use bit as value to start with */
-6
include/linux/percpu_counter.h
···
  45   45  void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount,
  46   46  			      s32 batch);
  47   47  s64 __percpu_counter_sum(struct percpu_counter *fbc);
  48      -s64 percpu_counter_sum_all(struct percpu_counter *fbc);
  49   48  int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch);
  50   49  void percpu_counter_sync(struct percpu_counter *fbc);
  51   50
···
 191  192  }
 192  193
 193  194  static inline s64 percpu_counter_sum(struct percpu_counter *fbc)
 194      -{
 195      -	return percpu_counter_read(fbc);
 196      -}
 197      -
 198      -static inline s64 percpu_counter_sum_all(struct percpu_counter *fbc)
 199  195  {
 200  196  	return percpu_counter_read(fbc);
 201  197  }
-5
kernel/fork.c
···
 755  755  	for (i = 0; i < NR_MM_COUNTERS; i++) {
 756  756  		long x = percpu_counter_sum(&mm->rss_stat[i]);
 757  757
 758      -		if (likely(!x))
 759      -			continue;
 760      -
 761      -		/* Making sure this is not due to race with CPU offlining. */
 762      -		x = percpu_counter_sum_all(&mm->rss_stat[i]);
 763  758  		if (unlikely(x))
 764  759  			pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n",
 765  760  				 mm, resident_page_types[i], x);
+9
lib/find_bit.c
···
 182  182  EXPORT_SYMBOL(_find_next_andnot_bit);
 183  183  #endif
 184  184
      185 +#ifndef find_next_or_bit
      186 +unsigned long _find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2,
      187 +					unsigned long nbits, unsigned long start)
      188 +{
      189 +	return FIND_NEXT_BIT(addr1[idx] | addr2[idx], /* nop */, nbits, start);
      190 +}
      191 +EXPORT_SYMBOL(_find_next_or_bit);
      192 +#endif
      193 +
 185  194  #ifndef find_next_zero_bit
 186  195  unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits,
 187  196  					unsigned long start)
+14 -23
lib/percpu_counter.c
···
 122  122  }
 123  123  EXPORT_SYMBOL(percpu_counter_sync);
 124  124
 125      -static s64 __percpu_counter_sum_mask(struct percpu_counter *fbc,
 126      -			      const struct cpumask *cpu_mask)
      125 +/*
      126 + * Add up all the per-cpu counts, return the result. This is a more accurate
      127 + * but much slower version of percpu_counter_read_positive().
      128 + *
      129 + * We use the cpu mask of (cpu_online_mask | cpu_dying_mask) to capture sums
      130 + * from CPUs that are in the process of being taken offline. Dying cpus have
      131 + * been removed from the online mask, but may not have had the hotplug dead
      132 + * notifier called to fold the percpu count back into the global counter sum.
      133 + * By including dying CPUs in the iteration mask, we avoid this race condition
      134 + * so __percpu_counter_sum() just does the right thing when CPUs are being taken
      135 + * offline.
      136 + */
      137 +s64 __percpu_counter_sum(struct percpu_counter *fbc)
 127  138  {
 128  139  	s64 ret;
 129  140  	int cpu;
···
 142  131
 143  132  	raw_spin_lock_irqsave(&fbc->lock, flags);
 144  133  	ret = fbc->count;
 145      -	for_each_cpu(cpu, cpu_mask) {
      134 +	for_each_cpu_or(cpu, cpu_online_mask, cpu_dying_mask) {
 146  135  		s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
 147  136  		ret += *pcount;
 148  137  	}
 149  138  	raw_spin_unlock_irqrestore(&fbc->lock, flags);
 150  139  	return ret;
 151  140  }
 152      -
 153      -/*
 154      - * Add up all the per-cpu counts, return the result. This is a more accurate
 155      - * but much slower version of percpu_counter_read_positive()
 156      - */
 157      -s64 __percpu_counter_sum(struct percpu_counter *fbc)
 158      -{
 159      -	return __percpu_counter_sum_mask(fbc, cpu_online_mask);
 160      -}
 161  141  EXPORT_SYMBOL(__percpu_counter_sum);
 162      -
 163      -/*
 164      - * This is slower version of percpu_counter_sum as it traverses all possible
 165      - * cpus. Use this only in the cases where accurate data is needed in the
 166      - * presense of CPUs getting offlined.
 167      - */
 168      -s64 percpu_counter_sum_all(struct percpu_counter *fbc)
 169      -{
 170      -	return __percpu_counter_sum_mask(fbc, cpu_possible_mask);
 171      -}
 172      -EXPORT_SYMBOL(percpu_counter_sum_all);
 173  142
 174  143  int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp,
 175  144  			  struct lock_class_key *key)