Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

ext4: fix deadlock due to mbcache entry corruption

When manipulating xattr blocks, we can deadlock infinitely looping
inside ext4_xattr_block_set() where we constantly keep finding xattr
block for reuse in mbcache but we are unable to reuse it because its
reference count is too big. This happens because cache entry for the
xattr block is marked as reusable (e_reusable set) although its
reference count is too big. When this inconsistency happens, this
inconsistent state is kept indefinitely and so ext4_xattr_block_set()
keeps retrying indefinitely.

The inconsistent state is caused by non-atomic update of e_reusable bit.
e_reusable is part of a bitfield and e_reusable update can race with
update of e_referenced bit in the same bitfield resulting in loss of one
of the updates. Fix the problem by using atomic bitops instead.

This bug has been around for many years, but it became *much* easier
to hit after commit 65f8b80053a1 ("ext4: fix race when reusing xattr
blocks").

Cc: stable@vger.kernel.org
Fixes: 6048c64b2609 ("mbcache: add reusable flag to cache entries")
Fixes: 65f8b80053a1 ("ext4: fix race when reusing xattr blocks")
Reported-and-tested-by: Jeremi Piotrowski <jpiotrowski@linux.microsoft.com>
Reported-by: Thilo Fromm <t-lo@linux.microsoft.com>
Link: https://lore.kernel.org/r/c77bf00f-4618-7149-56f1-b8d1664b9d07@linux.microsoft.com/
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Link: https://lore.kernel.org/r/20221123193950.16758-1-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso@mit.edu>

authored by

Jan Kara and committed by
Theodore Ts'o
a44e84a9 b40ebaf6

+17 -10
+2 -2
fs/ext4/xattr.c
··· 1281 1281 ce = mb_cache_entry_get(ea_block_cache, hash, 1282 1282 bh->b_blocknr); 1283 1283 if (ce) { 1284 - ce->e_reusable = 1; 1284 + set_bit(MBE_REUSABLE_B, &ce->e_flags); 1285 1285 mb_cache_entry_put(ea_block_cache, ce); 1286 1286 } 1287 1287 } ··· 2043 2043 } 2044 2044 BHDR(new_bh)->h_refcount = cpu_to_le32(ref); 2045 2045 if (ref == EXT4_XATTR_REFCOUNT_MAX) 2046 - ce->e_reusable = 0; 2046 + clear_bit(MBE_REUSABLE_B, &ce->e_flags); 2047 2047 ea_bdebug(new_bh, "reusing; refcount now=%d", 2048 2048 ref); 2049 2049 ext4_xattr_block_csum_set(inode, new_bh);
+8 -6
fs/mbcache.c
··· 100 100 atomic_set(&entry->e_refcnt, 2); 101 101 entry->e_key = key; 102 102 entry->e_value = value; 103 - entry->e_reusable = reusable; 104 - entry->e_referenced = 0; 103 + entry->e_flags = 0; 104 + if (reusable) 105 + set_bit(MBE_REUSABLE_B, &entry->e_flags); 105 106 head = mb_cache_entry_head(cache, key); 106 107 hlist_bl_lock(head); 107 108 hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { ··· 166 165 while (node) { 167 166 entry = hlist_bl_entry(node, struct mb_cache_entry, 168 167 e_hash_list); 169 - if (entry->e_key == key && entry->e_reusable && 168 + if (entry->e_key == key && 169 + test_bit(MBE_REUSABLE_B, &entry->e_flags) && 170 170 atomic_inc_not_zero(&entry->e_refcnt)) 171 171 goto out; 172 172 node = node->next; ··· 286 284 void mb_cache_entry_touch(struct mb_cache *cache, 287 285 struct mb_cache_entry *entry) 288 286 { 289 - entry->e_referenced = 1; 287 + set_bit(MBE_REFERENCED_B, &entry->e_flags); 290 288 } 291 289 EXPORT_SYMBOL(mb_cache_entry_touch); 292 290 ··· 311 309 entry = list_first_entry(&cache->c_list, 312 310 struct mb_cache_entry, e_list); 313 311 /* Drop initial hash reference if there is no user */ 314 - if (entry->e_referenced || 312 + if (test_bit(MBE_REFERENCED_B, &entry->e_flags) || 315 313 atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) { 316 - entry->e_referenced = 0; 314 + clear_bit(MBE_REFERENCED_B, &entry->e_flags); 317 315 list_move_tail(&entry->e_list, &cache->c_list); 318 316 continue; 319 317 }
+7 -2
include/linux/mbcache.h
··· 10 10 11 11 struct mb_cache; 12 12 13 + /* Cache entry flags */ 14 + enum { 15 + MBE_REFERENCED_B = 0, 16 + MBE_REUSABLE_B 17 + }; 18 + 13 19 struct mb_cache_entry { 14 20 /* List of entries in cache - protected by cache->c_list_lock */ 15 21 struct list_head e_list; ··· 32 26 atomic_t e_refcnt; 33 27 /* Key in hash - stable during lifetime of the entry */ 34 28 u32 e_key; 35 - u32 e_referenced:1; 36 - u32 e_reusable:1; 29 + unsigned long e_flags; 37 30 /* User provided value - stable during lifetime of the entry */ 38 31 u64 e_value; 39 32 };