Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'vfs-7.0-rc6.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs fixes from Christian Brauner:

- Fix netfs_limit_iter() hitting BUG() when an ITER_KVEC iterator
reaches it via core dump writes to 9P filesystems. Add ITER_KVEC
handling following the same pattern as the existing ITER_BVEC code.

- Fix a NULL pointer dereference in the netfs unbuffered write retry
path when the filesystem (e.g., 9P) doesn't set the prepare_write
operation.

- Clear I_DIRTY_TIME in sync_lazytime for filesystems implementing
->sync_lazytime. Without this, the flag stays set and may trigger
additional, unnecessary ->sync_lazytime calls during inode deactivation.

- Increase tmpfs size in mount_setattr selftests. A recent commit
bumped the ext4 image size to 2 GB but didn't adjust the tmpfs
backing store, so mkfs.ext4 fails with ENOSPC writing metadata.

- Fix an invalid folio access in iomap when i_blkbits matches the folio
size but differs from the I/O granularity. The cur_folio pointer
would not get invalidated and iomap_read_end() would still be called
on it despite the IO helper owning it.

- Fix hash_name() docstring.

- Fix read abandonment during netfs retry where the subreq variable
used for abandonment could be uninitialized on the first pass or
point to a deleted subrequest on later passes.

- Don't block sync for filesystems with no data integrity guarantees.
Add a SB_I_NO_DATA_INTEGRITY superblock flag replacing the per-inode
AS_NO_DATA_INTEGRITY mapping flag so sync kicks off writeback but
doesn't wait for flusher threads. This fixes a suspend-to-RAM hang on
fuse-overlayfs where the flusher thread blocks when the fuse daemon
is frozen.

- Fix a lockdep splat in iomap when reads fail. iomap_read_end_io()
invokes fserror_report() which calls igrab() taking i_lock in hardirq
context while i_lock is normally held with interrupts enabled. Kick
failed read handling to a workqueue.

- Remove the redundant netfs_io_stream::front member and use
stream->subrequests.next instead, fixing a potential issue in the
direct write code path.

* tag 'vfs-7.0-rc6.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
netfs: Fix the handling of stream->front by removing it
iomap: fix lockdep complaint when reads fail
writeback: don't block sync for filesystems with no data integrity guarantees
netfs: Fix read abandonment during retry
vfs: fix docstring of hash_name()
iomap: fix invalid folio access when i_blkbits differs from I/O granularity
selftests/mount_setattr: increase tmpfs size for idmapped mount tests
fs: clear I_DIRTY_TIME in sync_lazytime
netfs: Fix NULL pointer dereference in netfs_unbuffered_write() on retry
netfs: Fix kernel BUG in netfs_limit_iter() for ITER_KVEC iterators

20 files changed, +168 -53

fs/fs-writeback.c (+27 -9)
···
         }
 }
 
+static bool __sync_lazytime(struct inode *inode)
+{
+        spin_lock(&inode->i_lock);
+        if (!(inode_state_read(inode) & I_DIRTY_TIME)) {
+                spin_unlock(&inode->i_lock);
+                return false;
+        }
+        inode_state_clear(inode, I_DIRTY_TIME);
+        spin_unlock(&inode->i_lock);
+        inode->i_op->sync_lazytime(inode);
+        return true;
+}
+
 bool sync_lazytime(struct inode *inode)
 {
         if (!(inode_state_read_once(inode) & I_DIRTY_TIME))
···
 
         trace_writeback_lazytime(inode);
         if (inode->i_op->sync_lazytime)
-                inode->i_op->sync_lazytime(inode);
-        else
-                mark_inode_dirty_sync(inode);
+                return __sync_lazytime(inode);
+        mark_inode_dirty_sync(inode);
         return true;
 }
 
···
          * The mapping can appear untagged while still on-list since we
          * do not have the mapping lock. Skip it here, wb completion
          * will remove it.
-         *
-         * If the mapping does not have data integrity semantics,
-         * there's no need to wait for the writeout to complete, as the
-         * mapping cannot guarantee that data is persistently stored.
          */
-        if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK) ||
-            mapping_no_data_integrity(mapping))
+        if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
                 continue;
 
         spin_unlock_irq(&sb->s_inode_wblist_lock);
···
          */
         if (bdi == &noop_backing_dev_info)
                 return;
+
+        /*
+         * If the superblock has SB_I_NO_DATA_INTEGRITY set, there's no need to
+         * wait for the writeout to complete, as the filesystem cannot guarantee
+         * data persistence on sync. Just kick off writeback and return.
+         */
+        if (sb->s_iflags & SB_I_NO_DATA_INTEGRITY) {
+                wakeup_flusher_threads_bdi(bdi, WB_REASON_SYNC);
+                return;
+        }
+
         WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
         /* protect against inode wb switch, see inode_switch_wbs_work_fn() */
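
For orientation, this is roughly what a filesystem on the other side of the new helper could look like. A minimal sketch, assuming a void-returning ->sync_lazytime as the call in __sync_lazytime() implies; examplefs and examplefs_journal_timestamps are hypothetical names, not from this series:

/* Hypothetical ->sync_lazytime implementation. By the time this runs,
 * __sync_lazytime() has already cleared I_DIRTY_TIME under i_lock, so
 * inode deactivation no longer sees a stale flag and repeats the work. */
static void examplefs_sync_lazytime(struct inode *inode)
{
        /* Persist the deferred atime/mtime/ctime updates; this journal
         * helper is made up for illustration. */
        examplefs_journal_timestamps(inode);
}

static const struct inode_operations examplefs_file_inode_operations = {
        .sync_lazytime  = examplefs_sync_lazytime,
        /* ... */
};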

fs/fuse/file.c (+1 -3)
···
 
         inode->i_fop = &fuse_file_operations;
         inode->i_data.a_ops = &fuse_file_aops;
-        if (fc->writeback_cache) {
+        if (fc->writeback_cache)
                 mapping_set_writeback_may_deadlock_on_reclaim(&inode->i_data);
-                mapping_set_no_data_integrity(&inode->i_data);
-        }
 
         INIT_LIST_HEAD(&fi->write_files);
         INIT_LIST_HEAD(&fi->queued_writes);

fs/fuse/inode.c (+1)
···
         sb->s_export_op = &fuse_export_operations;
         sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
         sb->s_iflags |= SB_I_NOIDMAP;
+        sb->s_iflags |= SB_I_NO_DATA_INTEGRITY;
         if (sb->s_user_ns != &init_user_ns)
                 sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
         sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);

fs/iomap/bio.c (+50 -1)
···
 #include "internal.h"
 #include "trace.h"
 
-static void iomap_read_end_io(struct bio *bio)
+static DEFINE_SPINLOCK(failed_read_lock);
+static struct bio_list failed_read_list = BIO_EMPTY_LIST;
+
+static void __iomap_read_end_io(struct bio *bio)
 {
         int error = blk_status_to_errno(bio->bi_status);
         struct folio_iter fi;
···
         bio_for_each_folio_all(fi, bio)
                 iomap_finish_folio_read(fi.folio, fi.offset, fi.length, error);
         bio_put(bio);
+}
+
+static void
+iomap_fail_reads(
+        struct work_struct *work)
+{
+        struct bio *bio;
+        struct bio_list tmp = BIO_EMPTY_LIST;
+        unsigned long flags;
+
+        spin_lock_irqsave(&failed_read_lock, flags);
+        bio_list_merge_init(&tmp, &failed_read_list);
+        spin_unlock_irqrestore(&failed_read_lock, flags);
+
+        while ((bio = bio_list_pop(&tmp)) != NULL) {
+                __iomap_read_end_io(bio);
+                cond_resched();
+        }
+}
+
+static DECLARE_WORK(failed_read_work, iomap_fail_reads);
+
+static void iomap_fail_buffered_read(struct bio *bio)
+{
+        unsigned long flags;
+
+        /*
+         * Bounce I/O errors to a workqueue to avoid nested i_lock acquisitions
+         * in the fserror code. The caller no longer owns the bio reference
+         * after the spinlock drops.
+         */
+        spin_lock_irqsave(&failed_read_lock, flags);
+        if (bio_list_empty(&failed_read_list))
+                WARN_ON_ONCE(!schedule_work(&failed_read_work));
+        bio_list_add(&failed_read_list, bio);
+        spin_unlock_irqrestore(&failed_read_lock, flags);
+}
+
+static void iomap_read_end_io(struct bio *bio)
+{
+        if (bio->bi_status) {
+                iomap_fail_buffered_read(bio);
+                return;
+        }
+
+        __iomap_read_end_io(bio);
 }
 
 static void iomap_bio_submit_read(struct iomap_read_folio_ctx *ctx)
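
The inversion being avoided, sketched as an illustrative note rather than an actual lockdep report:

/*
 * Why the workqueue: lockdep sees i_lock taken two ways (illustrative).
 *
 * Process context, IRQs enabled:
 *         spin_lock(&inode->i_lock);   // i_lock recorded as IRQ-unsafe
 *         ...
 *         spin_unlock(&inode->i_lock);
 *
 * Read completion in hardirq context:
 *         iomap_read_end_io(bio)
 *           -> fserror_report()
 *                -> igrab(inode)       // takes i_lock in hardirq
 *
 * An interrupt landing while i_lock is held could recurse onto the same
 * lock and deadlock. Deferring failed bios to iomap_fail_reads() keeps
 * igrab()'s i_lock acquisition in process context.
 */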

fs/iomap/buffered-io.c (+10 -5)
···
         loff_t length = iomap_length(iter);
         struct folio *folio = ctx->cur_folio;
         size_t folio_len = folio_size(folio);
+        struct iomap_folio_state *ifs;
         size_t poff, plen;
         loff_t pos_diff;
         int ret;
···
                 return iomap_iter_advance(iter, length);
         }
 
-        ifs_alloc(iter->inode, folio, iter->flags);
+        ifs = ifs_alloc(iter->inode, folio, iter->flags);
 
         length = min_t(loff_t, length, folio_len - offset_in_folio(folio, pos));
         while (length) {
···
 
                 *bytes_submitted += plen;
                 /*
-                 * If the entire folio has been read in by the IO
-                 * helper, then the helper owns the folio and will end
-                 * the read on it.
+                 * Hand off folio ownership to the IO helper when:
+                 * 1) The entire folio has been submitted for IO, or
+                 * 2) There is no ifs attached to the folio
+                 *
+                 * Case (2) occurs when 1 << i_blkbits matches the folio
+                 * size but the underlying filesystem or block device
+                 * uses a smaller granularity for IO.
                 */
-                if (*bytes_submitted == folio_len)
+                if (*bytes_submitted == folio_len || !ifs)
                         ctx->cur_folio = NULL;
         }
 
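
Concretely, with illustrative numbers that are not from the commit itself:

/*
 * Example: a 64K folio on a filesystem with 64K blocks (i_blkbits == 16)
 * whose backing device performs IO in 4K units. One block per folio
 * means ifs_alloc() attaches no iomap_folio_state, so the IO helper
 * owns read completion for the whole folio. Without the "|| !ifs" test,
 * ctx->cur_folio was left set and iomap_read_end() was still called on
 * a folio the helper owned.
 */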

fs/namei.c (+8 -2)
···
 EXPORT_SYMBOL(hashlen_string);
 
 /*
- * Calculate the length and hash of the path component, and
- * return the length as the result.
+ * hash_name - Calculate the length and hash of the path component
+ * @nd: the path resolution state
+ * @name: the pathname to read the component from
+ * @lastword: if the component fits in a single word, LAST_WORD_IS_DOT,
+ *        LAST_WORD_IS_DOTDOT, or some other value depending on whether the
+ *        component is '.', '..', or something else. Otherwise, @lastword is 0.
+ *
+ * Returns: a pointer to the terminating '/' or NUL character in @name.
  */
 static inline const char *hash_name(struct nameidata *nd,
                                     const char *name,

fs/netfs/buffered_read.c (+1 -2)
···
         spin_lock(&rreq->lock);
         list_add_tail(&subreq->rreq_link, &stream->subrequests);
         if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
-                stream->front = subreq;
                 if (!stream->active) {
-                        stream->collected_to = stream->front->start;
+                        stream->collected_to = subreq->start;
                         /* Store list pointers before active flag */
                         smp_store_release(&stream->active, true);
                 }

fs/netfs/direct_read.c (+1 -2)
···
         spin_lock(&rreq->lock);
         list_add_tail(&subreq->rreq_link, &stream->subrequests);
         if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
-                stream->front = subreq;
                 if (!stream->active) {
-                        stream->collected_to = stream->front->start;
+                        stream->collected_to = subreq->start;
                         /* Store list pointers before active flag */
                         smp_store_release(&stream->active, true);
                 }

fs/netfs/direct_write.c (+11 -4)
···
                 netfs_prepare_write(wreq, stream, wreq->start + wreq->transferred);
                 subreq = stream->construct;
                 stream->construct = NULL;
-                stream->front = NULL;
         }
 
         /* Check if (re-)preparation failed. */
···
                 stream->sreq_max_segs = INT_MAX;
 
                 netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
-                stream->prepare_write(subreq);
 
-                __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
-                netfs_stat(&netfs_n_wh_retry_write_subreq);
+                if (stream->prepare_write) {
+                        stream->prepare_write(subreq);
+                        __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+                        netfs_stat(&netfs_n_wh_retry_write_subreq);
+                } else {
+                        struct iov_iter source;
+
+                        netfs_reset_iter(subreq);
+                        source = subreq->io_iter;
+                        netfs_reissue_write(stream, subreq, &source);
+                }
         }
 
         netfs_unbuffered_write_done(wreq);

fs/netfs/iterator.c (+43)
···
 }
 
 /*
+ * Select the span of a kvec iterator we're going to use. Limit it by both
+ * maximum size and maximum number of segments. Returns the size of the span
+ * in bytes.
+ */
+static size_t netfs_limit_kvec(const struct iov_iter *iter, size_t start_offset,
+                               size_t max_size, size_t max_segs)
+{
+        const struct kvec *kvecs = iter->kvec;
+        unsigned int nkv = iter->nr_segs, ix = 0, nsegs = 0;
+        size_t len, span = 0, n = iter->count;
+        size_t skip = iter->iov_offset + start_offset;
+
+        if (WARN_ON(!iov_iter_is_kvec(iter)) ||
+            WARN_ON(start_offset > n) ||
+            n == 0)
+                return 0;
+
+        while (n && ix < nkv && skip) {
+                len = kvecs[ix].iov_len;
+                if (skip < len)
+                        break;
+                skip -= len;
+                n -= len;
+                ix++;
+        }
+
+        while (n && ix < nkv) {
+                len = min3(n, kvecs[ix].iov_len - skip, max_size);
+                span += len;
+                nsegs++;
+                ix++;
+                if (span >= max_size || nsegs >= max_segs)
+                        break;
+                skip = 0;
+                n -= len;
+        }
+
+        return min(span, max_size);
+}
+
+/*
  * Select the span of an xarray iterator we're going to use. Limit it by both
  * maximum size and maximum number of segments. It is assumed that segments
  * can be larger than a page in size, provided they're physically contiguous.
···
                 return netfs_limit_bvec(iter, start_offset, max_size, max_segs);
         if (iov_iter_is_xarray(iter))
                 return netfs_limit_xarray(iter, start_offset, max_size, max_segs);
+        if (iov_iter_is_kvec(iter))
+                return netfs_limit_kvec(iter, start_offset, max_size, max_segs);
         BUG();
 }
 EXPORT_SYMBOL(netfs_limit_iter);
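
A hypothetical caller, to show the path the BUG() used to sit on. The buffer names and the size/segment limits here are made up; iov_iter_kvec() and netfs_limit_iter() are used with the signatures visible above:

#include <linux/netfs.h>
#include <linux/uio.h>

/* Clamp a two-segment ITER_KVEC source (e.g. what a core dump write to
 * a 9P file might present) to what one subrequest may carry. */
static size_t example_kvec_span(void *buf0, void *buf1)
{
        struct kvec kv[2] = {
                { .iov_base = buf0, .iov_len = PAGE_SIZE },
                { .iov_base = buf1, .iov_len = PAGE_SIZE },
        };
        struct iov_iter iter;

        iov_iter_kvec(&iter, ITER_SOURCE, kv, 2, 2 * PAGE_SIZE);

        /* Before this fix the ITER_KVEC case fell through to BUG();
         * now it is routed to netfs_limit_kvec(). */
        return netfs_limit_iter(&iter, 0, 65536, 8);
}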

fs/netfs/read_collect.c (+2 -2)
···
          * in progress. The issuer thread may be adding stuff to the tail
          * whilst we're doing this.
          */
-        front = READ_ONCE(stream->front);
+        front = list_first_entry_or_null(&stream->subrequests,
+                                         struct netfs_io_subrequest, rreq_link);
         while (front) {
                 size_t transferred;
 
···
                 list_del_init(&front->rreq_link);
                 front = list_first_entry_or_null(&stream->subrequests,
                                                  struct netfs_io_subrequest, rreq_link);
-                stream->front = front;
                 spin_unlock(&rreq->lock);
                 netfs_put_subrequest(remove,
                                      notes & ABANDON_SREQ ?

fs/netfs/read_retry.c (+4 -1)
···
                           from->start, from->transferred, from->len);
 
                 if (test_bit(NETFS_SREQ_FAILED, &from->flags) ||
-                    !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags))
+                    !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags)) {
+                        subreq = from;
                         goto abandon;
+                }
 
                 list_for_each_continue(next, &stream->subrequests) {
                         subreq = list_entry(next, struct netfs_io_subrequest, rreq_link);
···
                                 if (subreq == to)
                                         break;
                         }
+                        subreq = NULL;
                         continue;
                 }
 

fs/netfs/read_single.c (-1)
···
         spin_lock(&rreq->lock);
         list_add_tail(&subreq->rreq_link, &stream->subrequests);
         trace_netfs_sreq(subreq, netfs_sreq_trace_added);
-        stream->front = subreq;
         /* Store list pointers before active flag */
         smp_store_release(&stream->active, true);
         spin_unlock(&rreq->lock);

fs/netfs/write_collect.c (+2 -2)
···
                 if (!smp_load_acquire(&stream->active))
                         continue;
 
-                front = stream->front;
+                front = list_first_entry_or_null(&stream->subrequests,
+                                                 struct netfs_io_subrequest, rreq_link);
                 while (front) {
                         trace_netfs_collect_sreq(wreq, front);
                         //_debug("sreq [%x] %llx %zx/%zx",
···
                         list_del_init(&front->rreq_link);
                         front = list_first_entry_or_null(&stream->subrequests,
                                                          struct netfs_io_subrequest, rreq_link);
-                        stream->front = front;
                         spin_unlock(&wreq->lock);
                         netfs_put_subrequest(remove,
                                              notes & SAW_FAILURE ?

fs/netfs/write_issue.c (+1 -2)
···
         spin_lock(&wreq->lock);
         list_add_tail(&subreq->rreq_link, &stream->subrequests);
         if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
-                stream->front = subreq;
                 if (!stream->active) {
-                        stream->collected_to = stream->front->start;
+                        stream->collected_to = subreq->start;
                         /* Write list pointers before active flag */
                         smp_store_release(&stream->active, true);
                 }

include/linux/fs/super_types.h (+1)
···
 #define SB_I_NOUMASK                0x00001000 /* VFS does not apply umask */
 #define SB_I_NOIDMAP                0x00002000 /* No idmapped mounts on this superblock */
 #define SB_I_ALLOW_HSM              0x00004000 /* Allow HSM events on this superblock */
+#define SB_I_NO_DATA_INTEGRITY      0x00008000 /* fs cannot guarantee data persistence on sync */
 
 #endif /* _LINUX_FS_SUPER_TYPES_H */

include/linux/netfs.h (-1)
···
         void (*issue_write)(struct netfs_io_subrequest *subreq);
         /* Collection tracking */
         struct list_head subrequests;           /* Contributory I/O operations */
-        struct netfs_io_subrequest *front;      /* Op being collected */
         unsigned long long collected_to;        /* Position we've collected results to */
         size_t transferred;                     /* The amount transferred from this stream */
         unsigned short error;                   /* Aggregate error for the stream */

include/linux/pagemap.h (-11)
···
         AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM = 9,
         AS_KERNEL_FILE = 10,    /* mapping for a fake kernel file that shouldn't
                                    account usage to user cgroups */
-        AS_NO_DATA_INTEGRITY = 11,      /* no data integrity guarantees */
         /* Bits 16-25 are used for FOLIO_ORDER */
         AS_FOLIO_ORDER_BITS = 5,
         AS_FOLIO_ORDER_MIN = 16,
···
 static inline bool mapping_writeback_may_deadlock_on_reclaim(const struct address_space *mapping)
 {
         return test_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags);
-}
-
-static inline void mapping_set_no_data_integrity(struct address_space *mapping)
-{
-        set_bit(AS_NO_DATA_INTEGRITY, &mapping->flags);
-}
-
-static inline bool mapping_no_data_integrity(const struct address_space *mapping)
-{
-        return test_bit(AS_NO_DATA_INTEGRITY, &mapping->flags);
 }
 
 static inline gfp_t mapping_gfp_mask(const struct address_space *mapping)

include/trace/events/netfs.h (+4 -4)
···
                 __field(unsigned int,           wreq)
                 __field(unsigned char,          stream)
                 __field(unsigned long long,     collected_to)
-                __field(unsigned long long,     front)
+                __field(unsigned long long,     issued_to)
         ),
 
         TP_fast_assign(
                 __entry->wreq = wreq->debug_id;
                 __entry->stream = stream->stream_nr;
                 __entry->collected_to = stream->collected_to;
-                __entry->front = stream->front ? stream->front->start : UINT_MAX;
+                __entry->issued_to = atomic64_read(&wreq->issued_to);
         ),
 
-        TP_printk("R=%08x[%x:] cto=%llx frn=%llx",
+        TP_printk("R=%08x[%x:] cto=%llx ito=%llx",
                   __entry->wreq, __entry->stream,
-                  __entry->collected_to, __entry->front)
+                  __entry->collected_to, __entry->issued_to)
         );
 
 TRACE_EVENT(netfs_folioq,

tools/testing/selftests/mount_setattr/mount_setattr_test.c (+1 -1)
···
                         "size=100000,mode=700"), 0);
 
         ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
-                        "size=2m,mode=700"), 0);
+                        "size=256m,mode=700"), 0);
 
         ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
 