Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

fs: use do_splice_direct() for nfsd/ksmbd server-side-copy

nfsd/ksmbd call vfs_copy_file_range() with flag COPY_FILE_SPLICE to
perform kernel copy between two files on any two filesystems.

Splicing from the input file while holding file_start_write() on the output
file, which is on a different sb, poses a risk of fanotify-related deadlocks.

We only need to call splice_file_range() from within the context of
->copy_file_range() filesystem methods with file_start_write() held.

To avoid the possible deadlocks, always use do_splice_direct() instead of
splice_file_range() for the kernel copy fallback in vfs_copy_file_range()
without holding file_start_write().

Reported-and-tested-by: Bert Karwatzki <spasswolf@web.de>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Link: https://lore.kernel.org/r/20231130141624.3338942-4-amir73il@gmail.com
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>

authored by

Amir Goldstein and committed by
Christian Brauner
73065126 da40448c

+26 -13
+26 -13
fs/read_write.c
··· 1421 1421 struct file *file_out, loff_t pos_out, 1422 1422 size_t len, unsigned int flags) 1423 1423 { 1424 + /* May only be called from within ->copy_file_range() methods */ 1425 + if (WARN_ON_ONCE(flags)) 1426 + return -EINVAL; 1427 + 1424 1428 return splice_file_range(file_in, &pos_in, file_out, &pos_out, 1425 1429 min_t(size_t, len, MAX_RW_COUNT)); 1426 1430 } ··· 1514 1510 { 1515 1511 ssize_t ret; 1516 1512 bool splice = flags & COPY_FILE_SPLICE; 1513 + bool samesb = file_inode(file_in)->i_sb == file_inode(file_out)->i_sb; 1517 1514 1518 1515 if (flags & ~COPY_FILE_SPLICE) 1519 1516 return -EINVAL; ··· 1546 1541 ret = file_out->f_op->copy_file_range(file_in, pos_in, 1547 1542 file_out, pos_out, 1548 1543 len, flags); 1549 - goto done; 1550 - } 1551 - 1552 - if (!splice && file_in->f_op->remap_file_range && 1553 - file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { 1544 + } else if (!splice && file_in->f_op->remap_file_range && samesb) { 1554 1545 ret = file_in->f_op->remap_file_range(file_in, pos_in, 1555 1546 file_out, pos_out, 1556 1547 min_t(loff_t, MAX_RW_COUNT, len), 1557 1548 REMAP_FILE_CAN_SHORTEN); 1558 - if (ret > 0) 1559 - goto done; 1549 + /* fallback to splice */ 1550 + if (ret <= 0) 1551 + splice = true; 1552 + } else if (samesb) { 1553 + /* Fallback to splice for same sb copy for backward compat */ 1554 + splice = true; 1560 1555 } 1556 + 1557 + file_end_write(file_out); 1558 + 1559 + if (!splice) 1560 + goto done; 1561 1561 1562 1562 /* 1563 1563 * We can get here for same sb copy of filesystems that do not implement ··· 1575 1565 * and which filesystems do not, that will allow userspace tools to 1576 1566 * make consistent desicions w.r.t using copy_file_range(). 1577 1567 * 1578 - * We also get here if caller (e.g. nfsd) requested COPY_FILE_SPLICE. 1568 + * We also get here if caller (e.g. nfsd) requested COPY_FILE_SPLICE 1569 + * for server-side-copy between any two sb. 
1570 + * 1571 + * In any case, we call do_splice_direct() and not splice_file_range(), 1572 + * without file_start_write() held, to avoid possible deadlocks related 1573 + * to splicing from input file, while file_start_write() is held on 1574 + * the output file on a different sb. 1579 1575 */ 1580 - ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, 1581 - flags); 1582 - 1576 + ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, 1577 + min_t(size_t, len, MAX_RW_COUNT), 0); 1583 1578 done: 1584 1579 if (ret > 0) { 1585 1580 fsnotify_access(file_in); ··· 1595 1580 1596 1581 inc_syscr(current); 1597 1582 inc_syscw(current); 1598 - 1599 - file_end_write(file_out); 1600 1583 1601 1584 return ret; 1602 1585 }