Merge tag 'xfs-4.20-merge-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

+5

Documentation/filesystems/porting

··· 623 623 On success you get a new struct file sharing the mount/dentry with the 624 624 original, on failure - ERR_PTR(). 625 625 -- 626 + [mandatory] 627 + ->clone_file_range() and ->dedupe_file_range() have been replaced with 628 + ->remap_file_range(). See Documentation/filesystems/vfs.txt for more 629 + information. 630 + -- 626 631 [recommended] 627 632 ->lookup() instances doing an equivalent of 628 633 if (IS_ERR(inode))

+15 -7

Documentation/filesystems/vfs.txt

··· 883 883 unsigned (*mmap_capabilities)(struct file *); 884 884 #endif 885 885 ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int); 886 - int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, u64); 887 - int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, u64); 886 + loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in, 887 + struct file *file_out, loff_t pos_out, 888 + loff_t len, unsigned int remap_flags); 888 889 int (*fadvise)(struct file *, loff_t, loff_t, int); 889 890 }; 890 891 ··· 961 960 962 961 copy_file_range: called by the copy_file_range(2) system call. 963 962 964 - clone_file_range: called by the ioctl(2) system call for FICLONERANGE and 965 - FICLONE commands. 966 - 967 - dedupe_file_range: called by the ioctl(2) system call for FIDEDUPERANGE 968 - command. 963 + remap_file_range: called by the ioctl(2) system call for FICLONERANGE and 964 + FICLONE and FIDEDUPERANGE commands to remap file ranges. An 965 + implementation should remap len bytes at pos_in of the source file into 966 + the dest file at pos_out. Implementations must handle callers passing 967 + in len == 0; this means "remap to the end of the source file". The 968 + return value should be the number of bytes remapped, or the usual 969 + negative error code if errors occurred before any bytes were remapped. 970 + The remap_flags parameter accepts REMAP_FILE_* flags. If 971 + REMAP_FILE_DEDUP is set then the implementation must only remap if the 972 + requested file ranges have identical contents. If REMAP_FILE_CAN_SHORTEN is 973 + set, the caller is ok with the implementation shortening the request 974 + length to satisfy alignment or EOF requirements (or any other reason). 969 975 970 976 fadvise: possibly called by the fadvise64() system call. 971 977

+3 -5

fs/btrfs/ctree.h

··· 3201 3201 struct btrfs_ioctl_space_info *space); 3202 3202 void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info, 3203 3203 struct btrfs_ioctl_balance_args *bargs); 3204 - int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff, 3205 - struct file *dst_file, loff_t dst_loff, 3206 - u64 olen); 3207 3204 3208 3205 /* file.c */ 3209 3206 int __init btrfs_auto_defrag_init(void); ··· 3230 3233 size_t num_pages, loff_t pos, size_t write_bytes, 3231 3234 struct extent_state **cached); 3232 3235 int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end); 3233 - int btrfs_clone_file_range(struct file *file_in, loff_t pos_in, 3234 - struct file *file_out, loff_t pos_out, u64 len); 3236 + loff_t btrfs_remap_file_range(struct file *file_in, loff_t pos_in, 3237 + struct file *file_out, loff_t pos_out, 3238 + loff_t len, unsigned int remap_flags); 3235 3239 3236 3240 /* tree-defrag.c */ 3237 3241 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,

+1 -2

fs/btrfs/file.c

··· 3298 3298 #ifdef CONFIG_COMPAT 3299 3299 .compat_ioctl = btrfs_compat_ioctl, 3300 3300 #endif 3301 - .clone_file_range = btrfs_clone_file_range, 3302 - .dedupe_file_range = btrfs_dedupe_file_range, 3301 + .remap_file_range = btrfs_remap_file_range, 3303 3302 }; 3304 3303 3305 3304 void __cold btrfs_auto_defrag_exit(void)

+27 -23

fs/btrfs/ioctl.c

··· 3629 3629 return ret; 3630 3630 } 3631 3631 3632 - int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff, 3633 - struct file *dst_file, loff_t dst_loff, 3634 - u64 olen) 3635 - { 3636 - struct inode *src = file_inode(src_file); 3637 - struct inode *dst = file_inode(dst_file); 3638 - u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; 3639 - 3640 - if (WARN_ON_ONCE(bs < PAGE_SIZE)) { 3641 - /* 3642 - * Btrfs does not support blocksize < page_size. As a 3643 - * result, btrfs_cmp_data() won't correctly handle 3644 - * this situation without an update. 3645 - */ 3646 - return -EINVAL; 3647 - } 3648 - 3649 - return btrfs_extent_same(src, src_loff, olen, dst, dst_loff); 3650 - } 3651 - 3652 3632 static int clone_finish_inode_update(struct btrfs_trans_handle *trans, 3653 3633 struct inode *inode, 3654 3634 u64 endoff, ··· 4330 4350 return ret; 4331 4351 } 4332 4352 4333 - int btrfs_clone_file_range(struct file *src_file, loff_t off, 4334 - struct file *dst_file, loff_t destoff, u64 len) 4353 + loff_t btrfs_remap_file_range(struct file *src_file, loff_t off, 4354 + struct file *dst_file, loff_t destoff, loff_t len, 4355 + unsigned int remap_flags) 4335 4356 { 4336 - return btrfs_clone_files(dst_file, src_file, off, len, destoff); 4357 + int ret; 4358 + 4359 + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) 4360 + return -EINVAL; 4361 + 4362 + if (remap_flags & REMAP_FILE_DEDUP) { 4363 + struct inode *src = file_inode(src_file); 4364 + struct inode *dst = file_inode(dst_file); 4365 + u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; 4366 + 4367 + if (WARN_ON_ONCE(bs < PAGE_SIZE)) { 4368 + /* 4369 + * Btrfs does not support blocksize < page_size. As a 4370 + * result, btrfs_cmp_data() won't correctly handle 4371 + * this situation without an update. 
4372 + */ 4373 + return -EINVAL; 4374 + } 4375 + 4376 + ret = btrfs_extent_same(src, off, len, dst, destoff); 4377 + } else { 4378 + ret = btrfs_clone_files(dst_file, src_file, off, len, destoff); 4379 + } 4380 + return ret < 0 ? ret : len; 4337 4381 } 4338 4382 4339 4383 static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)

+14 -10

fs/cifs/cifsfs.c

··· 992 992 .listxattr = cifs_listxattr, 993 993 }; 994 994 995 - static int cifs_clone_file_range(struct file *src_file, loff_t off, 996 - struct file *dst_file, loff_t destoff, u64 len) 995 + static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, 996 + struct file *dst_file, loff_t destoff, loff_t len, 997 + unsigned int remap_flags) 997 998 { 998 999 struct inode *src_inode = file_inode(src_file); 999 1000 struct inode *target_inode = file_inode(dst_file); ··· 1003 1002 struct cifs_tcon *target_tcon = tlink_tcon(smb_file_target->tlink); 1004 1003 unsigned int xid; 1005 1004 int rc; 1005 + 1006 + if (remap_flags & ~REMAP_FILE_ADVISORY) 1007 + return -EINVAL; 1006 1008 1007 1009 cifs_dbg(FYI, "clone range\n"); 1008 1010 ··· 1046 1042 unlock_two_nondirectories(src_inode, target_inode); 1047 1043 out: 1048 1044 free_xid(xid); 1049 - return rc; 1045 + return rc < 0 ? rc : len; 1050 1046 } 1051 1047 1052 1048 ssize_t cifs_file_copychunk_range(unsigned int xid, ··· 1155 1151 .llseek = cifs_llseek, 1156 1152 .unlocked_ioctl = cifs_ioctl, 1157 1153 .copy_file_range = cifs_copy_file_range, 1158 - .clone_file_range = cifs_clone_file_range, 1154 + .remap_file_range = cifs_remap_file_range, 1159 1155 .setlease = cifs_setlease, 1160 1156 .fallocate = cifs_fallocate, 1161 1157 }; ··· 1174 1170 .llseek = cifs_llseek, 1175 1171 .unlocked_ioctl = cifs_ioctl, 1176 1172 .copy_file_range = cifs_copy_file_range, 1177 - .clone_file_range = cifs_clone_file_range, 1173 + .remap_file_range = cifs_remap_file_range, 1178 1174 .setlease = cifs_setlease, 1179 1175 .fallocate = cifs_fallocate, 1180 1176 }; ··· 1193 1189 .splice_write = iter_file_splice_write, 1194 1190 .unlocked_ioctl = cifs_ioctl, 1195 1191 .copy_file_range = cifs_copy_file_range, 1196 - .clone_file_range = cifs_clone_file_range, 1192 + .remap_file_range = cifs_remap_file_range, 1197 1193 .llseek = cifs_llseek, 1198 1194 .setlease = cifs_setlease, 1199 1195 .fallocate = cifs_fallocate, ··· 1212 1208 .llseek = 
cifs_llseek, 1213 1209 .unlocked_ioctl = cifs_ioctl, 1214 1210 .copy_file_range = cifs_copy_file_range, 1215 - .clone_file_range = cifs_clone_file_range, 1211 + .remap_file_range = cifs_remap_file_range, 1216 1212 .setlease = cifs_setlease, 1217 1213 .fallocate = cifs_fallocate, 1218 1214 }; ··· 1230 1226 .llseek = cifs_llseek, 1231 1227 .unlocked_ioctl = cifs_ioctl, 1232 1228 .copy_file_range = cifs_copy_file_range, 1233 - .clone_file_range = cifs_clone_file_range, 1229 + .remap_file_range = cifs_remap_file_range, 1234 1230 .setlease = cifs_setlease, 1235 1231 .fallocate = cifs_fallocate, 1236 1232 }; ··· 1248 1244 .splice_write = iter_file_splice_write, 1249 1245 .unlocked_ioctl = cifs_ioctl, 1250 1246 .copy_file_range = cifs_copy_file_range, 1251 - .clone_file_range = cifs_clone_file_range, 1247 + .remap_file_range = cifs_remap_file_range, 1252 1248 .llseek = cifs_llseek, 1253 1249 .setlease = cifs_setlease, 1254 1250 .fallocate = cifs_fallocate, ··· 1260 1256 .read = generic_read_dir, 1261 1257 .unlocked_ioctl = cifs_ioctl, 1262 1258 .copy_file_range = cifs_copy_file_range, 1263 - .clone_file_range = cifs_clone_file_range, 1259 + .remap_file_range = cifs_remap_file_range, 1264 1260 .llseek = generic_file_llseek, 1265 1261 .fsync = cifs_dir_fsync, 1266 1262 };

+9 -1

fs/ioctl.c

··· 223 223 u64 off, u64 olen, u64 destoff) 224 224 { 225 225 struct fd src_file = fdget(srcfd); 226 + loff_t cloned; 226 227 int ret; 227 228 228 229 if (!src_file.file) ··· 231 230 ret = -EXDEV; 232 231 if (src_file.file->f_path.mnt != dst_file->f_path.mnt) 233 232 goto fdput; 234 - ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen); 233 + cloned = vfs_clone_file_range(src_file.file, off, dst_file, destoff, 234 + olen, 0); 235 + if (cloned < 0) 236 + ret = cloned; 237 + else if (olen && cloned != olen) 238 + ret = -EINVAL; 239 + else 240 + ret = 0; 235 241 fdput: 236 242 fdput(src_file); 237 243 return ret;

+8 -4

fs/nfs/nfs4file.c

··· 180 180 return nfs42_proc_allocate(filep, offset, len); 181 181 } 182 182 183 - static int nfs42_clone_file_range(struct file *src_file, loff_t src_off, 184 - struct file *dst_file, loff_t dst_off, u64 count) 183 + static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off, 184 + struct file *dst_file, loff_t dst_off, loff_t count, 185 + unsigned int remap_flags) 185 186 { 186 187 struct inode *dst_inode = file_inode(dst_file); 187 188 struct nfs_server *server = NFS_SERVER(dst_inode); ··· 190 189 unsigned int bs = server->clone_blksize; 191 190 bool same_inode = false; 192 191 int ret; 192 + 193 + if (remap_flags & ~REMAP_FILE_ADVISORY) 194 + return -EINVAL; 193 195 194 196 /* check alignment w.r.t. clone_blksize */ 195 197 ret = -EINVAL; ··· 244 240 inode_unlock(src_inode); 245 241 } 246 242 out: 247 - return ret; 243 + return ret < 0 ? ret : count; 248 244 } 249 245 #endif /* CONFIG_NFS_V4_2 */ 250 246 ··· 266 262 .copy_file_range = nfs4_copy_file_range, 267 263 .llseek = nfs4_file_llseek, 268 264 .fallocate = nfs42_fallocate, 269 - .clone_file_range = nfs42_clone_file_range, 265 + .remap_file_range = nfs42_remap_file_range, 270 266 #else 271 267 .llseek = nfs_file_llseek, 272 268 #endif

+6 -2

fs/nfsd/vfs.c

··· 541 541 __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, 542 542 u64 dst_pos, u64 count) 543 543 { 544 - return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos, 545 - count)); 544 + loff_t cloned; 545 + 546 + cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0); 547 + if (count && cloned != count) 548 + cloned = -EINVAL; 549 + return nfserrno(cloned < 0 ? cloned : 0); 546 550 } 547 551 548 552 ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,

+73 -20

fs/ocfs2/file.c

··· 2527 2527 return offset; 2528 2528 } 2529 2529 2530 - static int ocfs2_file_clone_range(struct file *file_in, 2531 - loff_t pos_in, 2532 - struct file *file_out, 2533 - loff_t pos_out, 2534 - u64 len) 2530 + static loff_t ocfs2_remap_file_range(struct file *file_in, loff_t pos_in, 2531 + struct file *file_out, loff_t pos_out, 2532 + loff_t len, unsigned int remap_flags) 2535 2533 { 2536 - return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out, 2537 - len, false); 2538 - } 2534 + struct inode *inode_in = file_inode(file_in); 2535 + struct inode *inode_out = file_inode(file_out); 2536 + struct ocfs2_super *osb = OCFS2_SB(inode_in->i_sb); 2537 + struct buffer_head *in_bh = NULL, *out_bh = NULL; 2538 + bool same_inode = (inode_in == inode_out); 2539 + loff_t remapped = 0; 2540 + ssize_t ret; 2539 2541 2540 - static int ocfs2_file_dedupe_range(struct file *file_in, 2541 - loff_t pos_in, 2542 - struct file *file_out, 2543 - loff_t pos_out, 2544 - u64 len) 2545 - { 2546 - return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out, 2547 - len, true); 2542 + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) 2543 + return -EINVAL; 2544 + if (!ocfs2_refcount_tree(osb)) 2545 + return -EOPNOTSUPP; 2546 + if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) 2547 + return -EROFS; 2548 + 2549 + /* Lock both files against IO */ 2550 + ret = ocfs2_reflink_inodes_lock(inode_in, &in_bh, inode_out, &out_bh); 2551 + if (ret) 2552 + return ret; 2553 + 2554 + /* Check file eligibility and prepare for block sharing. */ 2555 + ret = -EINVAL; 2556 + if ((OCFS2_I(inode_in)->ip_flags & OCFS2_INODE_SYSTEM_FILE) || 2557 + (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE)) 2558 + goto out_unlock; 2559 + 2560 + ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, 2561 + &len, remap_flags); 2562 + if (ret < 0 || len == 0) 2563 + goto out_unlock; 2564 + 2565 + /* Lock out changes to the allocation maps and remap. 
*/ 2566 + down_write(&OCFS2_I(inode_in)->ip_alloc_sem); 2567 + if (!same_inode) 2568 + down_write_nested(&OCFS2_I(inode_out)->ip_alloc_sem, 2569 + SINGLE_DEPTH_NESTING); 2570 + 2571 + /* Zap any page cache for the destination file's range. */ 2572 + truncate_inode_pages_range(&inode_out->i_data, 2573 + round_down(pos_out, PAGE_SIZE), 2574 + round_up(pos_out + len, PAGE_SIZE) - 1); 2575 + 2576 + remapped = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in, 2577 + inode_out, out_bh, pos_out, len); 2578 + up_write(&OCFS2_I(inode_in)->ip_alloc_sem); 2579 + if (!same_inode) 2580 + up_write(&OCFS2_I(inode_out)->ip_alloc_sem); 2581 + if (remapped < 0) { 2582 + ret = remapped; 2583 + mlog_errno(ret); 2584 + goto out_unlock; 2585 + } 2586 + 2587 + /* 2588 + * Empty the extent map so that we may get the right extent 2589 + * record from the disk. 2590 + */ 2591 + ocfs2_extent_map_trunc(inode_in, 0); 2592 + ocfs2_extent_map_trunc(inode_out, 0); 2593 + 2594 + ret = ocfs2_reflink_update_dest(inode_out, out_bh, pos_out + len); 2595 + if (ret) { 2596 + mlog_errno(ret); 2597 + goto out_unlock; 2598 + } 2599 + 2600 + out_unlock: 2601 + ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh); 2602 + return remapped > 0 ? 
remapped : ret; 2548 2603 } 2549 2604 2550 2605 const struct inode_operations ocfs2_file_iops = { ··· 2641 2586 .splice_read = generic_file_splice_read, 2642 2587 .splice_write = iter_file_splice_write, 2643 2588 .fallocate = ocfs2_fallocate, 2644 - .clone_file_range = ocfs2_file_clone_range, 2645 - .dedupe_file_range = ocfs2_file_dedupe_range, 2589 + .remap_file_range = ocfs2_remap_file_range, 2646 2590 }; 2647 2591 2648 2592 const struct file_operations ocfs2_dops = { ··· 2687 2633 .splice_read = generic_file_splice_read, 2688 2634 .splice_write = iter_file_splice_write, 2689 2635 .fallocate = ocfs2_fallocate, 2690 - .clone_file_range = ocfs2_file_clone_range, 2691 - .dedupe_file_range = ocfs2_file_dedupe_range, 2636 + .remap_file_range = ocfs2_remap_file_range, 2692 2637 }; 2693 2638 2694 2639 const struct file_operations ocfs2_dops_no_plocks = {

+37 -111

fs/ocfs2/refcounttree.c

··· 4466 4466 } 4467 4467 4468 4468 /* Update destination inode size, if necessary. */ 4469 - static int ocfs2_reflink_update_dest(struct inode *dest, 4470 - struct buffer_head *d_bh, 4471 - loff_t newlen) 4469 + int ocfs2_reflink_update_dest(struct inode *dest, 4470 + struct buffer_head *d_bh, 4471 + loff_t newlen) 4472 4472 { 4473 4473 handle_t *handle; 4474 4474 int ret; ··· 4505 4505 } 4506 4506 4507 4507 /* Remap the range pos_in:len in s_inode to pos_out:len in t_inode. */ 4508 - static int ocfs2_reflink_remap_extent(struct inode *s_inode, 4509 - struct buffer_head *s_bh, 4510 - loff_t pos_in, 4511 - struct inode *t_inode, 4512 - struct buffer_head *t_bh, 4513 - loff_t pos_out, 4514 - loff_t len, 4515 - struct ocfs2_cached_dealloc_ctxt *dealloc) 4508 + static loff_t ocfs2_reflink_remap_extent(struct inode *s_inode, 4509 + struct buffer_head *s_bh, 4510 + loff_t pos_in, 4511 + struct inode *t_inode, 4512 + struct buffer_head *t_bh, 4513 + loff_t pos_out, 4514 + loff_t len, 4515 + struct ocfs2_cached_dealloc_ctxt *dealloc) 4516 4516 { 4517 4517 struct ocfs2_extent_tree s_et; 4518 4518 struct ocfs2_extent_tree t_et; ··· 4520 4520 struct buffer_head *ref_root_bh = NULL; 4521 4521 struct ocfs2_refcount_tree *ref_tree; 4522 4522 struct ocfs2_super *osb; 4523 + loff_t remapped_bytes = 0; 4523 4524 loff_t pstart, plen; 4524 - u32 p_cluster, num_clusters, slast, spos, tpos; 4525 + u32 p_cluster, num_clusters, slast, spos, tpos, remapped_clus = 0; 4525 4526 unsigned int ext_flags; 4526 4527 int ret = 0; 4527 4528 ··· 4604 4603 next_loop: 4605 4604 spos += num_clusters; 4606 4605 tpos += num_clusters; 4606 + remapped_clus += num_clusters; 4607 4607 } 4608 4608 4609 - out: 4610 - return ret; 4609 + goto out; 4611 4610 out_unlock_refcount: 4612 4611 ocfs2_unlock_refcount_tree(osb, ref_tree, 1); 4613 4612 brelse(ref_root_bh); 4614 - return ret; 4613 + out: 4614 + remapped_bytes = ocfs2_clusters_to_bytes(t_inode->i_sb, remapped_clus); 4615 + remapped_bytes = min_t(loff_t, 
len, remapped_bytes); 4616 + 4617 + return remapped_bytes > 0 ? remapped_bytes : ret; 4615 4618 } 4616 4619 4617 4620 /* Set up refcount tree and remap s_inode to t_inode. */ 4618 - static int ocfs2_reflink_remap_blocks(struct inode *s_inode, 4619 - struct buffer_head *s_bh, 4620 - loff_t pos_in, 4621 - struct inode *t_inode, 4622 - struct buffer_head *t_bh, 4623 - loff_t pos_out, 4624 - loff_t len) 4621 + loff_t ocfs2_reflink_remap_blocks(struct inode *s_inode, 4622 + struct buffer_head *s_bh, 4623 + loff_t pos_in, 4624 + struct inode *t_inode, 4625 + struct buffer_head *t_bh, 4626 + loff_t pos_out, 4627 + loff_t len) 4625 4628 { 4626 4629 struct ocfs2_cached_dealloc_ctxt dealloc; 4627 4630 struct ocfs2_super *osb; 4628 4631 struct ocfs2_dinode *dis; 4629 4632 struct ocfs2_dinode *dit; 4630 - int ret; 4633 + loff_t ret; 4631 4634 4632 4635 osb = OCFS2_SB(s_inode->i_sb); 4633 4636 dis = (struct ocfs2_dinode *)s_bh->b_data; ··· 4703 4698 /* Actually remap extents now. */ 4704 4699 ret = ocfs2_reflink_remap_extent(s_inode, s_bh, pos_in, t_inode, t_bh, 4705 4700 pos_out, len, &dealloc); 4706 - if (ret) { 4701 + if (ret < 0) { 4707 4702 mlog_errno(ret); 4708 4703 goto out; 4709 4704 } ··· 4718 4713 } 4719 4714 4720 4715 /* Lock an inode and grab a bh pointing to the inode. */ 4721 - static int ocfs2_reflink_inodes_lock(struct inode *s_inode, 4722 - struct buffer_head **bh1, 4723 - struct inode *t_inode, 4724 - struct buffer_head **bh2) 4716 + int ocfs2_reflink_inodes_lock(struct inode *s_inode, 4717 + struct buffer_head **bh1, 4718 + struct inode *t_inode, 4719 + struct buffer_head **bh2) 4725 4720 { 4726 4721 struct inode *inode1; 4727 4722 struct inode *inode2; ··· 4806 4801 } 4807 4802 4808 4803 /* Unlock both inodes and release buffers. 
*/ 4809 - static void ocfs2_reflink_inodes_unlock(struct inode *s_inode, 4810 - struct buffer_head *s_bh, 4811 - struct inode *t_inode, 4812 - struct buffer_head *t_bh) 4804 + void ocfs2_reflink_inodes_unlock(struct inode *s_inode, 4805 + struct buffer_head *s_bh, 4806 + struct inode *t_inode, 4807 + struct buffer_head *t_bh) 4813 4808 { 4814 4809 ocfs2_inode_unlock(s_inode, 1); 4815 4810 ocfs2_rw_unlock(s_inode, 1); ··· 4820 4815 brelse(t_bh); 4821 4816 } 4822 4817 unlock_two_nondirectories(s_inode, t_inode); 4823 - } 4824 - 4825 - /* Link a range of blocks from one file to another. */ 4826 - int ocfs2_reflink_remap_range(struct file *file_in, 4827 - loff_t pos_in, 4828 - struct file *file_out, 4829 - loff_t pos_out, 4830 - u64 len, 4831 - bool is_dedupe) 4832 - { 4833 - struct inode *inode_in = file_inode(file_in); 4834 - struct inode *inode_out = file_inode(file_out); 4835 - struct ocfs2_super *osb = OCFS2_SB(inode_in->i_sb); 4836 - struct buffer_head *in_bh = NULL, *out_bh = NULL; 4837 - bool same_inode = (inode_in == inode_out); 4838 - ssize_t ret; 4839 - 4840 - if (!ocfs2_refcount_tree(osb)) 4841 - return -EOPNOTSUPP; 4842 - if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) 4843 - return -EROFS; 4844 - 4845 - /* Lock both files against IO */ 4846 - ret = ocfs2_reflink_inodes_lock(inode_in, &in_bh, inode_out, &out_bh); 4847 - if (ret) 4848 - return ret; 4849 - 4850 - /* Check file eligibility and prepare for block sharing. */ 4851 - ret = -EINVAL; 4852 - if ((OCFS2_I(inode_in)->ip_flags & OCFS2_INODE_SYSTEM_FILE) || 4853 - (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE)) 4854 - goto out_unlock; 4855 - 4856 - ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out, 4857 - &len, is_dedupe); 4858 - if (ret <= 0) 4859 - goto out_unlock; 4860 - 4861 - /* Lock out changes to the allocation maps and remap. 
*/ 4862 - down_write(&OCFS2_I(inode_in)->ip_alloc_sem); 4863 - if (!same_inode) 4864 - down_write_nested(&OCFS2_I(inode_out)->ip_alloc_sem, 4865 - SINGLE_DEPTH_NESTING); 4866 - 4867 - ret = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in, inode_out, 4868 - out_bh, pos_out, len); 4869 - 4870 - /* Zap any page cache for the destination file's range. */ 4871 - if (!ret) 4872 - truncate_inode_pages_range(&inode_out->i_data, pos_out, 4873 - PAGE_ALIGN(pos_out + len) - 1); 4874 - 4875 - up_write(&OCFS2_I(inode_in)->ip_alloc_sem); 4876 - if (!same_inode) 4877 - up_write(&OCFS2_I(inode_out)->ip_alloc_sem); 4878 - if (ret) { 4879 - mlog_errno(ret); 4880 - goto out_unlock; 4881 - } 4882 - 4883 - /* 4884 - * Empty the extent map so that we may get the right extent 4885 - * record from the disk. 4886 - */ 4887 - ocfs2_extent_map_trunc(inode_in, 0); 4888 - ocfs2_extent_map_trunc(inode_out, 0); 4889 - 4890 - ret = ocfs2_reflink_update_dest(inode_out, out_bh, pos_out + len); 4891 - if (ret) { 4892 - mlog_errno(ret); 4893 - goto out_unlock; 4894 - } 4895 - 4896 - ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh); 4897 - return 0; 4898 - 4899 - out_unlock: 4900 - ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh); 4901 - return ret; 4902 4818 }

+18 -6

fs/ocfs2/refcounttree.h

··· 115 115 const char __user *oldname, 116 116 const char __user *newname, 117 117 bool preserve); 118 - int ocfs2_reflink_remap_range(struct file *file_in, 119 - loff_t pos_in, 120 - struct file *file_out, 121 - loff_t pos_out, 122 - u64 len, 123 - bool is_dedupe); 118 + loff_t ocfs2_reflink_remap_blocks(struct inode *s_inode, 119 + struct buffer_head *s_bh, 120 + loff_t pos_in, 121 + struct inode *t_inode, 122 + struct buffer_head *t_bh, 123 + loff_t pos_out, 124 + loff_t len); 125 + int ocfs2_reflink_inodes_lock(struct inode *s_inode, 126 + struct buffer_head **bh1, 127 + struct inode *t_inode, 128 + struct buffer_head **bh2); 129 + void ocfs2_reflink_inodes_unlock(struct inode *s_inode, 130 + struct buffer_head *s_bh, 131 + struct inode *t_inode, 132 + struct buffer_head *t_bh); 133 + int ocfs2_reflink_update_dest(struct inode *dest, 134 + struct buffer_head *d_bh, 135 + loff_t newlen); 124 136 125 137 #endif /* OCFS2_REFCOUNTTREE_H */

+3 -3

fs/overlayfs/copy_up.c

··· 125 125 struct file *new_file; 126 126 loff_t old_pos = 0; 127 127 loff_t new_pos = 0; 128 + loff_t cloned; 128 129 int error = 0; 129 130 130 131 if (len == 0) ··· 142 141 } 143 142 144 143 /* Try to use clone_file_range to clone up within the same fs */ 145 - error = do_clone_file_range(old_file, 0, new_file, 0, len); 146 - if (!error) 144 + cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0); 145 + if (cloned == len) 147 146 goto out; 148 147 /* Couldn't clone, so now we try to copy the data */ 149 - error = 0; 150 148 151 149 /* FIXME: copy up sparse files efficiently */ 152 150 while (len) {

+24 -19

fs/overlayfs/file.c

··· 434 434 OVL_DEDUPE, 435 435 }; 436 436 437 - static ssize_t ovl_copyfile(struct file *file_in, loff_t pos_in, 437 + static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in, 438 438 struct file *file_out, loff_t pos_out, 439 - u64 len, unsigned int flags, enum ovl_copyop op) 439 + loff_t len, unsigned int flags, enum ovl_copyop op) 440 440 { 441 441 struct inode *inode_out = file_inode(file_out); 442 442 struct fd real_in, real_out; 443 443 const struct cred *old_cred; 444 - ssize_t ret; 444 + loff_t ret; 445 445 446 446 ret = ovl_real_fdget(file_out, &real_out); 447 447 if (ret) ··· 462 462 463 463 case OVL_CLONE: 464 464 ret = vfs_clone_file_range(real_in.file, pos_in, 465 - real_out.file, pos_out, len); 465 + real_out.file, pos_out, len, flags); 466 466 break; 467 467 468 468 case OVL_DEDUPE: 469 469 ret = vfs_dedupe_file_range_one(real_in.file, pos_in, 470 - real_out.file, pos_out, len); 470 + real_out.file, pos_out, len, 471 + flags); 471 472 break; 472 473 } 473 474 revert_creds(old_cred); ··· 490 489 OVL_COPY); 491 490 } 492 491 493 - static int ovl_clone_file_range(struct file *file_in, loff_t pos_in, 494 - struct file *file_out, loff_t pos_out, u64 len) 492 + static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in, 493 + struct file *file_out, loff_t pos_out, 494 + loff_t len, unsigned int remap_flags) 495 495 { 496 - return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0, 497 - OVL_CLONE); 498 - } 496 + enum ovl_copyop op; 499 497 500 - static int ovl_dedupe_file_range(struct file *file_in, loff_t pos_in, 501 - struct file *file_out, loff_t pos_out, u64 len) 502 - { 498 + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) 499 + return -EINVAL; 500 + 501 + if (remap_flags & REMAP_FILE_DEDUP) 502 + op = OVL_DEDUPE; 503 + else 504 + op = OVL_CLONE; 505 + 503 506 /* 504 507 * Don't copy up because of a dedupe request, this wouldn't make sense 505 508 * most of the time (data would be duplicated instead of 
deduplicated). 506 509 */ 507 - if (!ovl_inode_upper(file_inode(file_in)) || 508 - !ovl_inode_upper(file_inode(file_out))) 510 + if (op == OVL_DEDUPE && 511 + (!ovl_inode_upper(file_inode(file_in)) || 512 + !ovl_inode_upper(file_inode(file_out)))) 509 513 return -EPERM; 510 514 511 - return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0, 512 - OVL_DEDUPE); 515 + return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 516 + remap_flags, op); 513 517 } 514 518 515 519 const struct file_operations ovl_file_operations = { ··· 531 525 .compat_ioctl = ovl_compat_ioctl, 532 526 533 527 .copy_file_range = ovl_copy_file_range, 534 - .clone_file_range = ovl_clone_file_range, 535 - .dedupe_file_range = ovl_dedupe_file_range, 528 + .remap_file_range = ovl_remap_file_range, 536 529 };

+235 -188

fs/read_write.c

··· 1587 1587 * Try cloning first, this is supported by more file systems, and 1588 1588 * more efficient if both clone and copy are supported (e.g. NFS). 1589 1589 */ 1590 - if (file_in->f_op->clone_file_range) { 1591 - ret = file_in->f_op->clone_file_range(file_in, pos_in, 1592 - file_out, pos_out, len); 1593 - if (ret == 0) { 1594 - ret = len; 1590 + if (file_in->f_op->remap_file_range) { 1591 + loff_t cloned; 1592 + 1593 + cloned = file_in->f_op->remap_file_range(file_in, pos_in, 1594 + file_out, pos_out, 1595 + min_t(loff_t, MAX_RW_COUNT, len), 1596 + REMAP_FILE_CAN_SHORTEN); 1597 + if (cloned > 0) { 1598 + ret = cloned; 1595 1599 goto done; 1596 1600 } 1597 1601 } ··· 1689 1685 return ret; 1690 1686 } 1691 1687 1692 - static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write) 1688 + static int remap_verify_area(struct file *file, loff_t pos, loff_t len, 1689 + bool write) 1693 1690 { 1694 1691 struct inode *inode = file_inode(file); 1695 1692 1696 - if (unlikely(pos < 0)) 1693 + if (unlikely(pos < 0 || len < 0)) 1697 1694 return -EINVAL; 1698 1695 1699 1696 if (unlikely((loff_t) (pos + len) < 0)) ··· 1712 1707 1713 1708 return security_file_permission(file, write ? MAY_WRITE : MAY_READ); 1714 1709 } 1715 - 1716 1710 /* 1717 - * Check that the two inodes are eligible for cloning, the ranges make 1718 - * sense, and then flush all dirty data. Caller must ensure that the 1719 - * inodes have been locked against any other modifications. 1711 + * Ensure that we don't remap a partial EOF block in the middle of something 1712 + * else. Assume that the offsets have already been checked for block 1713 + * alignment. 1720 1714 * 1721 - * Returns: 0 for "nothing to clone", 1 for "something to clone", or 1722 - * the usual negative error code. 1715 + * For deduplication we always scale down to the previous block because we 1716 + * can't meaningfully compare post-EOF contents. 
1717 + * 1718 + * For clone we only link a partial EOF block above the destination file's EOF. 1719 + * 1720 + * Shorten the request if possible. 1723 1721 */ 1724 - int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, 1725 - struct inode *inode_out, loff_t pos_out, 1726 - u64 *len, bool is_dedupe) 1722 + static int generic_remap_check_len(struct inode *inode_in, 1723 + struct inode *inode_out, 1724 + loff_t pos_out, 1725 + loff_t *len, 1726 + unsigned int remap_flags) 1727 1727 { 1728 - loff_t bs = inode_out->i_sb->s_blocksize; 1729 - loff_t blen; 1730 - loff_t isize; 1731 - bool same_inode = (inode_in == inode_out); 1732 - int ret; 1728 + u64 blkmask = i_blocksize(inode_in) - 1; 1729 + loff_t new_len = *len; 1733 1730 1734 - /* Don't touch certain kinds of inodes */ 1735 - if (IS_IMMUTABLE(inode_out)) 1736 - return -EPERM; 1737 - 1738 - if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) 1739 - return -ETXTBSY; 1740 - 1741 - /* Don't reflink dirs, pipes, sockets... */ 1742 - if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) 1743 - return -EISDIR; 1744 - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) 1745 - return -EINVAL; 1746 - 1747 - /* Are we going all the way to the end? */ 1748 - isize = i_size_read(inode_in); 1749 - if (isize == 0) 1731 + if ((*len & blkmask) == 0) 1750 1732 return 0; 1751 1733 1752 - /* Zero length dedupe exits immediately; reflink goes to EOF. 
*/ 1753 - if (*len == 0) { 1754 - if (is_dedupe || pos_in == isize) 1755 - return 0; 1756 - if (pos_in > isize) 1757 - return -EINVAL; 1758 - *len = isize - pos_in; 1734 + if ((remap_flags & REMAP_FILE_DEDUP) || 1735 + pos_out + *len < i_size_read(inode_out)) 1736 + new_len &= ~blkmask; 1737 + 1738 + if (new_len == *len) 1739 + return 0; 1740 + 1741 + if (remap_flags & REMAP_FILE_CAN_SHORTEN) { 1742 + *len = new_len; 1743 + return 0; 1759 1744 } 1760 1745 1761 - /* Ensure offsets don't wrap and the input is inside i_size */ 1762 - if (pos_in + *len < pos_in || pos_out + *len < pos_out || 1763 - pos_in + *len > isize) 1764 - return -EINVAL; 1765 - 1766 - /* Don't allow dedupe past EOF in the dest file */ 1767 - if (is_dedupe) { 1768 - loff_t disize; 1769 - 1770 - disize = i_size_read(inode_out); 1771 - if (pos_out >= disize || pos_out + *len > disize) 1772 - return -EINVAL; 1773 - } 1774 - 1775 - /* If we're linking to EOF, continue to the block boundary. */ 1776 - if (pos_in + *len == isize) 1777 - blen = ALIGN(isize, bs) - pos_in; 1778 - else 1779 - blen = *len; 1780 - 1781 - /* Only reflink if we're aligned to block boundaries */ 1782 - if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) || 1783 - !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs)) 1784 - return -EINVAL; 1785 - 1786 - /* Don't allow overlapped reflink within the same file */ 1787 - if (same_inode) { 1788 - if (pos_out + blen > pos_in && pos_out < pos_in + blen) 1789 - return -EINVAL; 1790 - } 1791 - 1792 - /* Wait for the completion of any pending IOs on both files */ 1793 - inode_dio_wait(inode_in); 1794 - if (!same_inode) 1795 - inode_dio_wait(inode_out); 1796 - 1797 - ret = filemap_write_and_wait_range(inode_in->i_mapping, 1798 - pos_in, pos_in + *len - 1); 1799 - if (ret) 1800 - return ret; 1801 - 1802 - ret = filemap_write_and_wait_range(inode_out->i_mapping, 1803 - pos_out, pos_out + *len - 1); 1804 - if (ret) 1805 - return ret; 1806 - 1807 - /* 1808 - * Check that the 
extents are the same. 1809 - */ 1810 - if (is_dedupe) { 1811 - bool is_same = false; 1812 - 1813 - ret = vfs_dedupe_file_range_compare(inode_in, pos_in, 1814 - inode_out, pos_out, *len, &is_same); 1815 - if (ret) 1816 - return ret; 1817 - if (!is_same) 1818 - return -EBADE; 1819 - } 1820 - 1821 - return 1; 1746 + return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL; 1822 1747 } 1823 - EXPORT_SYMBOL(vfs_clone_file_prep_inodes); 1824 - 1825 - int do_clone_file_range(struct file *file_in, loff_t pos_in, 1826 - struct file *file_out, loff_t pos_out, u64 len) 1827 - { 1828 - struct inode *inode_in = file_inode(file_in); 1829 - struct inode *inode_out = file_inode(file_out); 1830 - int ret; 1831 - 1832 - if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) 1833 - return -EISDIR; 1834 - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) 1835 - return -EINVAL; 1836 - 1837 - /* 1838 - * FICLONE/FICLONERANGE ioctls enforce that src and dest files are on 1839 - * the same mount. Practically, they only need to be on the same file 1840 - * system. 
1841 - */ 1842 - if (inode_in->i_sb != inode_out->i_sb) 1843 - return -EXDEV; 1844 - 1845 - if (!(file_in->f_mode & FMODE_READ) || 1846 - !(file_out->f_mode & FMODE_WRITE) || 1847 - (file_out->f_flags & O_APPEND)) 1848 - return -EBADF; 1849 - 1850 - if (!file_in->f_op->clone_file_range) 1851 - return -EOPNOTSUPP; 1852 - 1853 - ret = clone_verify_area(file_in, pos_in, len, false); 1854 - if (ret) 1855 - return ret; 1856 - 1857 - ret = clone_verify_area(file_out, pos_out, len, true); 1858 - if (ret) 1859 - return ret; 1860 - 1861 - if (pos_in + len > i_size_read(inode_in)) 1862 - return -EINVAL; 1863 - 1864 - ret = file_in->f_op->clone_file_range(file_in, pos_in, 1865 - file_out, pos_out, len); 1866 - if (!ret) { 1867 - fsnotify_access(file_in); 1868 - fsnotify_modify(file_out); 1869 - } 1870 - 1871 - return ret; 1872 - } 1873 - EXPORT_SYMBOL(do_clone_file_range); 1874 - 1875 - int vfs_clone_file_range(struct file *file_in, loff_t pos_in, 1876 - struct file *file_out, loff_t pos_out, u64 len) 1877 - { 1878 - int ret; 1879 - 1880 - file_start_write(file_out); 1881 - ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len); 1882 - file_end_write(file_out); 1883 - 1884 - return ret; 1885 - } 1886 - EXPORT_SYMBOL(vfs_clone_file_range); 1887 1748 1888 1749 /* 1889 1750 * Read a page's worth of file data into the page cache. Return the page ··· 1757 1886 */ 1758 1887 static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset) 1759 1888 { 1760 - struct address_space *mapping; 1761 1889 struct page *page; 1762 - pgoff_t n; 1763 1890 1764 - n = offset >> PAGE_SHIFT; 1765 - mapping = inode->i_mapping; 1766 - page = read_mapping_page(mapping, n, NULL); 1891 + page = read_mapping_page(inode->i_mapping, offset >> PAGE_SHIFT, NULL); 1767 1892 if (IS_ERR(page)) 1768 1893 return page; 1769 1894 if (!PageUptodate(page)) { ··· 1774 1907 * Compare extents of two files to see if they are the same. 
1775 1908 * Caller must have locked both inodes to prevent write races. 1776 1909 */ 1777 - int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, 1778 - struct inode *dest, loff_t destoff, 1779 - loff_t len, bool *is_same) 1910 + static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, 1911 + struct inode *dest, loff_t destoff, 1912 + loff_t len, bool *is_same) 1780 1913 { 1781 1914 loff_t src_poff; 1782 1915 loff_t dest_poff; ··· 1841 1974 out_error: 1842 1975 return error; 1843 1976 } 1844 - EXPORT_SYMBOL(vfs_dedupe_file_range_compare); 1977 + 1978 + /* 1979 + * Check that the two inodes are eligible for cloning, the ranges make 1980 + * sense, and then flush all dirty data. Caller must ensure that the 1981 + * inodes have been locked against any other modifications. 1982 + * 1983 + * If there's an error, then the usual negative error code is returned. 1984 + * Otherwise returns 0 with *len set to the request length. 1985 + */ 1986 + int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, 1987 + struct file *file_out, loff_t pos_out, 1988 + loff_t *len, unsigned int remap_flags) 1989 + { 1990 + struct inode *inode_in = file_inode(file_in); 1991 + struct inode *inode_out = file_inode(file_out); 1992 + bool same_inode = (inode_in == inode_out); 1993 + int ret; 1994 + 1995 + /* Don't touch certain kinds of inodes */ 1996 + if (IS_IMMUTABLE(inode_out)) 1997 + return -EPERM; 1998 + 1999 + if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) 2000 + return -ETXTBSY; 2001 + 2002 + /* Don't reflink dirs, pipes, sockets... */ 2003 + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) 2004 + return -EISDIR; 2005 + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) 2006 + return -EINVAL; 2007 + 2008 + /* Zero length dedupe exits immediately; reflink goes to EOF. 
*/ 2009 + if (*len == 0) { 2010 + loff_t isize = i_size_read(inode_in); 2011 + 2012 + if ((remap_flags & REMAP_FILE_DEDUP) || pos_in == isize) 2013 + return 0; 2014 + if (pos_in > isize) 2015 + return -EINVAL; 2016 + *len = isize - pos_in; 2017 + if (*len == 0) 2018 + return 0; 2019 + } 2020 + 2021 + /* Check that we don't violate system file offset limits. */ 2022 + ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len, 2023 + remap_flags); 2024 + if (ret) 2025 + return ret; 2026 + 2027 + /* Wait for the completion of any pending IOs on both files */ 2028 + inode_dio_wait(inode_in); 2029 + if (!same_inode) 2030 + inode_dio_wait(inode_out); 2031 + 2032 + ret = filemap_write_and_wait_range(inode_in->i_mapping, 2033 + pos_in, pos_in + *len - 1); 2034 + if (ret) 2035 + return ret; 2036 + 2037 + ret = filemap_write_and_wait_range(inode_out->i_mapping, 2038 + pos_out, pos_out + *len - 1); 2039 + if (ret) 2040 + return ret; 2041 + 2042 + /* 2043 + * Check that the extents are the same. 2044 + */ 2045 + if (remap_flags & REMAP_FILE_DEDUP) { 2046 + bool is_same = false; 2047 + 2048 + ret = vfs_dedupe_file_range_compare(inode_in, pos_in, 2049 + inode_out, pos_out, *len, &is_same); 2050 + if (ret) 2051 + return ret; 2052 + if (!is_same) 2053 + return -EBADE; 2054 + } 2055 + 2056 + ret = generic_remap_check_len(inode_in, inode_out, pos_out, len, 2057 + remap_flags); 2058 + if (ret) 2059 + return ret; 2060 + 2061 + /* If we can't alter the file contents, we're done. */ 2062 + if (!(remap_flags & REMAP_FILE_DEDUP)) { 2063 + /* Update the timestamps, since we can alter file contents. */ 2064 + if (!(file_out->f_mode & FMODE_NOCMTIME)) { 2065 + ret = file_update_time(file_out); 2066 + if (ret) 2067 + return ret; 2068 + } 2069 + 2070 + /* 2071 + * Clear the security bits if the process is not being run by 2072 + * root. This keeps people from modifying setuid and setgid 2073 + * binaries. 
2074 + */ 2075 + ret = file_remove_privs(file_out); 2076 + if (ret) 2077 + return ret; 2078 + } 2079 + 2080 + return 0; 2081 + } 2082 + EXPORT_SYMBOL(generic_remap_file_range_prep); 2083 + 2084 + loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, 2085 + struct file *file_out, loff_t pos_out, 2086 + loff_t len, unsigned int remap_flags) 2087 + { 2088 + struct inode *inode_in = file_inode(file_in); 2089 + struct inode *inode_out = file_inode(file_out); 2090 + loff_t ret; 2091 + 2092 + WARN_ON_ONCE(remap_flags); 2093 + 2094 + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) 2095 + return -EISDIR; 2096 + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) 2097 + return -EINVAL; 2098 + 2099 + /* 2100 + * FICLONE/FICLONERANGE ioctls enforce that src and dest files are on 2101 + * the same mount. Practically, they only need to be on the same file 2102 + * system. 2103 + */ 2104 + if (inode_in->i_sb != inode_out->i_sb) 2105 + return -EXDEV; 2106 + 2107 + if (!(file_in->f_mode & FMODE_READ) || 2108 + !(file_out->f_mode & FMODE_WRITE) || 2109 + (file_out->f_flags & O_APPEND)) 2110 + return -EBADF; 2111 + 2112 + if (!file_in->f_op->remap_file_range) 2113 + return -EOPNOTSUPP; 2114 + 2115 + ret = remap_verify_area(file_in, pos_in, len, false); 2116 + if (ret) 2117 + return ret; 2118 + 2119 + ret = remap_verify_area(file_out, pos_out, len, true); 2120 + if (ret) 2121 + return ret; 2122 + 2123 + ret = file_in->f_op->remap_file_range(file_in, pos_in, 2124 + file_out, pos_out, len, remap_flags); 2125 + if (ret < 0) 2126 + return ret; 2127 + 2128 + fsnotify_access(file_in); 2129 + fsnotify_modify(file_out); 2130 + return ret; 2131 + } 2132 + EXPORT_SYMBOL(do_clone_file_range); 2133 + 2134 + loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, 2135 + struct file *file_out, loff_t pos_out, 2136 + loff_t len, unsigned int remap_flags) 2137 + { 2138 + loff_t ret; 2139 + 2140 + file_start_write(file_out); 2141 + ret = 
do_clone_file_range(file_in, pos_in, file_out, pos_out, len, 2142 + remap_flags); 2143 + file_end_write(file_out); 2144 + 2145 + return ret; 2146 + } 2147 + EXPORT_SYMBOL(vfs_clone_file_range); 1845 2148 1846 2149 /* Check whether we are allowed to dedupe the destination file */ 1847 2150 static bool allow_file_dedupe(struct file *file) ··· 2027 1990 return false; 2028 1991 } 2029 1992 2030 - int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, 2031 - struct file *dst_file, loff_t dst_pos, u64 len) 1993 + loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, 1994 + struct file *dst_file, loff_t dst_pos, 1995 + loff_t len, unsigned int remap_flags) 2032 1996 { 2033 - s64 ret; 1997 + loff_t ret; 1998 + 1999 + WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP | 2000 + REMAP_FILE_CAN_SHORTEN)); 2034 2001 2035 2002 ret = mnt_want_write_file(dst_file); 2036 2003 if (ret) 2037 2004 return ret; 2038 2005 2039 - ret = clone_verify_area(dst_file, dst_pos, len, true); 2006 + ret = remap_verify_area(dst_file, dst_pos, len, true); 2040 2007 if (ret < 0) 2041 2008 goto out_drop_write; 2042 2009 ··· 2057 2016 goto out_drop_write; 2058 2017 2059 2018 ret = -EINVAL; 2060 - if (!dst_file->f_op->dedupe_file_range) 2019 + if (!dst_file->f_op->remap_file_range) 2061 2020 goto out_drop_write; 2062 2021 2063 - ret = dst_file->f_op->dedupe_file_range(src_file, src_pos, 2064 - dst_file, dst_pos, len); 2022 + if (len == 0) { 2023 + ret = 0; 2024 + goto out_drop_write; 2025 + } 2026 + 2027 + ret = dst_file->f_op->remap_file_range(src_file, src_pos, dst_file, 2028 + dst_pos, len, remap_flags | REMAP_FILE_DEDUP); 2065 2029 out_drop_write: 2066 2030 mnt_drop_write_file(dst_file); 2067 2031 ··· 2083 2037 int i; 2084 2038 int ret; 2085 2039 u16 count = same->dest_count; 2086 - int deduped; 2040 + loff_t deduped; 2087 2041 2088 2042 if (!(file->f_mode & FMODE_READ)) 2089 2043 return -EINVAL; ··· 2102 2056 if (!S_ISREG(src->i_mode)) 2103 2057 goto out; 2104 2058 2105 - 
ret = clone_verify_area(file, off, len, false); 2059 + ret = remap_verify_area(file, off, len, false); 2106 2060 if (ret < 0) 2107 2061 goto out; 2108 2062 ret = 0; ··· 2134 2088 } 2135 2089 2136 2090 deduped = vfs_dedupe_file_range_one(file, off, dst_file, 2137 - info->dest_offset, len); 2091 + info->dest_offset, len, 2092 + REMAP_FILE_CAN_SHORTEN); 2138 2093 if (deduped == -EBADE) 2139 2094 info->status = FILE_DEDUPE_RANGE_DIFFERS; 2140 2095 else if (deduped < 0)

+60 -22

fs/xfs/xfs_file.c

··· 919 919 return error; 920 920 } 921 921 922 - STATIC int 923 - xfs_file_clone_range( 924 - struct file *file_in, 925 - loff_t pos_in, 926 - struct file *file_out, 927 - loff_t pos_out, 928 - u64 len) 929 - { 930 - return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, 931 - len, false); 932 - } 933 922 934 - STATIC int 935 - xfs_file_dedupe_range( 936 - struct file *file_in, 937 - loff_t pos_in, 938 - struct file *file_out, 939 - loff_t pos_out, 940 - u64 len) 923 + loff_t 924 + xfs_file_remap_range( 925 + struct file *file_in, 926 + loff_t pos_in, 927 + struct file *file_out, 928 + loff_t pos_out, 929 + loff_t len, 930 + unsigned int remap_flags) 941 931 { 942 - return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, 943 - len, true); 932 + struct inode *inode_in = file_inode(file_in); 933 + struct xfs_inode *src = XFS_I(inode_in); 934 + struct inode *inode_out = file_inode(file_out); 935 + struct xfs_inode *dest = XFS_I(inode_out); 936 + struct xfs_mount *mp = src->i_mount; 937 + loff_t remapped = 0; 938 + xfs_extlen_t cowextsize; 939 + int ret; 940 + 941 + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) 942 + return -EINVAL; 943 + 944 + if (!xfs_sb_version_hasreflink(&mp->m_sb)) 945 + return -EOPNOTSUPP; 946 + 947 + if (XFS_FORCED_SHUTDOWN(mp)) 948 + return -EIO; 949 + 950 + /* Prepare and then clone file data. */ 951 + ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out, 952 + &len, remap_flags); 953 + if (ret < 0 || len == 0) 954 + return ret; 955 + 956 + trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); 957 + 958 + ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len, 959 + &remapped); 960 + if (ret) 961 + goto out_unlock; 962 + 963 + /* 964 + * Carry the cowextsize hint from src to dest if we're sharing the 965 + * entire source file to the entire destination file, the source file 966 + * has a cowextsize hint, and the destination file does not. 
967 + */ 968 + cowextsize = 0; 969 + if (pos_in == 0 && len == i_size_read(inode_in) && 970 + (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) && 971 + pos_out == 0 && len >= i_size_read(inode_out) && 972 + !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) 973 + cowextsize = src->i_d.di_cowextsize; 974 + 975 + ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize, 976 + remap_flags); 977 + 978 + out_unlock: 979 + xfs_reflink_remap_unlock(file_in, file_out); 980 + if (ret) 981 + trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); 982 + return remapped > 0 ? remapped : ret; 944 983 } 945 984 946 985 STATIC int ··· 1214 1175 .fsync = xfs_file_fsync, 1215 1176 .get_unmapped_area = thp_get_unmapped_area, 1216 1177 .fallocate = xfs_file_fallocate, 1217 - .clone_file_range = xfs_file_clone_range, 1218 - .dedupe_file_range = xfs_file_dedupe_range, 1178 + .remap_file_range = xfs_file_remap_range, 1219 1179 }; 1220 1180 1221 1181 const struct file_operations xfs_dir_file_operations = {

+36 -137

fs/xfs/xfs_reflink.c

··· 913 913 /* 914 914 * Update destination inode size & cowextsize hint, if necessary. 915 915 */ 916 - STATIC int 916 + int 917 917 xfs_reflink_update_dest( 918 918 struct xfs_inode *dest, 919 919 xfs_off_t newlen, 920 920 xfs_extlen_t cowextsize, 921 - bool is_dedupe) 921 + unsigned int remap_flags) 922 922 { 923 923 struct xfs_mount *mp = dest->i_mount; 924 924 struct xfs_trans *tp; 925 925 int error; 926 926 927 - if (is_dedupe && newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) 927 + if (newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) 928 928 return 0; 929 929 930 930 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); ··· 945 945 dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; 946 946 } 947 947 948 - if (!is_dedupe) { 949 - xfs_trans_ichgtime(tp, dest, 950 - XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 951 - } 952 948 xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE); 953 949 954 950 error = xfs_trans_commit(tp); ··· 1108 1112 /* 1109 1113 * Iteratively remap one file's extents (and holes) to another's. 
1110 1114 */ 1111 - STATIC int 1115 + int 1112 1116 xfs_reflink_remap_blocks( 1113 1117 struct xfs_inode *src, 1114 - xfs_fileoff_t srcoff, 1118 + loff_t pos_in, 1115 1119 struct xfs_inode *dest, 1116 - xfs_fileoff_t destoff, 1117 - xfs_filblks_t len, 1118 - xfs_off_t new_isize) 1120 + loff_t pos_out, 1121 + loff_t remap_len, 1122 + loff_t *remapped) 1119 1123 { 1120 1124 struct xfs_bmbt_irec imap; 1125 + xfs_fileoff_t srcoff; 1126 + xfs_fileoff_t destoff; 1127 + xfs_filblks_t len; 1128 + xfs_filblks_t range_len; 1129 + xfs_filblks_t remapped_len = 0; 1130 + xfs_off_t new_isize = pos_out + remap_len; 1121 1131 int nimaps; 1122 1132 int error = 0; 1123 - xfs_filblks_t range_len; 1133 + 1134 + destoff = XFS_B_TO_FSBT(src->i_mount, pos_out); 1135 + srcoff = XFS_B_TO_FSBT(src->i_mount, pos_in); 1136 + len = XFS_B_TO_FSB(src->i_mount, remap_len); 1124 1137 1125 1138 /* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */ 1126 1139 while (len) { ··· 1144 1139 error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0); 1145 1140 xfs_iunlock(src, lock_mode); 1146 1141 if (error) 1147 - goto err; 1142 + break; 1148 1143 ASSERT(nimaps == 1); 1149 1144 1150 1145 trace_xfs_reflink_remap_imap(src, srcoff, len, XFS_IO_OVERWRITE, ··· 1158 1153 error = xfs_reflink_remap_extent(dest, &imap, destoff, 1159 1154 new_isize); 1160 1155 if (error) 1161 - goto err; 1156 + break; 1162 1157 1163 1158 if (fatal_signal_pending(current)) { 1164 1159 error = -EINTR; 1165 - goto err; 1160 + break; 1166 1161 } 1167 1162 1168 1163 /* Advance drange/srange */ 1169 1164 srcoff += range_len; 1170 1165 destoff += range_len; 1171 1166 len -= range_len; 1167 + remapped_len += range_len; 1172 1168 } 1173 1169 1174 - return 0; 1175 - 1176 - err: 1177 - trace_xfs_reflink_remap_blocks_error(dest, error, _RET_IP_); 1170 + if (error) 1171 + trace_xfs_reflink_remap_blocks_error(dest, error, _RET_IP_); 1172 + *remapped = min_t(loff_t, remap_len, 1173 + XFS_FSB_TO_B(src->i_mount, 
remapped_len)); 1178 1174 return error; 1179 1175 } 1180 1176 ··· 1224 1218 } 1225 1219 1226 1220 /* Unlock both inodes after they've been prepped for a range clone. */ 1227 - STATIC void 1221 + void 1228 1222 xfs_reflink_remap_unlock( 1229 1223 struct file *file_in, 1230 1224 struct file *file_out) ··· 1292 1286 * stale data in the destination file. Hence we reject these clone attempts with 1293 1287 * -EINVAL in this case. 1294 1288 */ 1295 - STATIC int 1289 + int 1296 1290 xfs_reflink_remap_prep( 1297 1291 struct file *file_in, 1298 1292 loff_t pos_in, 1299 1293 struct file *file_out, 1300 1294 loff_t pos_out, 1301 - u64 *len, 1302 - bool is_dedupe) 1295 + loff_t *len, 1296 + unsigned int remap_flags) 1303 1297 { 1304 1298 struct inode *inode_in = file_inode(file_in); 1305 1299 struct xfs_inode *src = XFS_I(inode_in); 1306 1300 struct inode *inode_out = file_inode(file_out); 1307 1301 struct xfs_inode *dest = XFS_I(inode_out); 1308 1302 bool same_inode = (inode_in == inode_out); 1309 - u64 blkmask = i_blocksize(inode_in) - 1; 1310 1303 ssize_t ret; 1311 1304 1312 1305 /* Lock both files against IO */ ··· 1328 1323 if (IS_DAX(inode_in) || IS_DAX(inode_out)) 1329 1324 goto out_unlock; 1330 1325 1331 - ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out, 1332 - len, is_dedupe); 1333 - if (ret <= 0) 1326 + ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, 1327 + len, remap_flags); 1328 + if (ret < 0 || *len == 0) 1334 1329 goto out_unlock; 1335 - 1336 - /* 1337 - * If the dedupe data matches, chop off the partial EOF block 1338 - * from the source file so we don't try to dedupe the partial 1339 - * EOF block. 1340 - */ 1341 - if (is_dedupe) { 1342 - *len &= ~blkmask; 1343 - } else if (*len & blkmask) { 1344 - /* 1345 - * The user is attempting to share a partial EOF block, 1346 - * if it's inside the destination EOF then reject it. 
1347 - */ 1348 - if (pos_out + *len < i_size_read(inode_out)) { 1349 - ret = -EINVAL; 1350 - goto out_unlock; 1351 - } 1352 - } 1353 1330 1354 1331 /* Attach dquots to dest inode before changing block map */ 1355 1332 ret = xfs_qm_dqattach(dest); ··· 1352 1365 goto out_unlock; 1353 1366 1354 1367 /* Zap any page cache for the destination file's range. */ 1355 - truncate_inode_pages_range(&inode_out->i_data, pos_out, 1356 - PAGE_ALIGN(pos_out + *len) - 1); 1357 - 1358 - /* If we're altering the file contents... */ 1359 - if (!is_dedupe) { 1360 - /* 1361 - * ...update the timestamps (which will grab the ilock again 1362 - * from xfs_fs_dirty_inode, so we have to call it before we 1363 - * take the ilock). 1364 - */ 1365 - if (!(file_out->f_mode & FMODE_NOCMTIME)) { 1366 - ret = file_update_time(file_out); 1367 - if (ret) 1368 - goto out_unlock; 1369 - } 1370 - 1371 - /* 1372 - * ...clear the security bits if the process is not being run 1373 - * by root. This keeps people from modifying setuid and setgid 1374 - * binaries. 1375 - */ 1376 - ret = file_remove_privs(file_out); 1377 - if (ret) 1378 - goto out_unlock; 1379 - } 1368 + truncate_inode_pages_range(&inode_out->i_data, 1369 + round_down(pos_out, PAGE_SIZE), 1370 + round_up(pos_out + *len, PAGE_SIZE) - 1); 1380 1371 1381 1372 return 1; 1382 1373 out_unlock: 1383 1374 xfs_reflink_remap_unlock(file_in, file_out); 1384 - return ret; 1385 - } 1386 - 1387 - /* 1388 - * Link a range of blocks from one file to another. 
1389 - */ 1390 - int 1391 - xfs_reflink_remap_range( 1392 - struct file *file_in, 1393 - loff_t pos_in, 1394 - struct file *file_out, 1395 - loff_t pos_out, 1396 - u64 len, 1397 - bool is_dedupe) 1398 - { 1399 - struct inode *inode_in = file_inode(file_in); 1400 - struct xfs_inode *src = XFS_I(inode_in); 1401 - struct inode *inode_out = file_inode(file_out); 1402 - struct xfs_inode *dest = XFS_I(inode_out); 1403 - struct xfs_mount *mp = src->i_mount; 1404 - xfs_fileoff_t sfsbno, dfsbno; 1405 - xfs_filblks_t fsblen; 1406 - xfs_extlen_t cowextsize; 1407 - ssize_t ret; 1408 - 1409 - if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1410 - return -EOPNOTSUPP; 1411 - 1412 - if (XFS_FORCED_SHUTDOWN(mp)) 1413 - return -EIO; 1414 - 1415 - /* Prepare and then clone file data. */ 1416 - ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out, 1417 - &len, is_dedupe); 1418 - if (ret <= 0) 1419 - return ret; 1420 - 1421 - trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); 1422 - 1423 - dfsbno = XFS_B_TO_FSBT(mp, pos_out); 1424 - sfsbno = XFS_B_TO_FSBT(mp, pos_in); 1425 - fsblen = XFS_B_TO_FSB(mp, len); 1426 - ret = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen, 1427 - pos_out + len); 1428 - if (ret) 1429 - goto out_unlock; 1430 - 1431 - /* 1432 - * Carry the cowextsize hint from src to dest if we're sharing the 1433 - * entire source file to the entire destination file, the source file 1434 - * has a cowextsize hint, and the destination file does not. 
1435 - */ 1436 - cowextsize = 0; 1437 - if (pos_in == 0 && len == i_size_read(inode_in) && 1438 - (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) && 1439 - pos_out == 0 && len >= i_size_read(inode_out) && 1440 - !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) 1441 - cowextsize = src->i_d.di_cowextsize; 1442 - 1443 - ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize, 1444 - is_dedupe); 1445 - 1446 - out_unlock: 1447 - xfs_reflink_remap_unlock(file_in, file_out); 1448 - if (ret) 1449 - trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); 1450 1375 return ret; 1451 1376 } 1452 1377

+13 -2

fs/xfs/xfs_reflink.h

··· 27 27 extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, 28 28 xfs_off_t count); 29 29 extern int xfs_reflink_recover_cow(struct xfs_mount *mp); 30 - extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in, 31 - struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe); 30 + extern loff_t xfs_reflink_remap_range(struct file *file_in, loff_t pos_in, 31 + struct file *file_out, loff_t pos_out, loff_t len, 32 + unsigned int remap_flags); 32 33 extern int xfs_reflink_inode_has_shared_extents(struct xfs_trans *tp, 33 34 struct xfs_inode *ip, bool *has_shared); 34 35 extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip, 35 36 struct xfs_trans **tpp); 36 37 extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset, 37 38 xfs_off_t len); 39 + extern int xfs_reflink_remap_prep(struct file *file_in, loff_t pos_in, 40 + struct file *file_out, loff_t pos_out, loff_t *len, 41 + unsigned int remap_flags); 42 + extern int xfs_reflink_remap_blocks(struct xfs_inode *src, loff_t pos_in, 43 + struct xfs_inode *dest, loff_t pos_out, loff_t remap_len, 44 + loff_t *remapped); 45 + extern int xfs_reflink_update_dest(struct xfs_inode *dest, xfs_off_t newlen, 46 + xfs_extlen_t cowextsize, unsigned int remap_flags); 47 + extern void xfs_reflink_remap_unlock(struct file *file_in, 48 + struct file *file_out); 38 49 39 50 #endif /* __XFS_REFLINK_H */

+38 -17

include/linux/fs.h

··· 1752 1752 #define NOMMU_VMFLAGS \ 1753 1753 (NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC) 1754 1754 1755 + /* 1756 + * These flags control the behavior of the remap_file_range function pointer. 1757 + * If it is called with len == 0 that means "remap to end of source file". 1758 + * See Documentation/filesystems/vfs.txt for more details about this call. 1759 + * 1760 + * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate) 1761 + * REMAP_FILE_CAN_SHORTEN: caller can handle a shortened request 1762 + */ 1763 + #define REMAP_FILE_DEDUP (1 << 0) 1764 + #define REMAP_FILE_CAN_SHORTEN (1 << 1) 1765 + 1766 + /* 1767 + * These flags signal that the caller is ok with altering various aspects of 1768 + * the behavior of the remap operation. The changes must be made by the 1769 + * implementation; the vfs remap helper functions can take advantage of them. 1770 + * Flags in this category exist to preserve the quirky behavior of the hoisted 1771 + * btrfs clone/dedupe ioctls. 
1772 + */ 1773 + #define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN) 1755 1774 1756 1775 struct iov_iter; 1757 1776 ··· 1809 1790 #endif 1810 1791 ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, 1811 1792 loff_t, size_t, unsigned int); 1812 - int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, 1813 - u64); 1814 - int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, 1815 - u64); 1793 + loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in, 1794 + struct file *file_out, loff_t pos_out, 1795 + loff_t len, unsigned int remap_flags); 1816 1796 int (*fadvise)(struct file *, loff_t, loff_t, int); 1817 1797 } __randomize_layout; 1818 1798 ··· 1874 1856 unsigned long, loff_t *, rwf_t); 1875 1857 extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, 1876 1858 loff_t, size_t, unsigned int); 1877 - extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, 1878 - struct inode *inode_out, loff_t pos_out, 1879 - u64 *len, bool is_dedupe); 1880 - extern int do_clone_file_range(struct file *file_in, loff_t pos_in, 1881 - struct file *file_out, loff_t pos_out, u64 len); 1882 - extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, 1883 - struct file *file_out, loff_t pos_out, u64 len); 1884 - extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, 1885 - struct inode *dest, loff_t destoff, 1886 - loff_t len, bool *is_same); 1859 + extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, 1860 + struct file *file_out, loff_t pos_out, 1861 + loff_t *count, 1862 + unsigned int remap_flags); 1863 + extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, 1864 + struct file *file_out, loff_t pos_out, 1865 + loff_t len, unsigned int remap_flags); 1866 + extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, 1867 + struct file *file_out, loff_t pos_out, 1868 + loff_t len, unsigned int remap_flags); 1887 
1869 extern int vfs_dedupe_file_range(struct file *file, 1888 1870 struct file_dedupe_range *same); 1889 - extern int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, 1890 - struct file *dst_file, loff_t dst_pos, 1891 - u64 len); 1871 + extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, 1872 + struct file *dst_file, loff_t dst_pos, 1873 + loff_t len, unsigned int remap_flags); 1892 1874 1893 1875 1894 1876 struct super_operations { ··· 3016 2998 extern int generic_file_mmap(struct file *, struct vm_area_struct *); 3017 2999 extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); 3018 3000 extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); 3001 + extern int generic_remap_checks(struct file *file_in, loff_t pos_in, 3002 + struct file *file_out, loff_t pos_out, 3003 + loff_t *count, unsigned int remap_flags); 3019 3004 extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); 3020 3005 extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); 3021 3006 extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);

+124 -32

mm/filemap.c

··· 2825 2825 EXPORT_SYMBOL(read_cache_page_gfp); 2826 2826 2827 2827 /* 2828 + * Don't operate on ranges the page cache doesn't support, and don't exceed the 2829 + * LFS limits. If pos is under the limit it becomes a short access. If it 2830 + * exceeds the limit we return -EFBIG. 2831 + */ 2832 + static int generic_access_check_limits(struct file *file, loff_t pos, 2833 + loff_t *count) 2834 + { 2835 + struct inode *inode = file->f_mapping->host; 2836 + loff_t max_size = inode->i_sb->s_maxbytes; 2837 + 2838 + if (!(file->f_flags & O_LARGEFILE)) 2839 + max_size = MAX_NON_LFS; 2840 + 2841 + if (unlikely(pos >= max_size)) 2842 + return -EFBIG; 2843 + *count = min(*count, max_size - pos); 2844 + return 0; 2845 + } 2846 + 2847 + static int generic_write_check_limits(struct file *file, loff_t pos, 2848 + loff_t *count) 2849 + { 2850 + loff_t limit = rlimit(RLIMIT_FSIZE); 2851 + 2852 + if (limit != RLIM_INFINITY) { 2853 + if (pos >= limit) { 2854 + send_sig(SIGXFSZ, current, 0); 2855 + return -EFBIG; 2856 + } 2857 + *count = min(*count, limit - pos); 2858 + } 2859 + 2860 + return generic_access_check_limits(file, pos, count); 2861 + } 2862 + 2863 + /* 2828 2864 * Performs necessary checks before doing a write 2829 2865 * 2830 2866 * Can adjust writing position or amount of bytes to write. 
··· 2871 2835 { 2872 2836 struct file *file = iocb->ki_filp; 2873 2837 struct inode *inode = file->f_mapping->host; 2874 - unsigned long limit = rlimit(RLIMIT_FSIZE); 2875 - loff_t pos; 2838 + loff_t count; 2839 + int ret; 2876 2840 2877 2841 if (!iov_iter_count(from)) 2878 2842 return 0; ··· 2881 2845 if (iocb->ki_flags & IOCB_APPEND) 2882 2846 iocb->ki_pos = i_size_read(inode); 2883 2847 2884 - pos = iocb->ki_pos; 2885 - 2886 2848 if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) 2887 2849 return -EINVAL; 2888 2850 2889 - if (limit != RLIM_INFINITY) { 2890 - if (iocb->ki_pos >= limit) { 2891 - send_sig(SIGXFSZ, current, 0); 2892 - return -EFBIG; 2893 - } 2894 - iov_iter_truncate(from, limit - (unsigned long)pos); 2895 - } 2851 + count = iov_iter_count(from); 2852 + ret = generic_write_check_limits(file, iocb->ki_pos, &count); 2853 + if (ret) 2854 + return ret; 2896 2855 2897 - /* 2898 - * LFS rule 2899 - */ 2900 - if (unlikely(pos + iov_iter_count(from) > MAX_NON_LFS && 2901 - !(file->f_flags & O_LARGEFILE))) { 2902 - if (pos >= MAX_NON_LFS) 2903 - return -EFBIG; 2904 - iov_iter_truncate(from, MAX_NON_LFS - (unsigned long)pos); 2905 - } 2906 - 2907 - /* 2908 - * Are we about to exceed the fs block limit ? 2909 - * 2910 - * If we have written data it becomes a short write. If we have 2911 - * exceeded without writing data we send a signal and return EFBIG. 2912 - * Linus frestrict idea will clean these up nicely.. 2913 - */ 2914 - if (unlikely(pos >= inode->i_sb->s_maxbytes)) 2915 - return -EFBIG; 2916 - 2917 - iov_iter_truncate(from, inode->i_sb->s_maxbytes - pos); 2856 + iov_iter_truncate(from, count); 2918 2857 return iov_iter_count(from); 2919 2858 } 2920 2859 EXPORT_SYMBOL(generic_write_checks); 2860 + 2861 + /* 2862 + * Performs necessary checks before doing a clone. 2863 + * 2864 + * Can adjust amount of bytes to clone. 
2865 + * Returns appropriate error code that caller should return or 2866 + * zero in case the clone should be allowed. 2867 + */ 2868 + int generic_remap_checks(struct file *file_in, loff_t pos_in, 2869 + struct file *file_out, loff_t pos_out, 2870 + loff_t *req_count, unsigned int remap_flags) 2871 + { 2872 + struct inode *inode_in = file_in->f_mapping->host; 2873 + struct inode *inode_out = file_out->f_mapping->host; 2874 + uint64_t count = *req_count; 2875 + uint64_t bcount; 2876 + loff_t size_in, size_out; 2877 + loff_t bs = inode_out->i_sb->s_blocksize; 2878 + int ret; 2879 + 2880 + /* The start of both ranges must be aligned to an fs block. */ 2881 + if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_out, bs)) 2882 + return -EINVAL; 2883 + 2884 + /* Ensure offsets don't wrap. */ 2885 + if (pos_in + count < pos_in || pos_out + count < pos_out) 2886 + return -EINVAL; 2887 + 2888 + size_in = i_size_read(inode_in); 2889 + size_out = i_size_read(inode_out); 2890 + 2891 + /* Dedupe requires both ranges to be within EOF. */ 2892 + if ((remap_flags & REMAP_FILE_DEDUP) && 2893 + (pos_in >= size_in || pos_in + count > size_in || 2894 + pos_out >= size_out || pos_out + count > size_out)) 2895 + return -EINVAL; 2896 + 2897 + /* Ensure the infile range is within the infile. */ 2898 + if (pos_in >= size_in) 2899 + return -EINVAL; 2900 + count = min(count, size_in - (uint64_t)pos_in); 2901 + 2902 + ret = generic_access_check_limits(file_in, pos_in, &count); 2903 + if (ret) 2904 + return ret; 2905 + 2906 + ret = generic_write_check_limits(file_out, pos_out, &count); 2907 + if (ret) 2908 + return ret; 2909 + 2910 + /* 2911 + * If the user wanted us to link to the infile's EOF, round up to the 2912 + * next block boundary for this check. 2913 + * 2914 + * Otherwise, make sure the count is also block-aligned, having 2915 + * already confirmed the starting offsets' block alignment. 
2916 + */ 2917 + if (pos_in + count == size_in) { 2918 + bcount = ALIGN(size_in, bs) - pos_in; 2919 + } else { 2920 + if (!IS_ALIGNED(count, bs)) 2921 + count = ALIGN_DOWN(count, bs); 2922 + bcount = count; 2923 + } 2924 + 2925 + /* Don't allow overlapped cloning within the same file. */ 2926 + if (inode_in == inode_out && 2927 + pos_out + bcount > pos_in && 2928 + pos_out < pos_in + bcount) 2929 + return -EINVAL; 2930 + 2931 + /* 2932 + * We shortened the request but the caller can't deal with that, so 2933 + * bounce the request back to userspace. 2934 + */ 2935 + if (*req_count != count && !(remap_flags & REMAP_FILE_CAN_SHORTEN)) 2936 + return -EINVAL; 2937 + 2938 + *req_count = count; 2939 + return 0; 2940 + } 2921 2941 2922 2942 int pagecache_write_begin(struct file *file, struct address_space *mapping, 2923 2943 loff_t pos, unsigned len, unsigned flags,

Configure Feed

Configure Feed