Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

fs/ntfs3: add delayed-allocation (delalloc) support

This patch implements delayed allocation (delalloc) in the ntfs3 driver.

It introduces an in-memory delayed runlist (run_da) and helpers to track,
reserve, and later convert those delayed reservations into real clusters at
writeback time. The change keeps the on-disk format untouched and focuses on
page-cache integration, correctness, and safe interaction with the fallocate,
truncate, and dio/iomap paths.

Key points:

- add run_da (delay-allocated run tree) and bookkeeping for delayed clusters.

- mark ranges as delalloc (DELALLOC_LCN) instead of allocating them
immediately. The actual allocation is performed later (writeback,
attr_set_size_ex, or explicit flush paths).

- direct I/O and iomap paths are updated to avoid collisions between dio and
delalloc: dio either falls back to buffered I/O or forces allocation of the
delayed blocks before proceeding.

- punch/collapse/truncate/fallocate paths check for and cancel delayed
allocation reservations; sparse and compressed files are handled specially.

- free-space checks (ntfs_check_free_space) are updated to account for
reserved delalloc clusters and for MFT record budgeting.

- delayed allocations are committed by the last writer (at file release) and
on explicit allocation flush paths; a minimal sketch of this bookkeeping
follows the list.
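
To make the bookkeeping concrete, here is a minimal, self-contained
user-space sketch of the reserve/commit/cancel idea under simplified
counters. The names (fs_ctx, da_reserve, da_commit, da_cancel) are
illustrative only and are not the helpers added by this patch; in the kernel
the reservation lives in the per-inode run_da tree plus a per-volume counter
(ntfs_add_da/ntfs_sub_da).

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct fs_ctx {
	uint64_t free_clusters; /* clusters still free on "disk" */
	uint64_t da_clusters;   /* clusters reserved, not yet allocated */
};

/* Reserve without touching the bitmap; refuse if it would overcommit. */
static bool da_reserve(struct fs_ctx *fs, uint64_t len)
{
	if (fs->free_clusters - fs->da_clusters < len)
		return false;
	fs->da_clusters += len; /* pure bookkeeping, no on-disk change */
	return true;
}

/* Writeback/flush time: turn a reservation into a real allocation. */
static void da_commit(struct fs_ctx *fs, uint64_t len)
{
	fs->da_clusters -= len;
	fs->free_clusters -= len; /* here the real bitmap would be marked */
}

/* Truncate/punch of a never-written range: just drop the reservation. */
static void da_cancel(struct fs_ctx *fs, uint64_t len)
{
	fs->da_clusters -= len;
}

int main(void)
{
	struct fs_ctx fs = { .free_clusters = 100, .da_clusters = 0 };

	if (!da_reserve(&fs, 40))
		return 1;
	da_commit(&fs, 30); /* writeback allocated 30 clusters */
	da_cancel(&fs, 10); /* truncate cancelled the remaining 10 */
	printf("free=%" PRIu64 " da=%" PRIu64 "\n",
	       fs.free_clusters, fs.da_clusters);
	return 0;
}

In the patch itself, the dio read/write paths implement the "force
allocation" bullet by calling ni_allocate_da_blocks() whenever
ni->file.run_da.count is non-zero (see the fs/ntfs3/file.c hunks below).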

Tested-by: syzbot@syzkaller.appspotmail.com
Reported-by: syzbot+2bd8e813c7f767aa9bb1@syzkaller.appspotmail.com
Signed-off-by: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>

+890 -348
+243 -90
fs/ntfs3/attrib.c
··· 91 91 * run_deallocate_ex - Deallocate clusters. 92 92 */ 93 93 static int run_deallocate_ex(struct ntfs_sb_info *sbi, struct runs_tree *run, 94 - CLST vcn, CLST len, CLST *done, bool trim) 94 + CLST vcn, CLST len, CLST *done, bool trim, 95 + struct runs_tree *run_da) 95 96 { 96 97 int err = 0; 97 98 CLST vcn_next, vcn0 = vcn, lcn, clen, dn = 0; ··· 121 120 if (sbi) { 122 121 /* mark bitmap range [lcn + clen) as free and trim clusters. */ 123 122 mark_as_free_ex(sbi, lcn, clen, trim); 123 + 124 + if (run_da) { 125 + CLST da_len; 126 + if (!run_remove_range(run_da, vcn, clen, 127 + &da_len)) { 128 + err = -ENOMEM; 129 + goto failed; 130 + } 131 + ntfs_sub_da(sbi, da_len); 132 + } 124 133 } 125 134 dn += clen; 126 135 } ··· 158 147 * attr_allocate_clusters - Find free space, mark it as used and store in @run. 159 148 */ 160 149 int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run, 161 - CLST vcn, CLST lcn, CLST len, CLST *pre_alloc, 162 - enum ALLOCATE_OPT opt, CLST *alen, const size_t fr, 163 - CLST *new_lcn, CLST *new_len) 150 + struct runs_tree *run_da, CLST vcn, CLST lcn, 151 + CLST len, CLST *pre_alloc, enum ALLOCATE_OPT opt, 152 + CLST *alen, const size_t fr, CLST *new_lcn, 153 + CLST *new_len) 164 154 { 165 155 int err; 166 156 CLST flen, vcn0 = vcn, pre = pre_alloc ? *pre_alloc : 0; ··· 197 185 198 186 /* Add new fragment into run storage. */ 199 187 if (!run_add_entry(run, vcn, lcn, flen, opt & ALLOCATE_MFT)) { 188 + undo_alloc: 200 189 /* Undo last 'ntfs_look_for_free_space' */ 201 190 mark_as_free_ex(sbi, lcn, len, false); 202 191 err = -ENOMEM; 203 192 goto out; 193 + } 194 + 195 + if (run_da) { 196 + CLST da_len; 197 + if (!run_remove_range(run_da, vcn, flen, &da_len)) { 198 + goto undo_alloc; 199 + } 200 + ntfs_sub_da(sbi, da_len); 204 201 } 205 202 206 203 if (opt & ALLOCATE_ZERO) { ··· 226 205 vcn += flen; 227 206 228 207 if (flen >= len || (opt & ALLOCATE_MFT) || 229 - (fr && run->count - cnt >= fr)) { 208 + (opt & ALLOCATE_ONE_FR) || (fr && run->count - cnt >= fr)) { 230 209 *alen = vcn - vcn0; 231 210 return 0; 232 211 } ··· 237 216 out: 238 217 /* Undo 'ntfs_look_for_free_space' */ 239 218 if (vcn - vcn0) { 240 - run_deallocate_ex(sbi, run, vcn0, vcn - vcn0, NULL, false); 219 + run_deallocate_ex(sbi, run, vcn0, vcn - vcn0, NULL, false, 220 + run_da); 241 221 run_truncate(run, vcn0); 242 222 } 243 223 ··· 303 281 } else { 304 282 const char *data = resident_data(attr); 305 283 306 - err = attr_allocate_clusters(sbi, run, 0, 0, len, NULL, 284 + err = attr_allocate_clusters(sbi, run, NULL, 0, 0, len, NULL, 307 285 ALLOCATE_DEF, &alen, 0, NULL, 308 286 NULL); 309 287 if (err) ··· 419 397 } 420 398 421 399 /* 422 - * attr_set_size - Change the size of attribute. 400 + * attr_set_size_ex - Change the size of attribute. 423 401 * 424 402 * Extend: 425 403 * - Sparse/compressed: No allocated clusters. ··· 427 405 * Shrink: 428 406 * - No deallocate if @keep_prealloc is set. 
429 407 */ 430 - int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type, 431 - const __le16 *name, u8 name_len, struct runs_tree *run, 432 - u64 new_size, const u64 *new_valid, bool keep_prealloc, 433 - struct ATTRIB **ret) 408 + int attr_set_size_ex(struct ntfs_inode *ni, enum ATTR_TYPE type, 409 + const __le16 *name, u8 name_len, struct runs_tree *run, 410 + u64 new_size, const u64 *new_valid, bool keep_prealloc, 411 + struct ATTRIB **ret, bool no_da) 434 412 { 435 413 int err = 0; 436 414 struct ntfs_sb_info *sbi = ni->mi.sbi; 437 415 u8 cluster_bits = sbi->cluster_bits; 438 416 bool is_mft = ni->mi.rno == MFT_REC_MFT && type == ATTR_DATA && 439 417 !name_len; 440 - u64 old_valid, old_size, old_alloc, new_alloc, new_alloc_tmp; 418 + u64 old_valid, old_size, old_alloc, new_alloc_tmp; 419 + u64 new_alloc = 0; 441 420 struct ATTRIB *attr = NULL, *attr_b; 442 421 struct ATTR_LIST_ENTRY *le, *le_b; 443 422 struct mft_inode *mi, *mi_b; 444 423 CLST alen, vcn, lcn, new_alen, old_alen, svcn, evcn; 445 424 CLST next_svcn, pre_alloc = -1, done = 0; 446 - bool is_ext, is_bad = false; 425 + bool is_ext = false, is_bad = false; 447 426 bool dirty = false; 427 + struct runs_tree *run_da = run == &ni->file.run ? &ni->file.run_da : 428 + NULL; 429 + bool da = !is_mft && sbi->options->delalloc && run_da && !no_da; 448 430 u32 align; 449 431 struct MFT_REC *rec; 450 432 ··· 483 457 if (is_ext) { 484 458 align <<= attr_b->nres.c_unit; 485 459 keep_prealloc = false; 460 + da = false; 486 461 } 487 462 488 463 old_valid = le64_to_cpu(attr_b->nres.valid_size); ··· 500 473 attr_b->nres.data_size = cpu_to_le64(new_size); 501 474 mi_b->dirty = dirty = true; 502 475 goto ok; 476 + } 477 + 478 + if (da && 479 + (vcn = old_alen + run_len(&ni->file.run_da), new_alen > vcn)) { 480 + /* Resize up normal file. Delay new clusters allocation. */ 481 + alen = new_alen - vcn; 482 + 483 + if (ntfs_check_free_space(sbi, alen, 0, true)) { 484 + if (!run_add_entry(&ni->file.run_da, vcn, SPARSE_LCN, 485 + alen, false)) { 486 + err = -ENOMEM; 487 + goto out; 488 + } 489 + 490 + ntfs_add_da(sbi, alen); 491 + goto ok1; 492 + } 493 + } 494 + 495 + if (!keep_prealloc && run_da && run_da->count && 496 + (vcn = run_get_max_vcn(run_da), new_alen < vcn)) { 497 + /* Shrink delayed clusters. */ 498 + 499 + /* Try to remove fragment from delay allocated run. */ 500 + if (!run_remove_range(run_da, new_alen, vcn - new_alen, 501 + &alen)) { 502 + err = -ENOMEM; 503 + goto out; 504 + } 505 + 506 + ntfs_sub_da(sbi, alen); 503 507 } 504 508 505 509 vcn = old_alen - 1; ··· 638 580 } else { 639 581 /* ~3 bytes per fragment. */ 640 582 err = attr_allocate_clusters( 641 - sbi, run, vcn, lcn, to_allocate, &pre_alloc, 583 + sbi, run, run_da, vcn, lcn, to_allocate, 584 + &pre_alloc, 642 585 is_mft ? ALLOCATE_MFT : ALLOCATE_DEF, &alen, 643 586 is_mft ? 0 : 644 587 (sbi->record_size - ··· 818 759 mi_b->dirty = dirty = true; 819 760 820 761 err = run_deallocate_ex(sbi, run, vcn, evcn - vcn + 1, &dlen, 821 - true); 762 + true, run_da); 822 763 if (err) 823 764 goto out; 824 765 825 766 if (is_ext) { 826 767 /* dlen - really deallocated clusters. */ 827 768 le64_sub_cpu(&attr_b->nres.total_size, 828 - ((u64)dlen << cluster_bits)); 769 + (u64)dlen << cluster_bits); 829 770 } 830 771 831 772 run_truncate(run, vcn); ··· 880 821 if (((type == ATTR_DATA && !name_len) || 881 822 (type == ATTR_ALLOC && name == I30_NAME))) { 882 823 /* Update inode_set_bytes. 
*/ 883 - if (attr_b->non_res) { 884 - new_alloc = le64_to_cpu(attr_b->nres.alloc_size); 885 - if (inode_get_bytes(&ni->vfs_inode) != new_alloc) { 886 - inode_set_bytes(&ni->vfs_inode, new_alloc); 887 - dirty = true; 888 - } 824 + if (attr_b->non_res && 825 + inode_get_bytes(&ni->vfs_inode) != new_alloc) { 826 + inode_set_bytes(&ni->vfs_inode, new_alloc); 827 + dirty = true; 889 828 } 829 + 830 + i_size_write(&ni->vfs_inode, new_size); 890 831 891 832 /* Don't forget to update duplicate information in parent. */ 892 833 if (dirty) { ··· 928 869 is_bad = true; 929 870 930 871 undo_1: 931 - run_deallocate_ex(sbi, run, vcn, alen, NULL, false); 872 + run_deallocate_ex(sbi, run, vcn, alen, NULL, false, run_da); 932 873 933 874 run_truncate(run, vcn); 934 875 out: ··· 951 892 * - new allocated clusters are zeroed via blkdev_issue_zeroout. 952 893 */ 953 894 int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, 954 - CLST *len, bool *new, bool zero, void **res) 895 + CLST *len, bool *new, bool zero, void **res, bool no_da) 955 896 { 956 - int err = 0; 957 - struct runs_tree *run = &ni->file.run; 958 - struct ntfs_sb_info *sbi; 959 - u8 cluster_bits; 960 - struct ATTRIB *attr, *attr_b; 961 - struct ATTR_LIST_ENTRY *le, *le_b; 962 - struct mft_inode *mi, *mi_b; 963 - CLST hint, svcn, to_alloc, evcn1, next_svcn, asize, end, vcn0, alen; 964 - CLST alloc, evcn; 965 - unsigned fr; 966 - u64 total_size, total_size0; 967 - int step = 0; 897 + int err; 968 898 969 899 if (new) 970 900 *new = false; ··· 962 914 963 915 /* Try to find in cache. */ 964 916 down_read(&ni->file.run_lock); 965 - if (!run_lookup_entry(run, vcn, lcn, len, NULL)) 917 + if (!no_da && run_lookup_entry(&ni->file.run_da, vcn, lcn, len, NULL)) { 918 + /* The requested vcn is delay allocated. */ 919 + *lcn = DELALLOC_LCN; 920 + } else if (run_lookup_entry(&ni->file.run, vcn, lcn, len, NULL)) { 921 + /* The requested vcn is known in current run. */ 922 + } else { 966 923 *len = 0; 924 + } 967 925 up_read(&ni->file.run_lock); 968 926 969 927 if (*len && (*lcn != SPARSE_LCN || !new)) 970 928 return 0; /* Fast normal way without allocation. */ 971 929 972 930 /* No cluster in cache or we need to allocate cluster in hole. */ 973 - sbi = ni->mi.sbi; 974 - cluster_bits = sbi->cluster_bits; 975 - 976 931 ni_lock(ni); 977 932 down_write(&ni->file.run_lock); 978 933 979 - /* Repeat the code above (under write lock). */ 980 - if (!run_lookup_entry(run, vcn, lcn, len, NULL)) 934 + err = attr_data_get_block_locked(ni, vcn, clen, lcn, len, new, zero, 935 + res, no_da); 936 + 937 + up_write(&ni->file.run_lock); 938 + ni_unlock(ni); 939 + 940 + return err; 941 + } 942 + 943 + /* 944 + * attr_data_get_block_locked - Helper for attr_data_get_block. 
945 + */ 946 + int attr_data_get_block_locked(struct ntfs_inode *ni, CLST vcn, CLST clen, 947 + CLST *lcn, CLST *len, bool *new, bool zero, 948 + void **res, bool no_da) 949 + { 950 + int err = 0; 951 + struct ntfs_sb_info *sbi = ni->mi.sbi; 952 + struct runs_tree *run = &ni->file.run; 953 + struct runs_tree *run_da = &ni->file.run_da; 954 + bool da = sbi->options->delalloc && !no_da; 955 + u8 cluster_bits; 956 + struct ATTRIB *attr, *attr_b; 957 + struct ATTR_LIST_ENTRY *le, *le_b; 958 + struct mft_inode *mi, *mi_b; 959 + CLST hint, svcn, to_alloc, evcn1, next_svcn, asize, end, vcn0; 960 + CLST alloc, evcn; 961 + unsigned fr; 962 + u64 total_size, total_size0; 963 + int step; 964 + 965 + again: 966 + if (da && run_lookup_entry(run_da, vcn, lcn, len, NULL)) { 967 + /* The requested vcn is delay allocated. */ 968 + *lcn = DELALLOC_LCN; 969 + } else if (run_lookup_entry(run, vcn, lcn, len, NULL)) { 970 + /* The requested vcn is known in current run. */ 971 + } else { 981 972 *len = 0; 973 + } 982 974 983 975 if (*len) { 984 976 if (*lcn != SPARSE_LCN || !new) ··· 1026 938 if (clen > *len) 1027 939 clen = *len; 1028 940 } 941 + 942 + cluster_bits = sbi->cluster_bits; 943 + step = 0; 1029 944 1030 945 le_b = NULL; 1031 946 attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b); ··· 1152 1061 if (err) 1153 1062 goto out; 1154 1063 } 1064 + da = false; /* no delalloc for compressed file. */ 1155 1065 } 1156 1066 1157 1067 if (vcn + to_alloc > asize) 1158 1068 to_alloc = asize - vcn; 1069 + 1070 + if (da) { 1071 + CLST rlen1, rlen2; 1072 + if (!ntfs_check_free_space(sbi, to_alloc, 0, true)) { 1073 + err = ni_allocate_da_blocks_locked(ni); 1074 + if (err) 1075 + goto out; 1076 + /* Layout of records may be changed. Start again without 'da'. */ 1077 + da = false; 1078 + goto again; 1079 + } 1080 + 1081 + /* run_add_entry consolidates existed ranges. */ 1082 + rlen1 = run_len(run_da); 1083 + if (!run_add_entry(run_da, vcn, SPARSE_LCN, to_alloc, false)) { 1084 + err = -ENOMEM; 1085 + goto out; 1086 + } 1087 + rlen2 = run_len(run_da); 1088 + 1089 + /* new added delay clusters = rlen2 - rlen1. */ 1090 + ntfs_add_da(sbi, rlen2 - rlen1); 1091 + *len = to_alloc; 1092 + *lcn = DELALLOC_LCN; 1093 + goto ok; 1094 + } 1159 1095 1160 1096 /* Get the last LCN to allocate from. */ 1161 1097 hint = 0; ··· 1198 1080 } 1199 1081 1200 1082 /* Allocate and zeroout new clusters. */ 1201 - err = attr_allocate_clusters(sbi, run, vcn, hint + 1, to_alloc, NULL, 1202 - zero ? ALLOCATE_ZERO : ALLOCATE_DEF, &alen, 1203 - fr, lcn, len); 1083 + err = attr_allocate_clusters(sbi, run, run_da, vcn, hint + 1, to_alloc, 1084 + NULL, 1085 + zero ? ALLOCATE_ZERO : ALLOCATE_ONE_FR, 1086 + len, fr, lcn, len); 1204 1087 if (err) 1205 1088 goto out; 1206 1089 *new = true; 1207 1090 step = 1; 1208 1091 1209 - end = vcn + alen; 1092 + end = vcn + *len; 1210 1093 /* Save 'total_size0' to restore if error. */ 1211 1094 total_size0 = le64_to_cpu(attr_b->nres.total_size); 1212 - total_size = total_size0 + ((u64)alen << cluster_bits); 1095 + total_size = total_size0 + ((u64)*len << cluster_bits); 1213 1096 1214 1097 if (vcn != vcn0) { 1215 1098 if (!run_lookup_entry(run, vcn0, lcn, len, NULL)) { ··· 1276 1157 * in 'ni_insert_nonresident'. 1277 1158 * Return in advance -ENOSPC here if there are no free cluster and no free MFT. 1278 1159 */ 1279 - if (!ntfs_check_for_free_space(sbi, 1, 1)) { 1160 + if (!ntfs_check_free_space(sbi, 1, 1, false)) { 1280 1161 /* Undo step 1. 
*/ 1281 1162 err = -ENOSPC; 1282 1163 goto undo1; ··· 1361 1242 /* Too complex to restore. */ 1362 1243 _ntfs_bad_inode(&ni->vfs_inode); 1363 1244 } 1364 - up_write(&ni->file.run_lock); 1365 - ni_unlock(ni); 1366 1245 1367 1246 return err; 1368 1247 ··· 1369 1252 attr_b->nres.total_size = cpu_to_le64(total_size0); 1370 1253 inode_set_bytes(&ni->vfs_inode, total_size0); 1371 1254 1372 - if (run_deallocate_ex(sbi, run, vcn, alen, NULL, false) || 1373 - !run_add_entry(run, vcn, SPARSE_LCN, alen, false) || 1255 + if (run_deallocate_ex(sbi, run, vcn, *len, NULL, false, run_da) || 1256 + !run_add_entry(run, vcn, SPARSE_LCN, *len, false) || 1374 1257 mi_pack_runs(mi, attr, run, max(end, evcn1) - svcn)) { 1375 1258 _ntfs_bad_inode(&ni->vfs_inode); 1376 1259 } ··· 1805 1688 1806 1689 if (len < clst_data) { 1807 1690 err = run_deallocate_ex(sbi, run, vcn + len, clst_data - len, 1808 - NULL, true); 1691 + NULL, true, NULL); 1809 1692 if (err) 1810 1693 goto out; 1811 1694 ··· 1825 1708 hint = -1; 1826 1709 } 1827 1710 1828 - err = attr_allocate_clusters(sbi, run, vcn + clst_data, 1711 + err = attr_allocate_clusters(sbi, run, NULL, vcn + clst_data, 1829 1712 hint + 1, len - clst_data, NULL, 1830 1713 ALLOCATE_DEF, &alen, 0, NULL, 1831 1714 NULL); ··· 1980 1863 CLST vcn, end; 1981 1864 u64 valid_size, data_size, alloc_size, total_size; 1982 1865 u32 mask; 1866 + u64 i_size; 1983 1867 __le16 a_flags; 1984 1868 1985 1869 if (!bytes) ··· 1996 1878 return 0; 1997 1879 } 1998 1880 1999 - data_size = le64_to_cpu(attr_b->nres.data_size); 2000 - alloc_size = le64_to_cpu(attr_b->nres.alloc_size); 2001 - a_flags = attr_b->flags; 2002 - 2003 - if (is_attr_ext(attr_b)) { 2004 - total_size = le64_to_cpu(attr_b->nres.total_size); 2005 - mask = (sbi->cluster_size << attr_b->nres.c_unit) - 1; 2006 - } else { 2007 - total_size = alloc_size; 2008 - mask = sbi->cluster_mask; 2009 - } 2010 - 2011 - if ((vbo & mask) || (bytes & mask)) { 1881 + mask = is_attr_ext(attr_b) ? 1882 + ((sbi->cluster_size << attr_b->nres.c_unit) - 1) : 1883 + sbi->cluster_mask; 1884 + if ((vbo | bytes) & mask) { 2012 1885 /* Allow to collapse only cluster aligned ranges. */ 2013 1886 return -EINVAL; 2014 1887 } 2015 1888 2016 - if (vbo > data_size) 1889 + /* i_size - size of file with delay allocated clusters. */ 1890 + i_size = ni->vfs_inode.i_size; 1891 + 1892 + if (vbo > i_size) 2017 1893 return -EINVAL; 2018 1894 2019 1895 down_write(&ni->file.run_lock); 2020 1896 2021 - if (vbo + bytes >= data_size) { 2022 - u64 new_valid = min(ni->i_valid, vbo); 1897 + if (vbo + bytes >= i_size) { 1898 + valid_size = min(ni->i_valid, vbo); 2023 1899 2024 1900 /* Simple truncate file at 'vbo'. */ 2025 1901 truncate_setsize(&ni->vfs_inode, vbo); 2026 1902 err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, vbo, 2027 - &new_valid, true, NULL); 1903 + &valid_size, true); 2028 1904 2029 - if (!err && new_valid < ni->i_valid) 2030 - ni->i_valid = new_valid; 1905 + if (!err && valid_size < ni->i_valid) 1906 + ni->i_valid = valid_size; 2031 1907 2032 1908 goto out; 2033 1909 } 2034 1910 2035 - /* 2036 - * Enumerate all attribute segments and collapse. 2037 - */ 2038 - alen = alloc_size >> sbi->cluster_bits; 2039 1911 vcn = vbo >> sbi->cluster_bits; 2040 1912 len = bytes >> sbi->cluster_bits; 2041 1913 end = vcn + len; 2042 1914 dealloc = 0; 2043 1915 done = 0; 2044 1916 1917 + /* 1918 + * Check delayed clusters. 
1919 + */ 1920 + if (ni->file.run_da.count) { 1921 + struct runs_tree *run_da = &ni->file.run_da; 1922 + if (run_is_mapped_full(run_da, vcn, end - 1)) { 1923 + /* 1924 + * The requested range is full in delayed clusters. 1925 + */ 1926 + err = attr_set_size_ex(ni, ATTR_DATA, NULL, 0, run, 1927 + i_size - bytes, NULL, false, 1928 + NULL, true); 1929 + goto out; 1930 + } 1931 + 1932 + /* Collapse request crosses real and delayed clusters. */ 1933 + err = ni_allocate_da_blocks_locked(ni); 1934 + if (err) 1935 + goto out; 1936 + 1937 + /* Layout of records maybe changed. */ 1938 + le_b = NULL; 1939 + attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, 1940 + &mi_b); 1941 + if (!attr_b || !attr_b->non_res) { 1942 + err = -ENOENT; 1943 + goto out; 1944 + } 1945 + } 1946 + 1947 + data_size = le64_to_cpu(attr_b->nres.data_size); 1948 + alloc_size = le64_to_cpu(attr_b->nres.alloc_size); 1949 + total_size = is_attr_ext(attr_b) ? 1950 + le64_to_cpu(attr_b->nres.total_size) : 1951 + alloc_size; 1952 + alen = alloc_size >> sbi->cluster_bits; 1953 + a_flags = attr_b->flags; 2045 1954 svcn = le64_to_cpu(attr_b->nres.svcn); 2046 1955 evcn1 = le64_to_cpu(attr_b->nres.evcn) + 1; 2047 1956 ··· 2091 1946 goto out; 2092 1947 } 2093 1948 1949 + /* 1950 + * Enumerate all attribute segments and collapse. 1951 + */ 2094 1952 for (;;) { 2095 1953 CLST vcn1, eat, next_svcn; 2096 1954 ··· 2121 1973 vcn1 = vcn + done; /* original vcn in attr/run. */ 2122 1974 eat = min(end, evcn1) - vcn1; 2123 1975 2124 - err = run_deallocate_ex(sbi, run, vcn1, eat, &dealloc, true); 1976 + err = run_deallocate_ex(sbi, run, vcn1, eat, &dealloc, true, 1977 + NULL); 2125 1978 if (err) 2126 1979 goto out; 2127 1980 2128 1981 if (svcn + eat < evcn1) { 2129 1982 /* Collapse a part of this attribute segment. */ 2130 - 2131 1983 if (!run_collapse_range(run, vcn1, eat, done)) { 2132 1984 err = -ENOMEM; 2133 1985 goto out; ··· 2308 2160 bytes = alloc_size; 2309 2161 bytes -= vbo; 2310 2162 2311 - if ((vbo & mask) || (bytes & mask)) { 2163 + if ((vbo | bytes) & mask) { 2312 2164 /* We have to zero a range(s). */ 2313 - if (frame_size == NULL) { 2165 + if (!frame_size) { 2314 2166 /* Caller insists range is aligned. */ 2315 2167 return -EINVAL; 2316 2168 } ··· 2369 2221 * Calculate how many clusters there are. 2370 2222 * Don't do any destructive actions. 2371 2223 */ 2372 - err = run_deallocate_ex(NULL, run, vcn1, zero, &hole2, false); 2224 + err = run_deallocate_ex(NULL, run, vcn1, zero, &hole2, false, 2225 + NULL); 2373 2226 if (err) 2374 2227 goto done; 2375 2228 ··· 2408 2259 } 2409 2260 2410 2261 /* Real deallocate. Should not fail. */ 2411 - run_deallocate_ex(sbi, &run2, vcn1, zero, &hole, true); 2262 + run_deallocate_ex(sbi, &run2, vcn1, zero, &hole, true, 2263 + &ni->file.run_da); 2412 2264 2413 2265 next_attr: 2414 2266 /* Free all allocated memory. */ ··· 2521 2371 return -EINVAL; 2522 2372 } 2523 2373 2524 - if ((vbo & mask) || (bytes & mask)) { 2374 + if ((vbo | bytes) & mask) { 2525 2375 /* Allow to insert only frame aligned ranges. */ 2526 2376 return -EINVAL; 2527 2377 } ··· 2540 2390 2541 2391 if (!attr_b->non_res) { 2542 2392 err = attr_set_size(ni, ATTR_DATA, NULL, 0, run, 2543 - data_size + bytes, NULL, false, NULL); 2393 + data_size + bytes, NULL, false); 2544 2394 2545 2395 le_b = NULL; 2546 2396 attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, ··· 2563 2413 goto done; 2564 2414 } 2565 2415 2566 - /* Resident files becomes nonresident. */ 2416 + /* Resident file becomes nonresident. 
*/ 2567 2417 data_size = le64_to_cpu(attr_b->nres.data_size); 2568 2418 alloc_size = le64_to_cpu(attr_b->nres.alloc_size); 2569 2419 } ··· 2600 2450 if (err) 2601 2451 goto out; 2602 2452 2603 - if (!run_insert_range(run, vcn, len)) { 2604 - err = -ENOMEM; 2453 + err = run_insert_range(run, vcn, len); 2454 + if (err) 2605 2455 goto out; 2606 - } 2456 + 2457 + err = run_insert_range_da(&ni->file.run_da, vcn, len); 2458 + if (err) 2459 + goto out; 2607 2460 2608 2461 /* Try to pack in current record as much as possible. */ 2609 2462 err = mi_pack_runs(mi, attr, run, evcn1 + len - svcn);
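
The lookup fast path in attr_data_get_block() above now distinguishes three
cases for a VCN. Condensed from the hunk, with locking and error handling
elided:

	/* Condensed from the attr_data_get_block() hunk above. */
	if (!no_da && run_lookup_entry(&ni->file.run_da, vcn, lcn, len, NULL))
		*lcn = DELALLOC_LCN; /* reserved: no on-disk cluster yet */
	else if (run_lookup_entry(&ni->file.run, vcn, lcn, len, NULL))
		; /* mapped in the real run (may still be SPARSE_LCN) */
	else
		*len = 0; /* unknown: fall through to the attribute walk */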
+4 -4
fs/ntfs3/attrlist.c
··· 345 345 le->id = id; 346 346 memcpy(le->name, name, sizeof(short) * name_len); 347 347 348 - err = attr_set_size(ni, ATTR_LIST, NULL, 0, &al->run, new_size, 349 - &new_size, true, &attr); 348 + err = attr_set_size_ex(ni, ATTR_LIST, NULL, 0, &al->run, new_size, 349 + &new_size, true, &attr, false); 350 350 if (err) { 351 351 /* Undo memmove above. */ 352 352 memmove(le, Add2Ptr(le, sz), old_size - off); ··· 404 404 * Attribute list increased on demand in al_add_le. 405 405 * Attribute list decreased here. 406 406 */ 407 - err = attr_set_size(ni, ATTR_LIST, NULL, 0, &al->run, al->size, NULL, 408 - false, &attr); 407 + err = attr_set_size_ex(ni, ATTR_LIST, NULL, 0, &al->run, al->size, NULL, 408 + false, &attr, false); 409 409 if (err) 410 410 goto out; 411 411
+168 -146
fs/ntfs3/file.c
··· 26 26 */ 27 27 #define NTFS3_IOC_SHUTDOWN _IOR('X', 125, __u32) 28 28 29 + /* 30 + * Helper for ntfs_should_use_dio. 31 + */ 32 + static u32 ntfs_dio_alignment(struct inode *inode) 33 + { 34 + struct ntfs_inode *ni = ntfs_i(inode); 35 + 36 + if (is_resident(ni)) { 37 + /* Check delalloc. */ 38 + if (!ni->file.run_da.count) 39 + return 0; 40 + } 41 + 42 + /* In most cases this is bdev_logical_block_size(bdev). */ 43 + return ni->mi.sbi->bdev_blocksize; 44 + } 45 + 46 + /* 47 + * Returns %true if the given DIO request should be attempted with DIO, or 48 + * %false if it should fall back to buffered I/O. 49 + */ 50 + static bool ntfs_should_use_dio(struct kiocb *iocb, struct iov_iter *iter) 51 + { 52 + struct inode *inode = file_inode(iocb->ki_filp); 53 + u32 dio_align = ntfs_dio_alignment(inode); 54 + 55 + if (!dio_align) 56 + return false; 57 + 58 + return IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), dio_align); 59 + } 60 + 29 61 static int ntfs_ioctl_fitrim(struct ntfs_sb_info *sbi, unsigned long arg) 30 62 { 31 63 struct fstrim_range __user *user_range; ··· 218 186 219 187 static int ntfs_extend_initialized_size(struct file *file, 220 188 struct ntfs_inode *ni, 221 - const loff_t valid, 222 189 const loff_t new_valid) 223 190 { 224 191 struct inode *inode = &ni->vfs_inode; 192 + const loff_t valid = ni->i_valid; 225 193 int err; 226 194 227 195 if (valid >= new_valid) ··· 231 199 ni->i_valid = new_valid; 232 200 return 0; 233 201 } 234 - 235 - WARN_ON(is_compressed(ni)); 236 202 237 203 err = iomap_zero_range(inode, valid, new_valid - valid, NULL, 238 204 &ntfs_iomap_ops, &ntfs_iomap_folio_ops, NULL); ··· 321 291 for (; vcn < end; vcn += len) { 322 292 err = attr_data_get_block(ni, vcn, 1, &lcn, 323 293 &len, &new, true, 324 - NULL); 294 + NULL, false); 325 295 if (err) 326 296 goto out; 327 297 } ··· 332 302 err = -EAGAIN; 333 303 goto out; 334 304 } 335 - err = ntfs_extend_initialized_size(file, ni, 336 - ni->i_valid, to); 305 + err = ntfs_extend_initialized_size(file, ni, to); 337 306 inode_unlock(inode); 338 307 if (err) 339 308 goto out; ··· 362 333 ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_DIRTY); 363 334 364 335 if (end > inode->i_size) { 336 + /* 337 + * Normal files: increase file size, allocate space. 338 + * Sparse/Compressed: increase file size. No space allocated. 339 + */ 365 340 err = ntfs_set_size(inode, end); 366 341 if (err) 367 342 goto out; 368 343 } 369 344 370 345 if (extend_init && !is_compressed(ni)) { 371 - err = ntfs_extend_initialized_size(file, ni, ni->i_valid, pos); 346 + err = ntfs_extend_initialized_size(file, ni, pos); 372 347 if (err) 373 348 goto out; 374 349 } else { 375 350 err = 0; 376 - } 377 - 378 - if (file && is_sparsed(ni)) { 379 - /* 380 - * This code optimizes large writes to sparse file. 381 - * TODO: merge this fragment with fallocate fragment. 382 - */ 383 - struct ntfs_sb_info *sbi = ni->mi.sbi; 384 - CLST vcn = pos >> sbi->cluster_bits; 385 - CLST cend = bytes_to_cluster(sbi, end); 386 - CLST cend_v = bytes_to_cluster(sbi, ni->i_valid); 387 - CLST lcn, clen; 388 - bool new; 389 - 390 - if (cend_v > cend) 391 - cend_v = cend; 392 - 393 - /* 394 - * Allocate and zero new clusters. 395 - * Zeroing these clusters may be too long. 396 - */ 397 - for (; vcn < cend_v; vcn += clen) { 398 - err = attr_data_get_block(ni, vcn, cend_v - vcn, &lcn, 399 - &clen, &new, true, NULL); 400 - if (err) 401 - goto out; 402 - } 403 - /* 404 - * Allocate but not zero new clusters. 
405 - */ 406 - for (; vcn < cend; vcn += clen) { 407 - err = attr_data_get_block(ni, vcn, cend - vcn, &lcn, 408 - &clen, &new, false, NULL); 409 - if (err) 410 - goto out; 411 - } 412 351 } 413 352 414 353 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); ··· 411 414 ni_lock(ni); 412 415 413 416 down_write(&ni->file.run_lock); 414 - err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size, 415 - &new_valid, ni->mi.sbi->options->prealloc, NULL); 417 + err = attr_set_size_ex(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size, 418 + &new_valid, ni->mi.sbi->options->prealloc, NULL, 419 + false); 416 420 up_write(&ni->file.run_lock); 417 421 418 422 ni->i_valid = new_valid; ··· 505 507 506 508 if (mode & FALLOC_FL_PUNCH_HOLE) { 507 509 u32 frame_size; 508 - loff_t mask, vbo_a, end_a, tmp; 510 + loff_t mask, vbo_a, end_a, tmp, from; 509 511 510 512 err = filemap_write_and_wait_range(mapping, vbo_down, 511 513 LLONG_MAX); ··· 525 527 526 528 /* Process not aligned punch. */ 527 529 err = 0; 530 + if (end > i_size) 531 + end = i_size; 528 532 mask = frame_size - 1; 529 533 vbo_a = (vbo + mask) & ~mask; 530 534 end_a = end & ~mask; 531 535 532 536 tmp = min(vbo_a, end); 533 - if (tmp > vbo) { 534 - err = iomap_zero_range(inode, vbo, tmp - vbo, NULL, 537 + from = min_t(loff_t, ni->i_valid, vbo); 538 + /* Zero head of punch. */ 539 + if (tmp > from) { 540 + err = iomap_zero_range(inode, from, tmp - from, NULL, 535 541 &ntfs_iomap_ops, 536 542 &ntfs_iomap_folio_ops, NULL); 537 543 if (err) 538 544 goto out; 539 545 } 540 546 541 - if (vbo < end_a && end_a < end) { 542 - err = iomap_zero_range(inode, end_a, end - end_a, NULL, 543 - &ntfs_iomap_ops, 544 - &ntfs_iomap_folio_ops, NULL); 545 - if (err) 546 - goto out; 547 - } 548 - 549 - /* Aligned punch_hole */ 547 + /* Aligned punch_hole. Deallocate clusters. */ 550 548 if (end_a > vbo_a) { 551 549 ni_lock(ni); 552 550 err = attr_punch_hole(ni, vbo_a, end_a - vbo_a, NULL); 553 551 ni_unlock(ni); 552 + if (err) 553 + goto out; 554 + } 555 + 556 + /* Zero tail of punch. */ 557 + if (vbo < end_a && end_a < end) { 558 + err = iomap_zero_range(inode, end_a, end - end_a, NULL, 559 + &ntfs_iomap_ops, 560 + &ntfs_iomap_folio_ops, NULL); 554 561 if (err) 555 562 goto out; 556 563 } ··· 656 653 for (; vcn < cend_v; vcn += clen) { 657 654 err = attr_data_get_block(ni, vcn, cend_v - vcn, 658 655 &lcn, &clen, &new, 659 - true, NULL); 656 + true, NULL, false); 660 657 if (err) 661 658 goto out; 662 659 } 660 + 661 + /* 662 + * Moving up 'valid size'. 663 + */ 664 + err = ntfs_extend_initialized_size( 665 + file, ni, (u64)cend_v << cluster_bits); 666 + if (err) 667 + goto out; 668 + 663 669 /* 664 670 * Allocate but not zero new clusters. 665 671 */ 666 672 for (; vcn < cend; vcn += clen) { 667 673 err = attr_data_get_block(ni, vcn, cend - vcn, 668 674 &lcn, &clen, &new, 669 - false, NULL); 675 + false, NULL, false); 670 676 if (err) 671 677 goto out; 672 678 } ··· 686 674 /* True - Keep preallocated. 
*/ 687 675 err = attr_set_size(ni, ATTR_DATA, NULL, 0, 688 676 &ni->file.run, i_size, &ni->i_valid, 689 - true, NULL); 677 + true); 690 678 ni_unlock(ni); 691 679 if (err) 692 680 goto out; ··· 828 816 struct inode *inode = file_inode(file); 829 817 struct ntfs_inode *ni = ntfs_i(inode); 830 818 size_t bytes = iov_iter_count(iter); 819 + loff_t valid, i_size, vbo, end; 820 + unsigned int dio_flags; 831 821 ssize_t err; 832 822 833 823 err = check_read_restriction(inode); ··· 849 835 file->f_ra.ra_pages = 0; 850 836 } 851 837 852 - /* Check minimum alignment for dio. */ 853 - if ((iocb->ki_flags & IOCB_DIRECT) && 854 - (is_resident(ni) || ((iocb->ki_pos | iov_iter_alignment(iter)) & 855 - ni->mi.sbi->bdev_blocksize_mask))) { 856 - /* Fallback to buffered I/O */ 838 + /* Fallback to buffered I/O if the inode does not support direct I/O. */ 839 + if (!(iocb->ki_flags & IOCB_DIRECT) || 840 + !ntfs_should_use_dio(iocb, iter)) { 857 841 iocb->ki_flags &= ~IOCB_DIRECT; 842 + return generic_file_read_iter(iocb, iter); 858 843 } 859 844 860 - if (iocb->ki_flags & IOCB_DIRECT) { 861 - loff_t valid, i_size; 862 - loff_t vbo = iocb->ki_pos; 863 - loff_t end = vbo + bytes; 864 - unsigned int dio_flags = IOMAP_DIO_PARTIAL; 865 - 866 - if (iocb->ki_flags & IOCB_NOWAIT) { 867 - if (!inode_trylock_shared(inode)) 868 - return -EAGAIN; 869 - } else { 870 - inode_lock_shared(inode); 871 - } 872 - 873 - valid = ni->i_valid; 874 - i_size = inode->i_size; 875 - 876 - if (vbo < valid) { 877 - if (valid < end) { 878 - /* read cross 'valid' size. */ 879 - dio_flags |= IOMAP_DIO_FORCE_WAIT; 880 - } 881 - 882 - err = iomap_dio_rw(iocb, iter, &ntfs_iomap_ops, NULL, 883 - dio_flags, NULL, 0); 884 - 885 - if (err > 0) { 886 - end = vbo + err; 887 - if (valid < end) { 888 - size_t to_zero = end - valid; 889 - /* Fix iter. */ 890 - iov_iter_revert(iter, to_zero); 891 - iov_iter_zero(to_zero, iter); 892 - } 893 - } 894 - } else if (vbo < i_size) { 895 - if (end > i_size) 896 - bytes = i_size - vbo; 897 - iov_iter_zero(bytes, iter); 898 - iocb->ki_pos += bytes; 899 - err = bytes; 900 - } 901 - 902 - inode_unlock_shared(inode); 903 - file_accessed(iocb->ki_filp); 904 - return err; 845 + if (iocb->ki_flags & IOCB_NOWAIT) { 846 + if (!inode_trylock_shared(inode)) 847 + return -EAGAIN; 848 + } else { 849 + inode_lock_shared(inode); 905 850 } 906 851 907 - return generic_file_read_iter(iocb, iter); 852 + vbo = iocb->ki_pos; 853 + end = vbo + bytes; 854 + dio_flags = 0; 855 + valid = ni->i_valid; 856 + i_size = inode->i_size; 857 + 858 + if (vbo < valid) { 859 + if (valid < end) { 860 + /* read cross 'valid' size. */ 861 + dio_flags |= IOMAP_DIO_FORCE_WAIT; 862 + } 863 + 864 + if (ni->file.run_da.count) { 865 + /* Direct I/O is not compatible with delalloc. */ 866 + err = ni_allocate_da_blocks(ni); 867 + if (err) 868 + goto out; 869 + } 870 + 871 + err = iomap_dio_rw(iocb, iter, &ntfs_iomap_ops, NULL, dio_flags, 872 + NULL, 0); 873 + 874 + if (err <= 0) 875 + goto out; 876 + end = vbo + err; 877 + if (valid < end) { 878 + size_t to_zero = end - valid; 879 + /* Fix iter. 
*/ 880 + iov_iter_revert(iter, to_zero); 881 + iov_iter_zero(to_zero, iter); 882 + } 883 + } else if (vbo < i_size) { 884 + if (end > i_size) 885 + bytes = i_size - vbo; 886 + iov_iter_zero(bytes, iter); 887 + iocb->ki_pos += bytes; 888 + err = bytes; 889 + } 890 + 891 + out: 892 + inode_unlock_shared(inode); 893 + file_accessed(iocb->ki_filp); 894 + return err; 908 895 } 909 896 910 897 /* ··· 1026 1011 off = valid & (frame_size - 1); 1027 1012 1028 1013 err = attr_data_get_block(ni, frame << NTFS_LZNT_CUNIT, 1, &lcn, 1029 - &clen, NULL, false, NULL); 1014 + &clen, NULL, false, NULL, false); 1030 1015 if (err) 1031 1016 goto out; 1032 1017 1033 1018 if (lcn == SPARSE_LCN) { 1034 - valid = frame_vbo + ((u64)clen << sbi->cluster_bits); 1035 - if (ni->i_valid == valid) { 1036 - err = -EINVAL; 1037 - goto out; 1038 - } 1039 - ni->i_valid = valid; 1019 + ni->i_valid = valid = 1020 + frame_vbo + ((u64)clen << sbi->cluster_bits); 1040 1021 continue; 1041 1022 } 1042 1023 ··· 1218 1207 return -EOPNOTSUPP; 1219 1208 } 1220 1209 1210 + if (unlikely(IS_IMMUTABLE(inode))) 1211 + return -EPERM; 1212 + 1221 1213 return 0; 1222 1214 } 1223 1215 ··· 1232 1218 struct file *file = iocb->ki_filp; 1233 1219 struct inode *inode = file_inode(file); 1234 1220 struct ntfs_inode *ni = ntfs_i(inode); 1235 - struct super_block *sb = inode->i_sb; 1236 - struct ntfs_sb_info *sbi = sb->s_fs_info; 1237 1221 ssize_t ret, err; 1238 1222 1239 1223 if (!inode_trylock(inode)) { ··· 1275 1263 goto out; 1276 1264 } 1277 1265 1278 - /* Check minimum alignment for dio. */ 1279 - if ((iocb->ki_flags & IOCB_DIRECT) && 1280 - (is_resident(ni) || ((iocb->ki_pos | iov_iter_alignment(from)) & 1281 - sbi->bdev_blocksize_mask))) { 1282 - /* Fallback to buffered I/O */ 1266 + /* Fallback to buffered I/O if the inode does not support direct I/O. */ 1267 + if (!(iocb->ki_flags & IOCB_DIRECT) || 1268 + !ntfs_should_use_dio(iocb, from)) { 1283 1269 iocb->ki_flags &= ~IOCB_DIRECT; 1284 - } 1285 1270 1286 - if (!(iocb->ki_flags & IOCB_DIRECT)) { 1287 1271 ret = iomap_file_buffered_write(iocb, from, &ntfs_iomap_ops, 1288 1272 &ntfs_iomap_folio_ops, NULL); 1289 1273 inode_unlock(inode); ··· 1290 1282 return ret; 1291 1283 } 1292 1284 1293 - ret = iomap_dio_rw(iocb, from, &ntfs_iomap_ops, NULL, IOMAP_DIO_PARTIAL, 1294 - NULL, 0); 1285 + if (ni->file.run_da.count) { 1286 + /* Direct I/O is not compatible with delalloc. */ 1287 + ret = ni_allocate_da_blocks(ni); 1288 + if (ret) 1289 + goto out; 1290 + } 1291 + 1292 + ret = iomap_dio_rw(iocb, from, &ntfs_iomap_ops, NULL, 0, NULL, 0); 1295 1293 1296 1294 if (ret == -ENOTBLK) { 1297 1295 /* Returns -ENOTBLK in case of a page invalidation failure for writes.*/ ··· 1384 1370 1385 1371 /* 1386 1372 * ntfs_file_release - file_operations::release 1373 + * 1374 + * Called when an inode is released. Note that this is different 1375 + * from ntfs_file_open: open gets called at every open, but release 1376 + * gets called only when /all/ the files are closed. 1387 1377 */ 1388 1378 static int ntfs_file_release(struct inode *inode, struct file *file) 1389 1379 { 1390 - struct ntfs_inode *ni = ntfs_i(inode); 1391 - struct ntfs_sb_info *sbi = ni->mi.sbi; 1392 - int err = 0; 1380 + int err; 1381 + struct ntfs_inode *ni; 1393 1382 1394 - /* If we are last writer on the inode, drop the block reservation. 
*/ 1395 - if (sbi->options->prealloc && 1396 - ((file->f_mode & FMODE_WRITE) && 1397 - atomic_read(&inode->i_writecount) == 1) 1398 - /* 1399 - * The only file when inode->i_fop = &ntfs_file_operations and 1400 - * init_rwsem(&ni->file.run_lock) is not called explicitly is MFT. 1401 - * 1402 - * Add additional check here. 1403 - */ 1404 - && inode->i_ino != MFT_REC_MFT) { 1383 + if (!(file->f_mode & FMODE_WRITE) || 1384 + atomic_read(&inode->i_writecount) != 1 || 1385 + inode->i_ino == MFT_REC_MFT) { 1386 + return 0; 1387 + } 1388 + 1389 + /* Close the last writer on the inode. */ 1390 + ni = ntfs_i(inode); 1391 + 1392 + /* Allocate delayed blocks (clusters). */ 1393 + err = ni_allocate_da_blocks(ni); 1394 + if (err) 1395 + goto out; 1396 + 1397 + if (ni->mi.sbi->options->prealloc) { 1405 1398 ni_lock(ni); 1406 1399 down_write(&ni->file.run_lock); 1407 1400 1401 + /* Deallocate preallocated. */ 1408 1402 err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, 1409 - i_size_read(inode), &ni->i_valid, false, 1410 - NULL); 1403 + inode->i_size, &ni->i_valid, false); 1411 1404 1412 1405 up_write(&ni->file.run_lock); 1413 1406 ni_unlock(ni); 1414 1407 } 1408 + out: 1415 1409 return err; 1416 1410 } 1417 1411 ··· 1528 1506 1529 1507 if (whence == SEEK_DATA || whence == SEEK_HOLE) { 1530 1508 inode_lock_shared(inode); 1531 - /* Scan fragments for hole or data. */ 1509 + /* Scan file for hole or data. */ 1532 1510 ret = ni_seek_data_or_hole(ni, offset, whence == SEEK_DATA); 1533 1511 inode_unlock_shared(inode); 1534 1512
+67 -5
fs/ntfs3/frecord.c
··· 123 123 indx_clear(&ni->dir); 124 124 else { 125 125 run_close(&ni->file.run); 126 + ntfs_sub_da(ni->mi.sbi, run_len(&ni->file.run_da)); 127 + run_close(&ni->file.run_da); 126 128 #ifdef CONFIG_NTFS3_LZX_XPRESS 127 129 if (ni->file.offs_folio) { 128 130 /* On-demand allocated page for offsets. */ ··· 2016 2014 2017 2015 for (vcn = vbo >> sbi->cluster_bits; vcn < end; vcn += clen) { 2018 2016 err = attr_data_get_block(ni, vcn, cend - vcn, &lcn, 2019 - &clen, &new, false, NULL); 2017 + &clen, &new, false, NULL, 2018 + false); 2020 2019 if (err) 2021 2020 goto out; 2022 2021 } ··· 2238 2235 struct runs_tree *run = &ni->file.run; 2239 2236 u64 valid_size = ni->i_valid; 2240 2237 u64 vbo_disk; 2241 - size_t unc_size; 2238 + size_t unc_size = 0; 2242 2239 u32 frame_size, i, ondisk_size; 2243 2240 struct page *pg; 2244 2241 struct ATTRIB *attr; ··· 2849 2846 /* Enumerate all fragments. */ 2850 2847 for (vcn = offset >> cluster_bits;; vcn += clen) { 2851 2848 err = attr_data_get_block(ni, vcn, 1, &lcn, &clen, NULL, false, 2852 - NULL); 2849 + NULL, false); 2853 2850 if (err) { 2854 2851 return err; 2855 2852 } ··· 2889 2886 } 2890 2887 } else { 2891 2888 /* 2892 - * Adjust the file offset to the next hole in the file greater than or 2889 + * Adjust the file offset to the next hole in the file greater than or 2893 2890 * equal to offset. If offset points into the middle of a hole, then the 2894 - * file offset is set to offset. If there is no hole past offset, then the 2891 + * file offset is set to offset. If there is no hole past offset, then the 2895 2892 * file offset is adjusted to the end of the file 2896 2893 * (i.e., there is an implicit hole at the end of any file). 2897 2894 */ ··· 3237 3234 mark_inode_dirty_sync(inode); 3238 3235 3239 3236 return 0; 3237 + } 3238 + 3239 + /* 3240 + * Force to allocate all delay allocated clusters. 3241 + */ 3242 + int ni_allocate_da_blocks(struct ntfs_inode *ni) 3243 + { 3244 + int err; 3245 + 3246 + ni_lock(ni); 3247 + down_write(&ni->file.run_lock); 3248 + 3249 + err = ni_allocate_da_blocks_locked(ni); 3250 + 3251 + up_write(&ni->file.run_lock); 3252 + ni_unlock(ni); 3253 + 3254 + return err; 3255 + } 3256 + 3257 + /* 3258 + * Force to allocate all delay allocated clusters. 3259 + */ 3260 + int ni_allocate_da_blocks_locked(struct ntfs_inode *ni) 3261 + { 3262 + int err; 3263 + 3264 + if (!ni->file.run_da.count) 3265 + return 0; 3266 + 3267 + if (is_sparsed(ni)) { 3268 + CLST vcn, lcn, clen, alen; 3269 + bool new; 3270 + 3271 + /* 3272 + * Sparse file allocates clusters in 'attr_data_get_block_locked' 3273 + */ 3274 + while (run_get_entry(&ni->file.run_da, 0, &vcn, &lcn, &clen)) { 3275 + /* TODO: zero=true? */ 3276 + err = attr_data_get_block_locked(ni, vcn, clen, &lcn, 3277 + &alen, &new, true, 3278 + NULL, true); 3279 + if (err) 3280 + break; 3281 + if (!new) { 3282 + err = -EINVAL; 3283 + break; 3284 + } 3285 + } 3286 + } else { 3287 + /* 3288 + * Normal file allocates clusters in 'attr_set_size' 3289 + */ 3290 + err = attr_set_size_ex(ni, ATTR_DATA, NULL, 0, &ni->file.run, 3291 + ni->vfs_inode.i_size, &ni->i_valid, 3292 + false, NULL, true); 3293 + } 3294 + 3295 + return err; 3240 3296 }
+38 -15
fs/ntfs3/fsntfs.c
··· 445 445 } 446 446 447 447 /* 448 - * ntfs_check_for_free_space 448 + * ntfs_check_free_space 449 449 * 450 450 * Check if it is possible to allocate 'clen' clusters and 'mlen' Mft records 451 451 */ 452 - bool ntfs_check_for_free_space(struct ntfs_sb_info *sbi, CLST clen, CLST mlen) 452 + bool ntfs_check_free_space(struct ntfs_sb_info *sbi, CLST clen, CLST mlen, 453 + bool da) 453 454 { 454 455 size_t free, zlen, avail; 455 456 struct wnd_bitmap *wnd; 457 + CLST da_clusters = ntfs_get_da(sbi); 456 458 457 459 wnd = &sbi->used.bitmap; 458 460 down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); 459 461 free = wnd_zeroes(wnd); 462 + 463 + if (free >= da_clusters) { 464 + free -= da_clusters; 465 + } else { 466 + free = 0; 467 + } 468 + 460 469 zlen = min_t(size_t, NTFS_MIN_MFT_ZONE, wnd_zone_len(wnd)); 461 470 up_read(&wnd->rw_lock); 462 471 463 - if (free < zlen + clen) 472 + if (free < zlen + clen) { 464 473 return false; 474 + } 465 475 466 476 avail = free - (zlen + clen); 467 477 468 - wnd = &sbi->mft.bitmap; 469 - down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT); 470 - free = wnd_zeroes(wnd); 471 - zlen = wnd_zone_len(wnd); 472 - up_read(&wnd->rw_lock); 478 + /* 479 + * When delalloc is active then keep in mind some reserved space. 480 + * The worst case: 1 mft record per each ~500 clusters. 481 + */ 482 + if (da) { 483 + /* 1 mft record per each 1024 clusters. */ 484 + mlen += da_clusters >> 10; 485 + } 473 486 474 - if (free >= zlen + mlen) 475 - return true; 487 + if (mlen || !avail) { 488 + wnd = &sbi->mft.bitmap; 489 + down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT); 490 + free = wnd_zeroes(wnd); 491 + zlen = wnd_zone_len(wnd); 492 + up_read(&wnd->rw_lock); 476 493 477 - return avail >= bytes_to_cluster(sbi, mlen << sbi->record_bits); 494 + if (free < zlen + mlen && 495 + avail < bytes_to_cluster(sbi, mlen << sbi->record_bits)) { 496 + return false; 497 + } 498 + } 499 + 500 + return true; 478 501 } 479 502 480 503 /* ··· 532 509 533 510 /* Step 1: Resize $MFT::DATA. */ 534 511 down_write(&ni->file.run_lock); 535 - err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, 536 - new_mft_bytes, NULL, false, &attr); 512 + err = attr_set_size_ex(ni, ATTR_DATA, NULL, 0, &ni->file.run, 513 + new_mft_bytes, NULL, false, &attr, false); 537 514 538 515 if (err) { 539 516 up_write(&ni->file.run_lock); ··· 548 525 new_bitmap_bytes = ntfs3_bitmap_size(new_mft_total); 549 526 550 527 err = attr_set_size(ni, ATTR_BITMAP, NULL, 0, &sbi->mft.bitmap.run, 551 - new_bitmap_bytes, &new_bitmap_bytes, true, NULL); 528 + new_bitmap_bytes, &new_bitmap_bytes, true); 552 529 553 530 /* Refresh MFT Zone if necessary. */ 554 531 down_write_nested(&sbi->used.bitmap.rw_lock, BITMAP_MUTEX_CLUSTERS); ··· 2214 2191 if (new_sds_size > ni->vfs_inode.i_size) { 2215 2192 err = attr_set_size(ni, ATTR_DATA, SDS_NAME, 2216 2193 ARRAY_SIZE(SDS_NAME), &ni->file.run, 2217 - new_sds_size, &new_sds_size, false, NULL); 2194 + new_sds_size, &new_sds_size, false); 2218 2195 if (err) 2219 2196 goto out; 2220 2197 }
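
For scale, the delalloc budgeting in ntfs_check_free_space() above is a
single shift: with 4 KiB clusters, 1 GiB of outstanding delayed data is
262144 reserved clusters, so "mlen += da_clusters >> 10" demands headroom
for an extra 256 MFT records. Note the hunk's stated worst case is roughly
one record per ~500 clusters, while the budget is the coarser one per 1024.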
+11 -12
fs/ntfs3/index.c
··· 1446 1446 1447 1447 run_init(&run); 1448 1448 1449 - err = attr_allocate_clusters(sbi, &run, 0, 0, len, NULL, ALLOCATE_DEF, 1450 - &alen, 0, NULL, NULL); 1449 + err = attr_allocate_clusters(sbi, &run, NULL, 0, 0, len, NULL, 1450 + ALLOCATE_DEF, &alen, 0, NULL, NULL); 1451 1451 if (err) 1452 1452 goto out; 1453 1453 ··· 1531 1531 /* Increase bitmap. */ 1532 1532 err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len, 1533 1533 &indx->bitmap_run, 1534 - ntfs3_bitmap_size(bit + 1), NULL, true, 1535 - NULL); 1534 + ntfs3_bitmap_size(bit + 1), NULL, true); 1536 1535 if (err) 1537 1536 goto out1; 1538 1537 } ··· 1552 1553 1553 1554 /* Increase allocation. */ 1554 1555 err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len, 1555 - &indx->alloc_run, data_size, &data_size, true, 1556 - NULL); 1556 + &indx->alloc_run, data_size, &data_size, true); 1557 1557 if (err) { 1558 1558 if (bmp) 1559 1559 goto out2; ··· 1570 1572 out2: 1571 1573 /* Ops. No space? */ 1572 1574 attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len, 1573 - &indx->bitmap_run, bmp_size, &bmp_size_v, false, NULL); 1575 + &indx->bitmap_run, bmp_size, &bmp_size_v, false); 1574 1576 1575 1577 out1: 1576 1578 return err; ··· 2104 2106 new_data = (u64)bit << indx->index_bits; 2105 2107 2106 2108 err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len, 2107 - &indx->alloc_run, new_data, &new_data, false, NULL); 2109 + &indx->alloc_run, new_data, &new_data, false); 2108 2110 if (err) 2109 2111 return err; 2110 2112 ··· 2116 2118 return 0; 2117 2119 2118 2120 err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len, 2119 - &indx->bitmap_run, bpb, &bpb, false, NULL); 2121 + &indx->bitmap_run, bpb, &bpb, false); 2120 2122 2121 2123 return err; 2122 2124 } ··· 2331 2333 hdr = &root->ihdr; 2332 2334 e = fnd->root_de; 2333 2335 n = NULL; 2336 + ib = NULL; 2334 2337 } 2335 2338 2336 2339 e_size = le16_to_cpu(e->size); ··· 2354 2355 * Check to see if removing that entry made 2355 2356 * the leaf empty. 2356 2357 */ 2357 - if (ib_is_leaf(ib) && ib_is_empty(ib)) { 2358 + if (ib && ib_is_leaf(ib) && ib_is_empty(ib)) { 2358 2359 fnd_pop(fnd); 2359 2360 fnd_push(fnd2, n, e); 2360 2361 } ··· 2602 2603 in = &s_index_names[indx->type]; 2603 2604 2604 2605 err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len, 2605 - &indx->alloc_run, 0, NULL, false, NULL); 2606 + &indx->alloc_run, 0, NULL, false); 2606 2607 if (in->name == I30_NAME) 2607 2608 i_size_write(&ni->vfs_inode, 0); 2608 2609 ··· 2611 2612 run_close(&indx->alloc_run); 2612 2613 2613 2614 err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len, 2614 - &indx->bitmap_run, 0, NULL, false, NULL); 2615 + &indx->bitmap_run, 0, NULL, false); 2615 2616 err = ni_remove_attr(ni, ATTR_BITMAP, in->name, in->name_len, 2616 2617 false, NULL); 2617 2618 run_close(&indx->bitmap_run);
+112 -49
fs/ntfs3/inode.c
··· 40 40 u32 rp_fa = 0, asize, t32; 41 41 u16 roff, rsize, names = 0, links = 0; 42 42 const struct ATTR_FILE_NAME *fname = NULL; 43 - const struct INDEX_ROOT *root; 43 + const struct INDEX_ROOT *root = NULL; 44 44 struct REPARSE_DATA_BUFFER rp; // 0x18 bytes 45 45 u64 t64; 46 46 struct MFT_REC *rec; ··· 556 556 557 557 static sector_t ntfs_bmap(struct address_space *mapping, sector_t block) 558 558 { 559 + struct inode *inode = mapping->host; 560 + struct ntfs_inode *ni = ntfs_i(inode); 561 + 562 + /* 563 + * We can get here for an inline file via the FIBMAP ioctl 564 + */ 565 + if (is_resident(ni)) 566 + return 0; 567 + 568 + if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && 569 + !run_is_empty(&ni->file.run_da)) { 570 + /* 571 + * With delalloc data we want to sync the file so 572 + * that we can make sure we allocate blocks for file and data 573 + * is in place for the user to see it 574 + */ 575 + ni_allocate_da_blocks(ni); 576 + } 577 + 559 578 return iomap_bmap(mapping, block, &ntfs_iomap_ops); 560 579 } 561 580 ··· 741 722 down_write(&ni->file.run_lock); 742 723 743 724 err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size, 744 - &ni->i_valid, true, NULL); 725 + &ni->i_valid, true); 745 726 746 727 if (!err) { 747 728 i_size_write(inode, new_size); ··· 754 735 return err; 755 736 } 756 737 738 + /* 739 + * Special value to detect ntfs_writeback_range call 740 + */ 741 + #define WB_NO_DA (struct iomap *)1 757 742 /* 758 743 * Function to get mapping vbo -> lbo. 759 744 * used with: ··· 783 760 loff_t endbyte = offset + length; 784 761 void *res = NULL; 785 762 int err; 786 - CLST lcn, clen, clen_max; 763 + CLST lcn, clen, clen_max = 1; 787 764 bool new_clst = false; 765 + bool no_da; 766 + bool zero = false; 788 767 if (unlikely(ntfs3_forced_shutdown(sbi->sb))) 789 768 return -EIO; 790 769 791 - if ((flags & IOMAP_REPORT) && offset > ntfs_get_maxbytes(ni)) { 792 - /* called from fiemap/bmap. */ 793 - return -EINVAL; 770 + if (flags & IOMAP_REPORT) { 771 + if (offset > ntfs_get_maxbytes(ni)) { 772 + /* called from fiemap/bmap. */ 773 + return -EINVAL; 774 + } 775 + 776 + if (offset >= inode->i_size) { 777 + /* special code for report. */ 778 + return -ENOENT; 779 + } 794 780 } 795 781 796 - clen_max = rw ? (bytes_to_cluster(sbi, endbyte) - vcn) : 1; 782 + if (IOMAP_ZERO == flags && (endbyte & sbi->cluster_mask)) { 783 + rw = true; 784 + } else if (rw) { 785 + clen_max = bytes_to_cluster(sbi, endbyte) - vcn; 786 + } 797 787 798 - err = attr_data_get_block( 799 - ni, vcn, clen_max, &lcn, &clen, rw ? &new_clst : NULL, 800 - flags == IOMAP_WRITE && (off || (endbyte & sbi->cluster_mask)), 801 - &res); 788 + /* 789 + * Force to allocate clusters if directIO(write) or writeback_range. 790 + * NOTE: attr_data_get_block allocates clusters only for sparse file. 791 + * Normal file allocates clusters in attr_set_size. 792 + */ 793 + no_da = flags == (IOMAP_DIRECT | IOMAP_WRITE) || srcmap == WB_NO_DA; 794 + 795 + err = attr_data_get_block(ni, vcn, clen_max, &lcn, &clen, 796 + rw ? &new_clst : NULL, zero, &res, no_da); 802 797 803 798 if (err) { 804 799 return err; ··· 836 795 lcn = SPARSE_LCN; 837 796 } 838 797 798 + iomap->flags = new_clst ? IOMAP_F_NEW : 0; 799 + 839 800 if (lcn == RESIDENT_LCN) { 840 801 if (offset >= clen) { 841 802 kfree(res); ··· 852 809 iomap->type = IOMAP_INLINE; 853 810 iomap->offset = 0; 854 811 iomap->length = clen; /* resident size in bytes. 
*/ 855 - iomap->flags = 0; 856 812 return 0; 857 813 } 858 814 ··· 860 818 return -EINVAL; 861 819 } 862 820 821 + iomap->bdev = inode->i_sb->s_bdev; 822 + iomap->offset = offset; 823 + iomap->length = ((loff_t)clen << cluster_bits) - off; 824 + 863 825 if (lcn == COMPRESSED_LCN) { 864 826 /* should never be here. */ 865 827 return -EOPNOTSUPP; 866 828 } 867 829 868 - iomap->flags = new_clst ? IOMAP_F_NEW : 0; 869 - iomap->bdev = inode->i_sb->s_bdev; 870 - 871 - /* Translate clusters into bytes. */ 872 - iomap->offset = offset; 873 - iomap->addr = ((loff_t)lcn << cluster_bits) + off; 874 - iomap->length = ((loff_t)clen << cluster_bits) - off; 875 - if (length && iomap->length > length) 876 - iomap->length = length; 877 - else 878 - endbyte = offset + iomap->length; 879 - 880 - if (lcn == SPARSE_LCN) { 830 + if (lcn == DELALLOC_LCN) { 831 + iomap->type = IOMAP_DELALLOC; 881 832 iomap->addr = IOMAP_NULL_ADDR; 882 - iomap->type = IOMAP_HOLE; 883 - } else if (endbyte <= ni->i_valid) { 884 - iomap->type = IOMAP_MAPPED; 885 - } else if (offset < ni->i_valid) { 886 - iomap->type = IOMAP_MAPPED; 887 - if (flags & IOMAP_REPORT) 888 - iomap->length = ni->i_valid - offset; 889 - } else if (rw || (flags & IOMAP_ZERO)) { 890 - iomap->type = IOMAP_MAPPED; 891 833 } else { 892 - iomap->type = IOMAP_UNWRITTEN; 834 + 835 + /* Translate clusters into bytes. */ 836 + iomap->addr = ((loff_t)lcn << cluster_bits) + off; 837 + if (length && iomap->length > length) 838 + iomap->length = length; 839 + else 840 + endbyte = offset + iomap->length; 841 + 842 + if (lcn == SPARSE_LCN) { 843 + iomap->addr = IOMAP_NULL_ADDR; 844 + iomap->type = IOMAP_HOLE; 845 + // if (IOMAP_ZERO == flags && !off) { 846 + // iomap->length = (endbyte - offset) & 847 + // sbi->cluster_mask_inv; 848 + // } 849 + } else if (endbyte <= ni->i_valid) { 850 + iomap->type = IOMAP_MAPPED; 851 + } else if (offset < ni->i_valid) { 852 + iomap->type = IOMAP_MAPPED; 853 + if (flags & IOMAP_REPORT) 854 + iomap->length = ni->i_valid - offset; 855 + } else if (rw || (flags & IOMAP_ZERO)) { 856 + iomap->type = IOMAP_MAPPED; 857 + } else { 858 + iomap->type = IOMAP_UNWRITTEN; 859 + } 893 860 } 894 861 895 - if ((flags & IOMAP_ZERO) && iomap->type == IOMAP_MAPPED) { 862 + if ((flags & IOMAP_ZERO) && 863 + (iomap->type == IOMAP_MAPPED || iomap->type == IOMAP_DELALLOC)) { 896 864 /* Avoid too large requests. */ 897 865 u32 tail; 898 - u32 off_a = iomap->addr & (PAGE_SIZE - 1); 866 + u32 off_a = offset & (PAGE_SIZE - 1); 899 867 if (off_a) 900 868 tail = PAGE_SIZE - off_a; 901 869 else ··· 956 904 } 957 905 } 958 906 959 - if ((flags & IOMAP_ZERO) && iomap->type == IOMAP_MAPPED) { 907 + if ((flags & IOMAP_ZERO) && 908 + (iomap->type == IOMAP_MAPPED || iomap->type == IOMAP_DELALLOC)) { 909 + /* Pair for code in ntfs_iomap_begin. */ 960 910 balance_dirty_pages_ratelimited(inode->i_mapping); 961 911 cond_resched(); 962 912 } ··· 987 933 loff_t f_pos = folio_pos(folio); 988 934 loff_t f_end = f_pos + f_size; 989 935 990 - if (ni->i_valid < end && end < f_end) { 936 + if (ni->i_valid <= end && end < f_end) { 991 937 /* zero range [end - f_end). */ 992 938 /* The only thing ntfs_iomap_put_folio used for. 
*/ 993 939 folio_zero_segment(folio, offset_in_folio(folio, end), f_size); ··· 996 942 folio_put(folio); 997 943 } 998 944 945 + /* 946 + * iomap_writeback_ops::writeback_range 947 + */ 999 948 static ssize_t ntfs_writeback_range(struct iomap_writepage_ctx *wpc, 1000 949 struct folio *folio, u64 offset, 1001 950 unsigned int len, u64 end_pos) 1002 951 { 1003 952 struct iomap *iomap = &wpc->iomap; 1004 - struct inode *inode = wpc->inode; 1005 - 1006 953 /* Check iomap position. */ 1007 - if (!(iomap->offset <= offset && 1008 - offset < iomap->offset + iomap->length)) { 954 + if (iomap->offset + iomap->length <= offset || offset < iomap->offset) { 1009 955 int err; 956 + struct inode *inode = wpc->inode; 957 + struct ntfs_inode *ni = ntfs_i(inode); 1010 958 struct ntfs_sb_info *sbi = ntfs_sb(inode->i_sb); 1011 959 loff_t i_size_up = ntfs_up_cluster(sbi, inode->i_size); 1012 960 loff_t len_max = i_size_up - offset; 1013 961 1014 - err = ntfs_iomap_begin(inode, offset, len_max, IOMAP_WRITE, 1015 - iomap, NULL); 962 + err = ni->file.run_da.count ? ni_allocate_da_blocks(ni) : 0; 963 + 964 + if (!err) { 965 + /* Use local special value 'WB_NO_DA' to disable delalloc. */ 966 + err = ntfs_iomap_begin(inode, offset, len_max, 967 + IOMAP_WRITE, iomap, WB_NO_DA); 968 + } 969 + 1016 970 if (err) { 1017 971 ntfs_set_state(sbi, NTFS_DIRTY_DIRTY); 1018 972 return err; ··· 1594 1532 attr->nres.alloc_size = 1595 1533 cpu_to_le64(ntfs_up_cluster(sbi, nsize)); 1596 1534 1597 - err = attr_allocate_clusters(sbi, &ni->file.run, 0, 0, 1598 - clst, NULL, ALLOCATE_DEF, 1599 - &alen, 0, NULL, NULL); 1535 + err = attr_allocate_clusters(sbi, &ni->file.run, NULL, 1536 + 0, 0, clst, NULL, 1537 + ALLOCATE_DEF, &alen, 0, 1538 + NULL, NULL); 1600 1539 if (err) 1601 1540 goto out5; 1602 1541 ··· 1738 1675 /* Delete ATTR_EA, if non-resident. */ 1739 1676 struct runs_tree run; 1740 1677 run_init(&run); 1741 - attr_set_size(ni, ATTR_EA, NULL, 0, &run, 0, NULL, false, NULL); 1678 + attr_set_size(ni, ATTR_EA, NULL, 0, &run, 0, NULL, false); 1742 1679 run_close(&run); 1743 1680 } 1744 1681
+3
fs/ntfs3/ntfs.h
··· 77 77 typedef u32 CLST; 78 78 #endif 79 79 80 + /* On-disk sparsed cluster is marked as -1. */ 80 81 #define SPARSE_LCN64 ((u64)-1) 81 82 #define SPARSE_LCN ((CLST)-1) 83 + /* Below is virtual (not on-disk) values. */ 82 84 #define RESIDENT_LCN ((CLST)-2) 83 85 #define COMPRESSED_LCN ((CLST)-3) 84 86 #define EOF_LCN ((CLST)-4) 87 + #define DELALLOC_LCN ((CLST)-5) 85 88 86 89 enum RECORD_NUM { 87 90 MFT_REC_MFT = 0,
+78 -13
fs/ntfs3/ntfs_fs.h
··· 108 108 unsigned force : 1; /* RW mount dirty volume. */ 109 109 unsigned prealloc : 1; /* Preallocate space when file is growing. */ 110 110 unsigned nocase : 1; /* case insensitive. */ 111 + unsigned delalloc : 1; /* delay allocation. */ 111 112 }; 112 113 113 114 /* Special value to unpack and deallocate. */ ··· 133 132 enum ALLOCATE_OPT { 134 133 ALLOCATE_DEF = 0, // Allocate all clusters. 135 134 ALLOCATE_MFT = 1, // Allocate for MFT. 136 - ALLOCATE_ZERO = 2, // Zeroout new allocated clusters 135 + ALLOCATE_ZERO = 2, // Zeroout new allocated clusters. 136 + ALLOCATE_ONE_FR = 4, // Allocate one fragment only. 137 137 }; 138 138 139 139 enum bitmap_mutex_classes { ··· 215 213 216 214 u32 discard_granularity; 217 215 u64 discard_granularity_mask_inv; // ~(discard_granularity_mask_inv-1) 218 - u32 bdev_blocksize_mask; // bdev_logical_block_size(bdev) - 1; 216 + u32 bdev_blocksize; // bdev_logical_block_size(bdev) 219 217 220 218 u32 cluster_size; // bytes per cluster 221 219 u32 cluster_mask; // == cluster_size - 1 ··· 274 272 struct { 275 273 struct wnd_bitmap bitmap; // $Bitmap::Data 276 274 CLST next_free_lcn; 275 + /* Total sum of delay allocated clusters in all files. */ 276 + #ifdef CONFIG_NTFS3_64BIT_CLUSTER 277 + atomic64_t da; 278 + #else 279 + atomic_t da; 280 + #endif 277 281 } used; 278 282 279 283 struct { ··· 387 379 */ 388 380 u8 mi_loaded; 389 381 390 - /* 382 + /* 391 383 * Use this field to avoid any write(s). 392 384 * If inode is bad during initialization - use make_bad_inode 393 385 * If inode is bad during operations - use this field ··· 398 390 struct ntfs_index dir; 399 391 struct { 400 392 struct rw_semaphore run_lock; 393 + /* Unpacked runs from just one record. */ 401 394 struct runs_tree run; 395 + /* 396 + * Pairs [vcn, len] for all delay allocated clusters. 397 + * Normal file always contains delayed clusters in one fragment. 398 + * TODO: use 2 CLST per pair instead of 3. 
399 + */
400 + struct runs_tree run_da;
402 401 #ifdef CONFIG_NTFS3_LZX_XPRESS
403 402 struct folio *offs_folio;
404 403 #endif
··· 445 430 
446 431 /* Functions from attrib.c */
447 432 int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run,
448 - CLST vcn, CLST lcn, CLST len, CLST *pre_alloc,
449 - enum ALLOCATE_OPT opt, CLST *alen, const size_t fr,
450 - CLST *new_lcn, CLST *new_len);
433 + struct runs_tree *run_da, CLST vcn, CLST lcn,
434 + CLST len, CLST *pre_alloc, enum ALLOCATE_OPT opt,
435 + CLST *alen, const size_t fr, CLST *new_lcn,
436 + CLST *new_len);
451 437 int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr,
452 438 struct ATTR_LIST_ENTRY *le, struct mft_inode *mi,
453 439 u64 new_size, struct runs_tree *run,
454 440 struct ATTRIB **ins_attr, struct page *page);
455 - int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type,
456 - const __le16 *name, u8 name_len, struct runs_tree *run,
457 - u64 new_size, const u64 *new_valid, bool keep_prealloc,
458 - struct ATTRIB **ret);
441 + int attr_set_size_ex(struct ntfs_inode *ni, enum ATTR_TYPE type,
442 + const __le16 *name, u8 name_len, struct runs_tree *run,
443 + u64 new_size, const u64 *new_valid, bool keep_prealloc,
444 + struct ATTRIB **ret, bool no_da);
445 + static inline int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type,
446 + const __le16 *name, u8 name_len,
447 + struct runs_tree *run, u64 new_size,
448 + const u64 *new_valid, bool keep_prealloc)
449 + {
450 + return attr_set_size_ex(ni, type, name, name_len, run, new_size,
451 + new_valid, keep_prealloc, NULL, false);
452 + }
459 453 int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn,
460 - CLST *len, bool *new, bool zero, void **res);
454 + CLST *len, bool *new, bool zero, void **res,
455 + bool no_da);
456 + int attr_data_get_block_locked(struct ntfs_inode *ni, CLST vcn, CLST clen,
457 + CLST *lcn, CLST *len, bool *new, bool zero,
458 + void **res, bool no_da);
461 459 int attr_data_write_resident(struct ntfs_inode *ni, struct folio *folio);
462 460 int attr_load_runs_vcn(struct ntfs_inode *ni, enum ATTR_TYPE type,
463 461 const __le16 *name, u8 name_len, struct runs_tree *run,
··· 618 590 bool ni_is_dirty(struct inode *inode);
619 591 loff_t ni_seek_data_or_hole(struct ntfs_inode *ni, loff_t offset, bool data);
620 592 int ni_write_parents(struct ntfs_inode *ni, int sync);
593 + int ni_allocate_da_blocks(struct ntfs_inode *ni);
594 + int ni_allocate_da_blocks_locked(struct ntfs_inode *ni);
621 595 
622 596 /* Globals from fslog.c */
623 597 bool check_index_header(const struct INDEX_HDR *hdr, size_t bytes);
··· 635 605 int ntfs_look_for_free_space(struct ntfs_sb_info *sbi, CLST lcn, CLST len,
636 606 CLST *new_lcn, CLST *new_len,
637 607 enum ALLOCATE_OPT opt);
638 - bool ntfs_check_for_free_space(struct ntfs_sb_info *sbi, CLST clen, CLST mlen);
608 + bool ntfs_check_free_space(struct ntfs_sb_info *sbi, CLST clen, CLST mlen,
609 + bool da);
639 610 int ntfs_look_free_mft(struct ntfs_sb_info *sbi, CLST *rno, bool mft,
640 611 struct ntfs_inode *ni, struct mft_inode **mi);
641 612 void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST rno, bool is_mft);
··· 862 831 bool run_add_entry(struct runs_tree *run, CLST vcn, CLST lcn, CLST len,
863 832 bool is_mft);
864 833 bool run_collapse_range(struct runs_tree *run, CLST vcn, CLST len, CLST sub);
865 - bool run_insert_range(struct runs_tree *run, CLST vcn, CLST len);
834 + int run_insert_range(struct runs_tree *run, CLST vcn, CLST len);
835 + int run_insert_range_da(struct runs_tree *run, CLST vcn, CLST len);
866 836 bool run_get_entry(const struct runs_tree *run, size_t index, CLST *vcn,
867 837 CLST *lcn, CLST *len);
868 838 bool run_is_mapped_full(const struct runs_tree *run, CLST svcn, CLST evcn);
··· 883 851 #endif
884 852 int run_get_highest_vcn(CLST vcn, const u8 *run_buf, u64 *highest_vcn);
885 853 int run_clone(const struct runs_tree *run, struct runs_tree *new_run);
854 + bool run_remove_range(struct runs_tree *run, CLST vcn, CLST len, CLST *done);
855 + CLST run_len(const struct runs_tree *run);
856 + CLST run_get_max_vcn(const struct runs_tree *run);
886 857 
887 858 /* Globals from super.c */
888 859 void *ntfs_set_shared(void *ptr, u32 bytes);
··· 1060 1025 static inline int ntfs3_forced_shutdown(struct super_block *sb)
1061 1026 {
1062 1027 return test_bit(NTFS_FLAGS_SHUTDOWN_BIT, &ntfs_sb(sb)->flags);
1028 + }
1029 + 
1030 + /* Returns the total number of delay-allocated clusters in all files. */
1031 + static inline CLST ntfs_get_da(struct ntfs_sb_info *sbi)
1032 + {
1033 + #ifdef CONFIG_NTFS3_64BIT_CLUSTER
1034 + return atomic64_read(&sbi->used.da);
1035 + #else
1036 + return atomic_read(&sbi->used.da);
1037 + #endif
1038 + }
1039 + 
1040 + /* Increase the total count of delay-allocated clusters. */
1041 + static inline void ntfs_add_da(struct ntfs_sb_info *sbi, CLST da)
1042 + {
1043 + #ifdef CONFIG_NTFS3_64BIT_CLUSTER
1044 + atomic64_add(da, &sbi->used.da);
1045 + #else
1046 + atomic_add(da, &sbi->used.da);
1047 + #endif
1048 + }
1049 + 
1050 + /* Decrease the total count of delay-allocated clusters. */
1051 + static inline void ntfs_sub_da(struct ntfs_sb_info *sbi, CLST da)
1052 + {
1053 + #ifdef CONFIG_NTFS3_64BIT_CLUSTER
1054 + atomic64_sub(da, &sbi->used.da);
1055 + #else
1056 + atomic_sub(da, &sbi->used.da);
1057 + #endif
1063 1058 }
1064 1059 
1065 1060 /*
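The per-volume counter only stays meaningful if every ntfs_add_da() reservation is matched by exactly one ntfs_sub_da() when the range is either converted at writeback or cancelled by truncate/punch. A userspace model of that pairing and of the ntfs_check_free_space() overcommit test, using C11 atomics in place of the kernel atomic_t; reserve and convert_or_cancel are hypothetical names, and the check-then-add is deliberately not atomic here (fine for a single-threaded demo):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_long da_clusters = 0;	/* models sbi->used.da */
static long free_clusters = 100;	/* models wnd_zeroes() for the demo */

/* Models ntfs_check_free_space(): free space minus outstanding delalloc. */
static bool check_free_space(long want)
{
	return free_clusters - atomic_load(&da_clusters) >= want;
}

/* Delalloc reservation at buffered-write time. */
static bool reserve(long clen)
{
	if (!check_free_space(clen))
		return false;
	atomic_fetch_add(&da_clusters, clen);	/* ntfs_add_da() */
	return true;
}

/* Writeback conversion, or cancellation by truncate/punch. */
static void convert_or_cancel(long clen)
{
	atomic_fetch_sub(&da_clusters, clen);	/* ntfs_sub_da() */
}

int main(void)
{
	if (reserve(60))
		printf("reserved 60, da=%ld\n", atomic_load(&da_clusters));
	printf("second 60 fits: %d\n", reserve(60));	/* 0: only 40 left */
	convert_or_cancel(60);
	printf("after convert, da=%ld\n", atomic_load(&da_clusters));
	return 0;
}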
+141 -9
fs/ntfs3/run.c
··· 454 454 
455 455 /*
456 456 * If existing range fits then were done.
457 - * Otherwise extend found one and fall back to range jocode.
457 + * Otherwise extend found one and fall back to range join code.
458 458 */
459 459 if (r->vcn + r->len < vcn + len)
460 460 r->len += len - ((r->vcn + r->len) - vcn);
··· 482 482 return true;
483 483 }
484 484 
485 - /* run_collapse_range
485 + /*
486 + * run_collapse_range
486 487 *
487 488 * Helper for attr_collapse_range(),
488 489 * which is helper for fallocate(collapse_range).
··· 494 493 struct ntfs_run *r, *e, *eat_start, *eat_end;
495 494 CLST end;
496 495 
497 - if (WARN_ON(!run_lookup(run, vcn, &index)))
498 - return true; /* Should never be here. */
496 + if (!run_lookup(run, vcn, &index) && index >= run->count) {
497 + return true;
498 + }
499 499 
500 500 e = run->runs + run->count;
501 501 r = run->runs + index;
··· 562 560 * Helper for attr_insert_range(),
563 561 * which is helper for fallocate(insert_range).
564 562 */
565 - bool run_insert_range(struct runs_tree *run, CLST vcn, CLST len)
563 + int run_insert_range(struct runs_tree *run, CLST vcn, CLST len)
566 564 {
567 565 size_t index;
568 566 struct ntfs_run *r, *e;
569 567 
570 568 if (WARN_ON(!run_lookup(run, vcn, &index)))
571 - return false; /* Should never be here. */
569 + return -EINVAL; /* Should never be here. */
572 570 
573 571 e = run->runs + run->count;
574 572 r = run->runs + index;
··· 590 588 r->len = len1;
591 589 
592 590 if (!run_add_entry(run, vcn + len, lcn2, len2, false))
593 - return false;
591 + return -ENOMEM;
594 592 }
595 593 
596 594 if (!run_add_entry(run, vcn, SPARSE_LCN, len, false))
597 - return false;
595 + return -ENOMEM;
598 596 
599 - return true;
597 + return 0;
598 + }
599 + 
600 + /* run_insert_range_da
601 + *
602 + * Helper for attr_insert_range(),
603 + * which is helper for fallocate(insert_range).
604 + */
605 + int run_insert_range_da(struct runs_tree *run, CLST vcn, CLST len)
606 + {
607 + struct ntfs_run *r, *r0 = NULL;
608 + struct ntfs_run *e = run->runs + run->count;
609 + 
610 + for (r = run->runs; r < e; r++) {
611 + CLST end = r->vcn + r->len;
612 + 
613 + if (vcn >= end)
614 + continue;
615 + 
616 + if (!r0 && r->vcn < vcn) {
617 + r0 = r;
618 + } else {
619 + r->vcn += len;
620 + }
621 + }
622 + 
623 + if (r0) {
624 + /* Split fragment. */
625 + CLST len1 = vcn - r0->vcn;
626 + CLST len2 = r0->len - len1;
627 + 
628 + r0->len = len1;
629 + if (!run_add_entry(run, vcn + len, SPARSE_LCN, len2, false))
630 + return -ENOMEM;
631 + }
632 + 
633 + return 0;
600 634 }
601 635 
602 636 /*
··· 1246 1208 memcpy(new_run->runs, run->runs, bytes);
1247 1209 new_run->count = run->count;
1248 1210 return 0;
1211 + }
1212 + 
1213 + /*
1214 + * run_remove_range - Remove [vcn, vcn + len) from the run.
1215 + * The number of removed clusters is returned in *done.
1216 + */
1217 + bool run_remove_range(struct runs_tree *run, CLST vcn, CLST len, CLST *done)
1218 + {
1219 + size_t index, eat;
1220 + struct ntfs_run *r, *e, *eat_start, *eat_end;
1221 + CLST end, d;
1222 + 
1223 + *done = 0;
1224 + 
1225 + /* Fast check. */
1226 + if (!run->count)
1227 + return true;
1228 + 
1229 + if (!run_lookup(run, vcn, &index) && index >= run->count) {
1230 + /* No entries in this run. */
1231 + return true;
1232 + }
1233 + 
1234 + /* 'index' points at the first run that can overlap [vcn, vcn + len). */
1235 + e = run->runs + run->count;
1236 + r = run->runs + index;
1237 + end = vcn + len;
1238 + 
1239 + if (vcn > r->vcn) {
1240 + CLST r_end = r->vcn + r->len;
1241 + d = vcn - r->vcn;
1242 + 
1243 + if (r_end > end) {
1244 + /* Remove a middle part, split. */
1245 + *done += len;
1246 + r->len = d;
1247 + return run_add_entry(run, end, r->lcn == SPARSE_LCN ?
1248 + r->lcn : r->lcn + d + len, r_end - end, false);
1249 + }
1250 + /* Remove tail of run. */
1251 + *done += r->len - d;
1252 + r->len = d;
1253 + r += 1;
1254 + }
1255 + 
1256 + eat_start = r;
1257 + eat_end = r;
1258 + 
1259 + for (; r < e; r++) {
1260 + if (r->vcn >= end)
1261 + continue;
1262 + 
1263 + if (r->vcn + r->len <= end) {
1264 + /* Eat this run. */
1265 + *done += r->len;
1266 + eat_end = r + 1;
1267 + continue;
1268 + }
1269 + 
1270 + d = end - r->vcn;
1271 + *done += d;
1272 + if (r->lcn != SPARSE_LCN)
1273 + r->lcn += d;
1274 + r->len -= d;
1275 + r->vcn = end;
1276 + }
1277 + 
1278 + eat = eat_end - eat_start;
1279 + memmove(eat_start, eat_end, (e - eat_end) * sizeof(*r));
1280 + run->count -= eat;
1281 + 
1282 + return true;
1283 + }
1284 + 
1285 + CLST run_len(const struct runs_tree *run)
1286 + {
1287 + const struct ntfs_run *r, *e;
1288 + CLST len = 0;
1289 + 
1290 + for (r = run->runs, e = r + run->count; r < e; r++) {
1291 + len += r->len;
1292 + }
1293 + 
1294 + return len;
1295 + }
1296 + 
1297 + CLST run_get_max_vcn(const struct runs_tree *run)
1298 + {
1299 + const struct ntfs_run *r;
1300 + if (!run->count)
1301 + return 0;
1302 + 
1303 + r = run->runs + run->count - 1;
1304 + return r->vcn + r->len;
1249 1305 }
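run_remove_range() handles four shapes of overlap against the sorted run array: trim the tail of the first run, eat whole runs, trim the head of the last run, or split a single run that fully covers the range. A compact userspace model of the same walk (simplified: plain {vcn, len} pairs, no lcn bookkeeping, fixed-size array):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef uint32_t CLST;
struct run { CLST vcn, len; };

/* Returns removed cluster count; mirrors run_remove_range() control flow. */
static CLST remove_range(struct run *r, int *count, CLST vcn, CLST len)
{
	CLST end = vcn + len, done = 0;
	int i = 0, eat_start, eat_end;

	/* Skip runs that end at or before vcn. */
	for (; i < *count && r[i].vcn + r[i].len <= vcn; i++)
		;
	if (i < *count && vcn > r[i].vcn) {
		CLST r_end = r[i].vcn + r[i].len, d = vcn - r[i].vcn;

		if (r_end > end) {
			/* Middle split: keep head, insert new tail run. */
			r[i].len = d;
			memmove(r + i + 2, r + i + 1,
				(*count - i - 1) * sizeof(*r));
			r[i + 1] = (struct run){ end, r_end - end };
			(*count)++;
			return len;
		}
		done += r[i].len - d;	/* Tail trim. */
		r[i].len = d;
		i++;
	}
	eat_start = i;
	for (; i < *count && r[i].vcn + r[i].len <= end; i++)
		done += r[i].len;	/* Eat whole run. */
	eat_end = i;
	if (i < *count && r[i].vcn < end) {
		CLST d = end - r[i].vcn;	/* Head trim. */

		done += d;
		r[i].vcn = end;
		r[i].len -= d;
	}
	memmove(r + eat_start, r + eat_end, (*count - eat_end) * sizeof(*r));
	*count -= eat_end - eat_start;
	return done;
}

int main(void)
{
	struct run r[8] = { { 0, 10 }, { 20, 10 }, { 40, 10 } };
	int count = 3;
	CLST done = remove_range(r, &count, 5, 30); /* tail trim + eat */

	printf("removed=%u count=%d\n", done, count);
	for (int i = 0; i < count; i++)
		printf("[%u,%u) ", r[i].vcn, r[i].vcn + r[i].len);
	printf("\n");	/* expected: removed=15 count=2, [0,5) [40,50) */
	return 0;
}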
+24 -4
fs/ntfs3/super.c
··· 269 269 Opt_prealloc,
270 270 Opt_prealloc_bool,
271 271 Opt_nocase,
272 + Opt_delalloc,
273 + Opt_delalloc_bool,
272 274 Opt_err,
273 275 };
274 276 
··· 295 293 fsparam_flag("prealloc", Opt_prealloc),
296 294 fsparam_bool("prealloc", Opt_prealloc_bool),
297 295 fsparam_flag("nocase", Opt_nocase),
296 + fsparam_flag("delalloc", Opt_delalloc),
297 + fsparam_bool("delalloc", Opt_delalloc_bool),
298 298 {}
299 299 };
300 300 // clang-format on
··· 413 409 break;
414 410 case Opt_nocase:
415 411 opts->nocase = 1;
412 + break;
413 + case Opt_delalloc:
414 + opts->delalloc = 1;
415 + break;
416 + case Opt_delalloc_bool:
417 + opts->delalloc = result.boolean;
416 418 break;
417 419 default:
418 420 /* Should not be here unless we forget add case. */
··· 736 726 struct super_block *sb = dentry->d_sb;
737 727 struct ntfs_sb_info *sbi = sb->s_fs_info;
738 728 struct wnd_bitmap *wnd = &sbi->used.bitmap;
729 + CLST da_clusters = ntfs_get_da(sbi);
739 730 
740 731 buf->f_type = sb->s_magic;
741 - buf->f_bsize = sbi->cluster_size;
732 + buf->f_bsize = buf->f_frsize = sbi->cluster_size;
742 733 buf->f_blocks = wnd->nbits;
743 734 
744 - buf->f_bfree = buf->f_bavail = wnd_zeroes(wnd);
735 + buf->f_bfree = wnd_zeroes(wnd);
736 + if (buf->f_bfree > da_clusters) {
737 + buf->f_bfree -= da_clusters;
738 + } else {
739 + buf->f_bfree = 0;
740 + }
741 + buf->f_bavail = buf->f_bfree;
742 + 
745 743 buf->f_fsid.val[0] = sbi->volume.ser_num;
746 - buf->f_fsid.val[1] = (sbi->volume.ser_num >> 32);
744 + buf->f_fsid.val[1] = sbi->volume.ser_num >> 32;
747 745 buf->f_namelen = NTFS_NAME_LEN;
748 746 
749 747 return 0;
··· 796 778 seq_puts(m, ",prealloc");
797 779 if (opts->nocase)
798 780 seq_puts(m, ",nocase");
781 + if (opts->delalloc)
782 + seq_puts(m, ",delalloc");
799 783 
800 784 return 0;
801 785 }
··· 1108 1088 dev_size += sector_size - 1;
1109 1089 }
1110 1090 
1111 - sbi->bdev_blocksize_mask = max(boot_sector_size, sector_size) - 1;
1091 + sbi->bdev_blocksize = max(boot_sector_size, sector_size);
1112 1092 sbi->mft.lbo = mlcn << cluster_bits;
1113 1093 sbi->mft.lbo2 = mlcn2 << cluster_bits;
1114 1094 
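The statfs change does two things: it now reports f_frsize alongside f_bsize, so statvfs(3) interprets the block counts in cluster units, and it subtracts outstanding delalloc reservations from the free count. The explicit clamp matters because the fields are unsigned and a plain subtraction could wrap. A tiny sketch of that math:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the new statfs math: never let unsigned math wrap below zero. */
static uint64_t bfree_minus_da(uint64_t bfree, uint64_t da)
{
	return bfree > da ? bfree - da : 0;
}

int main(void)
{
	/* 700: normal case. */
	printf("%llu\n", (unsigned long long)bfree_minus_da(1000, 300));
	/* 0, not a huge wrapped value. */
	printf("%llu\n", (unsigned long long)bfree_minus_da(100, 300));
	return 0;
}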
+1 -1
fs/ntfs3/xattr.c
··· 460 460 
461 461 new_sz = size;
462 462 err = attr_set_size(ni, ATTR_EA, NULL, 0, &ea_run, new_sz, &new_sz,
463 - false, NULL);
463 + false);
464 464 if (err)
465 465 goto out;
466 466 