Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

ntfs: update iomap and address space operations

Update the address space operations to use the iomap framework,
replacing legacy buffer-head based code.

Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Hyunchul Lee <hyc.lee@gmail.com>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>

+990 -1601
+120 -1601
fs/ntfs/aops.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0-or-later 2 2 /* 3 - * aops.c - NTFS kernel address space operations and page cache handling. 3 + * NTFS kernel address space operations and page cache handling. 4 4 * 5 5 * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc. 6 6 * Copyright (c) 2002 Richard Russon 7 + * Copyright (c) 2025 LG Electronics Co., Ltd. 7 8 */ 8 9 9 - #include <linux/errno.h> 10 - #include <linux/fs.h> 11 - #include <linux/gfp.h> 12 - #include <linux/mm.h> 13 - #include <linux/pagemap.h> 14 - #include <linux/swap.h> 15 - #include <linux/buffer_head.h> 16 10 #include <linux/writeback.h> 17 - #include <linux/bit_spinlock.h> 18 - #include <linux/bio.h> 19 11 20 - #include "aops.h" 21 12 #include "attrib.h" 22 - #include "debug.h" 23 - #include "inode.h" 24 13 #include "mft.h" 25 - #include "runlist.h" 26 - #include "types.h" 27 14 #include "ntfs.h" 15 + #include "debug.h" 16 + #include "iomap.h" 28 17 29 - /** 30 - * ntfs_end_buffer_async_read - async io completion for reading attributes 31 - * @bh: buffer head on which io is completed 32 - * @uptodate: whether @bh is now uptodate or not 33 - * 34 - * Asynchronous I/O completion handler for reading pages belonging to the 35 - * attribute address space of an inode. The inodes can either be files or 36 - * directories or they can be fake inodes describing some attribute. 37 - * 38 - * If NInoMstProtected(), perform the post read mst fixups when all IO on the 39 - * page has been completed and mark the page uptodate or set the error bit on 40 - * the page. To determine the size of the records that need fixing up, we 41 - * cheat a little bit by setting the index_block_size in ntfs_inode to the ntfs 42 - * record size, and index_block_size_bits, to the log(base 2) of the ntfs 43 - * record size. 
44 - */ 45 - static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) 46 - { 47 - unsigned long flags; 48 - struct buffer_head *first, *tmp; 49 - struct page *page; 50 - struct inode *vi; 51 - ntfs_inode *ni; 52 - int page_uptodate = 1; 53 - 54 - page = bh->b_page; 55 - vi = page->mapping->host; 56 - ni = NTFS_I(vi); 57 - 58 - if (likely(uptodate)) { 59 - loff_t i_size; 60 - s64 file_ofs, init_size; 61 - 62 - set_buffer_uptodate(bh); 63 - 64 - file_ofs = ((s64)page->index << PAGE_SHIFT) + 65 - bh_offset(bh); 66 - read_lock_irqsave(&ni->size_lock, flags); 67 - init_size = ni->initialized_size; 68 - i_size = i_size_read(vi); 69 - read_unlock_irqrestore(&ni->size_lock, flags); 70 - if (unlikely(init_size > i_size)) { 71 - /* Race with shrinking truncate. */ 72 - init_size = i_size; 73 - } 74 - /* Check for the current buffer head overflowing. */ 75 - if (unlikely(file_ofs + bh->b_size > init_size)) { 76 - int ofs; 77 - void *kaddr; 78 - 79 - ofs = 0; 80 - if (file_ofs < init_size) 81 - ofs = init_size - file_ofs; 82 - kaddr = kmap_atomic(page); 83 - memset(kaddr + bh_offset(bh) + ofs, 0, 84 - bh->b_size - ofs); 85 - flush_dcache_page(page); 86 - kunmap_atomic(kaddr); 87 - } 88 - } else { 89 - clear_buffer_uptodate(bh); 90 - SetPageError(page); 91 - ntfs_error(ni->vol->sb, "Buffer I/O error, logical block " 92 - "0x%llx.", (unsigned long long)bh->b_blocknr); 93 - } 94 - first = page_buffers(page); 95 - spin_lock_irqsave(&first->b_uptodate_lock, flags); 96 - clear_buffer_async_read(bh); 97 - unlock_buffer(bh); 98 - tmp = bh; 99 - do { 100 - if (!buffer_uptodate(tmp)) 101 - page_uptodate = 0; 102 - if (buffer_async_read(tmp)) { 103 - if (likely(buffer_locked(tmp))) 104 - goto still_busy; 105 - /* Async buffers must be locked. 
*/ 106 - BUG(); 107 - } 108 - tmp = tmp->b_this_page; 109 - } while (tmp != bh); 110 - spin_unlock_irqrestore(&first->b_uptodate_lock, flags); 111 - /* 112 - * If none of the buffers had errors then we can set the page uptodate, 113 - * but we first have to perform the post read mst fixups, if the 114 - * attribute is mst protected, i.e. if NInoMstProteced(ni) is true. 115 - * Note we ignore fixup errors as those are detected when 116 - * map_mft_record() is called which gives us per record granularity 117 - * rather than per page granularity. 118 - */ 119 - if (!NInoMstProtected(ni)) { 120 - if (likely(page_uptodate && !PageError(page))) 121 - SetPageUptodate(page); 122 - } else { 123 - u8 *kaddr; 124 - unsigned int i, recs; 125 - u32 rec_size; 126 - 127 - rec_size = ni->itype.index.block_size; 128 - recs = PAGE_SIZE / rec_size; 129 - /* Should have been verified before we got here... */ 130 - BUG_ON(!recs); 131 - kaddr = kmap_atomic(page); 132 - for (i = 0; i < recs; i++) 133 - post_read_mst_fixup((NTFS_RECORD*)(kaddr + 134 - i * rec_size), rec_size); 135 - kunmap_atomic(kaddr); 136 - flush_dcache_page(page); 137 - if (likely(page_uptodate && !PageError(page))) 138 - SetPageUptodate(page); 139 - } 140 - unlock_page(page); 141 - return; 142 - still_busy: 143 - spin_unlock_irqrestore(&first->b_uptodate_lock, flags); 144 - return; 145 - } 146 - 147 - /** 148 - * ntfs_read_block - fill a @folio of an address space with data 149 - * @folio: page cache folio to fill with data 150 - * 151 - * We read each buffer asynchronously and when all buffers are read in, our io 152 - * completion handler ntfs_end_buffer_read_async(), if required, automatically 153 - * applies the mst fixups to the folio before finally marking it uptodate and 154 - * unlocking it. 155 - * 156 - * We only enforce allocated_size limit because i_size is checked for in 157 - * generic_file_read(). 158 - * 159 - * Return 0 on success and -errno on error. 
160 - * 161 - * Contains an adapted version of fs/buffer.c::block_read_full_folio(). 162 - */ 163 - static int ntfs_read_block(struct folio *folio) 164 - { 165 - loff_t i_size; 166 - VCN vcn; 167 - LCN lcn; 168 - s64 init_size; 169 - struct inode *vi; 170 - ntfs_inode *ni; 171 - ntfs_volume *vol; 172 - runlist_element *rl; 173 - struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; 174 - sector_t iblock, lblock, zblock; 175 - unsigned long flags; 176 - unsigned int blocksize, vcn_ofs; 177 - int i, nr; 178 - unsigned char blocksize_bits; 179 - 180 - vi = folio->mapping->host; 181 - ni = NTFS_I(vi); 182 - vol = ni->vol; 183 - 184 - /* $MFT/$DATA must have its complete runlist in memory at all times. */ 185 - BUG_ON(!ni->runlist.rl && !ni->mft_no && !NInoAttr(ni)); 186 - 187 - blocksize = vol->sb->s_blocksize; 188 - blocksize_bits = vol->sb->s_blocksize_bits; 189 - 190 - head = folio_buffers(folio); 191 - if (!head) 192 - head = create_empty_buffers(folio, blocksize, 0); 193 - bh = head; 194 - 195 - /* 196 - * We may be racing with truncate. To avoid some of the problems we 197 - * now take a snapshot of the various sizes and use those for the whole 198 - * of the function. In case of an extending truncate it just means we 199 - * may leave some buffers unmapped which are now allocated. This is 200 - * not a problem since these buffers will just get mapped when a write 201 - * occurs. In case of a shrinking truncate, we will detect this later 202 - * on due to the runlist being incomplete and if the folio is being 203 - * fully truncated, truncate will throw it away as soon as we unlock 204 - * it so no need to worry what we do with it. 
205 - */ 206 - iblock = (s64)folio->index << (PAGE_SHIFT - blocksize_bits); 207 - read_lock_irqsave(&ni->size_lock, flags); 208 - lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits; 209 - init_size = ni->initialized_size; 210 - i_size = i_size_read(vi); 211 - read_unlock_irqrestore(&ni->size_lock, flags); 212 - if (unlikely(init_size > i_size)) { 213 - /* Race with shrinking truncate. */ 214 - init_size = i_size; 215 - } 216 - zblock = (init_size + blocksize - 1) >> blocksize_bits; 217 - 218 - /* Loop through all the buffers in the folio. */ 219 - rl = NULL; 220 - nr = i = 0; 221 - do { 222 - int err = 0; 223 - 224 - if (unlikely(buffer_uptodate(bh))) 225 - continue; 226 - if (unlikely(buffer_mapped(bh))) { 227 - arr[nr++] = bh; 228 - continue; 229 - } 230 - bh->b_bdev = vol->sb->s_bdev; 231 - /* Is the block within the allowed limits? */ 232 - if (iblock < lblock) { 233 - bool is_retry = false; 234 - 235 - /* Convert iblock into corresponding vcn and offset. */ 236 - vcn = (VCN)iblock << blocksize_bits >> 237 - vol->cluster_size_bits; 238 - vcn_ofs = ((VCN)iblock << blocksize_bits) & 239 - vol->cluster_size_mask; 240 - if (!rl) { 241 - lock_retry_remap: 242 - down_read(&ni->runlist.lock); 243 - rl = ni->runlist.rl; 244 - } 245 - if (likely(rl != NULL)) { 246 - /* Seek to element containing target vcn. */ 247 - while (rl->length && rl[1].vcn <= vcn) 248 - rl++; 249 - lcn = ntfs_rl_vcn_to_lcn(rl, vcn); 250 - } else 251 - lcn = LCN_RL_NOT_MAPPED; 252 - /* Successful remap. */ 253 - if (lcn >= 0) { 254 - /* Setup buffer head to correct block. */ 255 - bh->b_blocknr = ((lcn << vol->cluster_size_bits) 256 - + vcn_ofs) >> blocksize_bits; 257 - set_buffer_mapped(bh); 258 - /* Only read initialized data blocks. */ 259 - if (iblock < zblock) { 260 - arr[nr++] = bh; 261 - continue; 262 - } 263 - /* Fully non-initialized data block, zero it. */ 264 - goto handle_zblock; 265 - } 266 - /* It is a hole, need to zero it. 
*/ 267 - if (lcn == LCN_HOLE) 268 - goto handle_hole; 269 - /* If first try and runlist unmapped, map and retry. */ 270 - if (!is_retry && lcn == LCN_RL_NOT_MAPPED) { 271 - is_retry = true; 272 - /* 273 - * Attempt to map runlist, dropping lock for 274 - * the duration. 275 - */ 276 - up_read(&ni->runlist.lock); 277 - err = ntfs_map_runlist(ni, vcn); 278 - if (likely(!err)) 279 - goto lock_retry_remap; 280 - rl = NULL; 281 - } else if (!rl) 282 - up_read(&ni->runlist.lock); 283 - /* 284 - * If buffer is outside the runlist, treat it as a 285 - * hole. This can happen due to concurrent truncate 286 - * for example. 287 - */ 288 - if (err == -ENOENT || lcn == LCN_ENOENT) { 289 - err = 0; 290 - goto handle_hole; 291 - } 292 - /* Hard error, zero out region. */ 293 - if (!err) 294 - err = -EIO; 295 - bh->b_blocknr = -1; 296 - folio_set_error(folio); 297 - ntfs_error(vol->sb, "Failed to read from inode 0x%lx, " 298 - "attribute type 0x%x, vcn 0x%llx, " 299 - "offset 0x%x because its location on " 300 - "disk could not be determined%s " 301 - "(error code %i).", ni->mft_no, 302 - ni->type, (unsigned long long)vcn, 303 - vcn_ofs, is_retry ? " even after " 304 - "retrying" : "", err); 305 - } 306 - /* 307 - * Either iblock was outside lblock limits or 308 - * ntfs_rl_vcn_to_lcn() returned error. Just zero that portion 309 - * of the folio and set the buffer uptodate. 310 - */ 311 - handle_hole: 312 - bh->b_blocknr = -1UL; 313 - clear_buffer_mapped(bh); 314 - handle_zblock: 315 - folio_zero_range(folio, i * blocksize, blocksize); 316 - if (likely(!err)) 317 - set_buffer_uptodate(bh); 318 - } while (i++, iblock++, (bh = bh->b_this_page) != head); 319 - 320 - /* Release the lock if we took it. */ 321 - if (rl) 322 - up_read(&ni->runlist.lock); 323 - 324 - /* Check we have at least one buffer ready for i/o. */ 325 - if (nr) { 326 - struct buffer_head *tbh; 327 - 328 - /* Lock the buffers. 
*/ 329 - for (i = 0; i < nr; i++) { 330 - tbh = arr[i]; 331 - lock_buffer(tbh); 332 - tbh->b_end_io = ntfs_end_buffer_async_read; 333 - set_buffer_async_read(tbh); 334 - } 335 - /* Finally, start i/o on the buffers. */ 336 - for (i = 0; i < nr; i++) { 337 - tbh = arr[i]; 338 - if (likely(!buffer_uptodate(tbh))) 339 - submit_bh(REQ_OP_READ, tbh); 340 - else 341 - ntfs_end_buffer_async_read(tbh, 1); 342 - } 343 - return 0; 344 - } 345 - /* No i/o was scheduled on any of the buffers. */ 346 - if (likely(!folio_test_error(folio))) 347 - folio_mark_uptodate(folio); 348 - else /* Signal synchronous i/o error. */ 349 - nr = -EIO; 350 - folio_unlock(folio); 351 - return nr; 352 - } 353 - 354 - /** 355 - * ntfs_read_folio - fill a @folio of a @file with data from the device 18 + /* 19 + * ntfs_read_folio - Read data for a folio from the device 356 20 * @file: open file to which the folio @folio belongs or NULL 357 21 * @folio: page cache folio to fill with data 358 22 * 359 - * For non-resident attributes, ntfs_read_folio() fills the @folio of the open 360 - * file @file by calling the ntfs version of the generic block_read_full_folio() 361 - * function, ntfs_read_block(), which in turn creates and reads in the buffers 362 - * associated with the folio asynchronously. 23 + * This function handles reading data into the page cache. It first checks 24 + * for specific ntfs attribute type like encryption and compression. 363 25 * 364 - * For resident attributes, OTOH, ntfs_read_folio() fills @folio by copying the 365 - * data from the mft record (which at this stage is most likely in memory) and 366 - * fills the remainder with zeroes. Thus, in this case, I/O is synchronous, as 367 - * even if the mft record is not cached at this point in time, we need to wait 368 - * for it to be read in before we can do the copy. 26 + * - If the attribute is encrypted, access is denied (-EACCES) because 27 + * decryption is not supported in this path. 
28 + * - If the attribute is non-resident and compressed, the read operation is 29 + * delegated to ntfs_read_compressed_block(). 30 + * - For normal resident or non-resident attribute, it utilizes the generic 31 + * iomap infrastructure via iomap_bio_read_folio() to perform the I/O. 369 32 * 370 - * Return 0 on success and -errno on error. 33 + * Return: 0 on success, or -errno on error. 371 34 */ 372 35 static int ntfs_read_folio(struct file *file, struct folio *folio) 373 36 { 374 - struct page *page = &folio->page; 375 - loff_t i_size; 376 - struct inode *vi; 377 - ntfs_inode *ni, *base_ni; 378 - u8 *addr; 379 - ntfs_attr_search_ctx *ctx; 380 - MFT_RECORD *mrec; 381 - unsigned long flags; 382 - u32 attr_len; 383 - int err = 0; 37 + struct ntfs_inode *ni = NTFS_I(folio->mapping->host); 384 38 385 - retry_readpage: 386 - BUG_ON(!PageLocked(page)); 387 - vi = page->mapping->host; 388 - i_size = i_size_read(vi); 389 - /* Is the page fully outside i_size? (truncate in progress) */ 390 - if (unlikely(page->index >= (i_size + PAGE_SIZE - 1) >> 391 - PAGE_SHIFT)) { 392 - zero_user(page, 0, PAGE_SIZE); 393 - ntfs_debug("Read outside i_size - truncated?"); 394 - goto done; 395 - } 396 - /* 397 - * This can potentially happen because we clear PageUptodate() during 398 - * ntfs_writepage() of MstProtected() attributes. 399 - */ 400 - if (PageUptodate(page)) { 401 - unlock_page(page); 402 - return 0; 403 - } 404 - ni = NTFS_I(vi); 405 39 /* 406 40 * Only $DATA attributes can be encrypted and only unnamed $DATA 407 41 * attributes can be compressed. Index root can have the flags set but ··· 45 411 * index inodes. 46 412 */ 47 413 if (ni->type != AT_INDEX_ALLOCATION) { 48 - /* If attribute is encrypted, deny access, just like NT4. */ 49 - if (NInoEncrypted(ni)) { 50 - BUG_ON(ni->type != AT_DATA); 51 - err = -EACCES; 52 - goto err_out; 53 - } 54 - /* Compressed data streams are handled in compress.c. 
*/ 55 - if (NInoNonResident(ni) && NInoCompressed(ni)) { 56 - BUG_ON(ni->type != AT_DATA); 57 - BUG_ON(ni->name_len); 58 - return ntfs_read_compressed_block(page); 59 - } 60 - } 61 - /* NInoNonResident() == NInoIndexAllocPresent() */ 62 - if (NInoNonResident(ni)) { 63 - /* Normal, non-resident data stream. */ 64 - return ntfs_read_block(folio); 65 - } 66 - /* 67 - * Attribute is resident, implying it is not compressed or encrypted. 68 - * This also means the attribute is smaller than an mft record and 69 - * hence smaller than a page, so can simply zero out any pages with 70 - * index above 0. Note the attribute can actually be marked compressed 71 - * but if it is resident the actual data is not compressed so we are 72 - * ok to ignore the compressed flag here. 73 - */ 74 - if (unlikely(page->index > 0)) { 75 - zero_user(page, 0, PAGE_SIZE); 76 - goto done; 77 - } 78 - if (!NInoAttr(ni)) 79 - base_ni = ni; 80 - else 81 - base_ni = ni->ext.base_ntfs_ino; 82 - /* Map, pin, and lock the mft record. */ 83 - mrec = map_mft_record(base_ni); 84 - if (IS_ERR(mrec)) { 85 - err = PTR_ERR(mrec); 86 - goto err_out; 87 - } 88 - /* 89 - * If a parallel write made the attribute non-resident, drop the mft 90 - * record and retry the read_folio. 
91 - */ 92 - if (unlikely(NInoNonResident(ni))) { 93 - unmap_mft_record(base_ni); 94 - goto retry_readpage; 95 - } 96 - ctx = ntfs_attr_get_search_ctx(base_ni, mrec); 97 - if (unlikely(!ctx)) { 98 - err = -ENOMEM; 99 - goto unm_err_out; 100 - } 101 - err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 102 - CASE_SENSITIVE, 0, NULL, 0, ctx); 103 - if (unlikely(err)) 104 - goto put_unm_err_out; 105 - attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); 106 - read_lock_irqsave(&ni->size_lock, flags); 107 - if (unlikely(attr_len > ni->initialized_size)) 108 - attr_len = ni->initialized_size; 109 - i_size = i_size_read(vi); 110 - read_unlock_irqrestore(&ni->size_lock, flags); 111 - if (unlikely(attr_len > i_size)) { 112 - /* Race with shrinking truncate. */ 113 - attr_len = i_size; 114 - } 115 - addr = kmap_atomic(page); 116 - /* Copy the data to the page. */ 117 - memcpy(addr, (u8*)ctx->attr + 118 - le16_to_cpu(ctx->attr->data.resident.value_offset), 119 - attr_len); 120 - /* Zero the remainder of the page. */ 121 - memset(addr + attr_len, 0, PAGE_SIZE - attr_len); 122 - flush_dcache_page(page); 123 - kunmap_atomic(addr); 124 - put_unm_err_out: 125 - ntfs_attr_put_search_ctx(ctx); 126 - unm_err_out: 127 - unmap_mft_record(base_ni); 128 - done: 129 - SetPageUptodate(page); 130 - err_out: 131 - unlock_page(page); 132 - return err; 133 - } 134 - 135 - #ifdef NTFS_RW 136 - 137 - /** 138 - * ntfs_write_block - write a @folio to the backing store 139 - * @folio: page cache folio to write out 140 - * @wbc: writeback control structure 141 - * 142 - * This function is for writing folios belonging to non-resident, non-mst 143 - * protected attributes to their backing store. 144 - * 145 - * For a folio with buffers, map and write the dirty buffers asynchronously 146 - * under folio writeback. For a folio without buffers, create buffers for the 147 - * folio, then proceed as above. 148 - * 149 - * If a folio doesn't have buffers the folio dirty state is definitive. 
If 150 - * a folio does have buffers, the folio dirty state is just a hint, 151 - * and the buffer dirty state is definitive. (A hint which has rules: 152 - * dirty buffers against a clean folio is illegal. Other combinations are 153 - * legal and need to be handled. In particular a dirty folio containing 154 - * clean buffers for example.) 155 - * 156 - * Return 0 on success and -errno on error. 157 - * 158 - * Based on ntfs_read_block() and __block_write_full_folio(). 159 - */ 160 - static int ntfs_write_block(struct folio *folio, struct writeback_control *wbc) 161 - { 162 - VCN vcn; 163 - LCN lcn; 164 - s64 initialized_size; 165 - loff_t i_size; 166 - sector_t block, dblock, iblock; 167 - struct inode *vi; 168 - ntfs_inode *ni; 169 - ntfs_volume *vol; 170 - runlist_element *rl; 171 - struct buffer_head *bh, *head; 172 - unsigned long flags; 173 - unsigned int blocksize, vcn_ofs; 174 - int err; 175 - bool need_end_writeback; 176 - unsigned char blocksize_bits; 177 - 178 - vi = folio->mapping->host; 179 - ni = NTFS_I(vi); 180 - vol = ni->vol; 181 - 182 - ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index " 183 - "0x%lx.", ni->mft_no, ni->type, folio->index); 184 - 185 - BUG_ON(!NInoNonResident(ni)); 186 - BUG_ON(NInoMstProtected(ni)); 187 - blocksize = vol->sb->s_blocksize; 188 - blocksize_bits = vol->sb->s_blocksize_bits; 189 - head = folio_buffers(folio); 190 - if (!head) { 191 - BUG_ON(!folio_test_uptodate(folio)); 192 - head = create_empty_buffers(folio, blocksize, 193 - (1 << BH_Uptodate) | (1 << BH_Dirty)); 194 - } 195 - bh = head; 196 - 197 - /* NOTE: Different naming scheme to ntfs_read_block()! */ 198 - 199 - /* The first block in the folio. 
*/ 200 - block = (s64)folio->index << (PAGE_SHIFT - blocksize_bits); 201 - 202 - read_lock_irqsave(&ni->size_lock, flags); 203 - i_size = i_size_read(vi); 204 - initialized_size = ni->initialized_size; 205 - read_unlock_irqrestore(&ni->size_lock, flags); 206 - 207 - /* The first out of bounds block for the data size. */ 208 - dblock = (i_size + blocksize - 1) >> blocksize_bits; 209 - 210 - /* The last (fully or partially) initialized block. */ 211 - iblock = initialized_size >> blocksize_bits; 212 - 213 - /* 214 - * Be very careful. We have no exclusion from block_dirty_folio 215 - * here, and the (potentially unmapped) buffers may become dirty at 216 - * any time. If a buffer becomes dirty here after we've inspected it 217 - * then we just miss that fact, and the folio stays dirty. 218 - * 219 - * Buffers outside i_size may be dirtied by block_dirty_folio; 220 - * handle that here by just cleaning them. 221 - */ 222 - 223 - /* 224 - * Loop through all the buffers in the folio, mapping all the dirty 225 - * buffers to disk addresses and handling any aliases from the 226 - * underlying block device's mapping. 227 - */ 228 - rl = NULL; 229 - err = 0; 230 - do { 231 - bool is_retry = false; 232 - 233 - if (unlikely(block >= dblock)) { 234 - /* 235 - * Mapped buffers outside i_size will occur, because 236 - * this folio can be outside i_size when there is a 237 - * truncate in progress. The contents of such buffers 238 - * were zeroed by ntfs_writepage(). 239 - * 240 - * FIXME: What about the small race window where 241 - * ntfs_writepage() has not done any clearing because 242 - * the folio was within i_size but before we get here, 243 - * vmtruncate() modifies i_size? 244 - */ 245 - clear_buffer_dirty(bh); 246 - set_buffer_uptodate(bh); 247 - continue; 248 - } 249 - 250 - /* Clean buffers are not written out, so no need to map them. */ 251 - if (!buffer_dirty(bh)) 252 - continue; 253 - 254 - /* Make sure we have enough initialized size. 
*/ 255 - if (unlikely((block >= iblock) && 256 - (initialized_size < i_size))) { 257 - /* 258 - * If this folio is fully outside initialized 259 - * size, zero out all folios between the current 260 - * initialized size and the current folio. Just 261 - * use ntfs_read_folio() to do the zeroing 262 - * transparently. 263 - */ 264 - if (block > iblock) { 265 - // TODO: 266 - // For each folio do: 267 - // - read_cache_folio() 268 - // Again for each folio do: 269 - // - wait_on_folio_locked() 270 - // - Check (folio_test_uptodate(folio) && 271 - // !folio_test_error(folio)) 272 - // Update initialized size in the attribute and 273 - // in the inode. 274 - // Again, for each folio do: 275 - // block_dirty_folio(); 276 - // folio_put() 277 - // We don't need to wait on the writes. 278 - // Update iblock. 279 - } 280 - /* 281 - * The current folio straddles initialized size. Zero 282 - * all non-uptodate buffers and set them uptodate (and 283 - * dirty?). Note, there aren't any non-uptodate buffers 284 - * if the folio is uptodate. 285 - * FIXME: For an uptodate folio, the buffers may need to 286 - * be written out because they were not initialized on 287 - * disk before. 288 - */ 289 - if (!folio_test_uptodate(folio)) { 290 - // TODO: 291 - // Zero any non-uptodate buffers up to i_size. 292 - // Set them uptodate and dirty. 293 - } 294 - // TODO: 295 - // Update initialized size in the attribute and in the 296 - // inode (up to i_size). 297 - // Update iblock. 298 - // FIXME: This is inefficient. Try to batch the two 299 - // size changes to happen in one go. 300 - ntfs_error(vol->sb, "Writing beyond initialized size " 301 - "is not supported yet. Sorry."); 302 - err = -EOPNOTSUPP; 303 - break; 304 - // Do NOT set_buffer_new() BUT DO clear buffer range 305 - // outside write request range. 306 - // set_buffer_uptodate() on complete buffers as well as 307 - // set_buffer_dirty(). 308 - } 309 - 310 - /* No need to map buffers that are already mapped. 
*/ 311 - if (buffer_mapped(bh)) 312 - continue; 313 - 314 - /* Unmapped, dirty buffer. Need to map it. */ 315 - bh->b_bdev = vol->sb->s_bdev; 316 - 317 - /* Convert block into corresponding vcn and offset. */ 318 - vcn = (VCN)block << blocksize_bits; 319 - vcn_ofs = vcn & vol->cluster_size_mask; 320 - vcn >>= vol->cluster_size_bits; 321 - if (!rl) { 322 - lock_retry_remap: 323 - down_read(&ni->runlist.lock); 324 - rl = ni->runlist.rl; 325 - } 326 - if (likely(rl != NULL)) { 327 - /* Seek to element containing target vcn. */ 328 - while (rl->length && rl[1].vcn <= vcn) 329 - rl++; 330 - lcn = ntfs_rl_vcn_to_lcn(rl, vcn); 331 - } else 332 - lcn = LCN_RL_NOT_MAPPED; 333 - /* Successful remap. */ 334 - if (lcn >= 0) { 335 - /* Setup buffer head to point to correct block. */ 336 - bh->b_blocknr = ((lcn << vol->cluster_size_bits) + 337 - vcn_ofs) >> blocksize_bits; 338 - set_buffer_mapped(bh); 339 - continue; 340 - } 341 - /* It is a hole, need to instantiate it. */ 342 - if (lcn == LCN_HOLE) { 343 - u8 *kaddr; 344 - unsigned long *bpos, *bend; 345 - 346 - /* Check if the buffer is zero. */ 347 - kaddr = kmap_local_folio(folio, bh_offset(bh)); 348 - bpos = (unsigned long *)kaddr; 349 - bend = (unsigned long *)(kaddr + blocksize); 350 - do { 351 - if (unlikely(*bpos)) 352 - break; 353 - } while (likely(++bpos < bend)); 354 - kunmap_local(kaddr); 355 - if (bpos == bend) { 356 - /* 357 - * Buffer is zero and sparse, no need to write 358 - * it. 359 - */ 360 - bh->b_blocknr = -1; 361 - clear_buffer_dirty(bh); 362 - continue; 363 - } 364 - // TODO: Instantiate the hole. 365 - // clear_buffer_new(bh); 366 - // clean_bdev_bh_alias(bh); 367 - ntfs_error(vol->sb, "Writing into sparse regions is " 368 - "not supported yet. Sorry."); 369 - err = -EOPNOTSUPP; 370 - break; 371 - } 372 - /* If first try and runlist unmapped, map and retry. 
*/ 373 - if (!is_retry && lcn == LCN_RL_NOT_MAPPED) { 374 - is_retry = true; 375 - /* 376 - * Attempt to map runlist, dropping lock for 377 - * the duration. 378 - */ 379 - up_read(&ni->runlist.lock); 380 - err = ntfs_map_runlist(ni, vcn); 381 - if (likely(!err)) 382 - goto lock_retry_remap; 383 - rl = NULL; 384 - } else if (!rl) 385 - up_read(&ni->runlist.lock); 386 414 /* 387 - * If buffer is outside the runlist, truncate has cut it out 388 - * of the runlist. Just clean and clear the buffer and set it 389 - * uptodate so it can get discarded by the VM. 415 + * EFS-encrypted files are not supported. 416 + * (decryption/encryption is not implemented yet) 390 417 */ 391 - if (err == -ENOENT || lcn == LCN_ENOENT) { 392 - bh->b_blocknr = -1; 393 - clear_buffer_dirty(bh); 394 - folio_zero_range(folio, bh_offset(bh), blocksize); 395 - set_buffer_uptodate(bh); 396 - err = 0; 397 - continue; 398 - } 399 - /* Failed to map the buffer, even after retrying. */ 400 - if (!err) 401 - err = -EIO; 402 - bh->b_blocknr = -1; 403 - ntfs_error(vol->sb, "Failed to write to inode 0x%lx, " 404 - "attribute type 0x%x, vcn 0x%llx, offset 0x%x " 405 - "because its location on disk could not be " 406 - "determined%s (error code %i).", ni->mft_no, 407 - ni->type, (unsigned long long)vcn, 408 - vcn_ofs, is_retry ? " even after " 409 - "retrying" : "", err); 410 - break; 411 - } while (block++, (bh = bh->b_this_page) != head); 412 - 413 - /* Release the lock if we took it. */ 414 - if (rl) 415 - up_read(&ni->runlist.lock); 416 - 417 - /* For the error case, need to reset bh to the beginning. */ 418 - bh = head; 419 - 420 - /* Just an optimization, so ->read_folio() is not called later. 
*/ 421 - if (unlikely(!folio_test_uptodate(folio))) { 422 - int uptodate = 1; 423 - do { 424 - if (!buffer_uptodate(bh)) { 425 - uptodate = 0; 426 - bh = head; 427 - break; 428 - } 429 - } while ((bh = bh->b_this_page) != head); 430 - if (uptodate) 431 - folio_mark_uptodate(folio); 432 - } 433 - 434 - /* Setup all mapped, dirty buffers for async write i/o. */ 435 - do { 436 - if (buffer_mapped(bh) && buffer_dirty(bh)) { 437 - lock_buffer(bh); 438 - if (test_clear_buffer_dirty(bh)) { 439 - BUG_ON(!buffer_uptodate(bh)); 440 - mark_buffer_async_write(bh); 441 - } else 442 - unlock_buffer(bh); 443 - } else if (unlikely(err)) { 444 - /* 445 - * For the error case. The buffer may have been set 446 - * dirty during attachment to a dirty folio. 447 - */ 448 - if (err != -ENOMEM) 449 - clear_buffer_dirty(bh); 450 - } 451 - } while ((bh = bh->b_this_page) != head); 452 - 453 - if (unlikely(err)) { 454 - // TODO: Remove the -EOPNOTSUPP check later on... 455 - if (unlikely(err == -EOPNOTSUPP)) 456 - err = 0; 457 - else if (err == -ENOMEM) { 458 - ntfs_warning(vol->sb, "Error allocating memory. " 459 - "Redirtying folio so we try again " 460 - "later."); 461 - /* 462 - * Put the folio back on mapping->dirty_pages, but 463 - * leave its buffer's dirty state as-is. 464 - */ 465 - folio_redirty_for_writepage(wbc, folio); 466 - err = 0; 467 - } else 468 - folio_set_error(folio); 469 - } 470 - 471 - BUG_ON(folio_test_writeback(folio)); 472 - folio_start_writeback(folio); /* Keeps try_to_free_buffers() away. */ 473 - 474 - /* Submit the prepared buffers for i/o. */ 475 - need_end_writeback = true; 476 - do { 477 - struct buffer_head *next = bh->b_this_page; 478 - if (buffer_async_write(bh)) { 479 - submit_bh(REQ_OP_WRITE, bh); 480 - need_end_writeback = false; 481 - } 482 - bh = next; 483 - } while (bh != head); 484 - folio_unlock(folio); 485 - 486 - /* If no i/o was started, need to end writeback here. 
*/ 487 - if (unlikely(need_end_writeback)) 488 - folio_end_writeback(folio); 489 - 490 - ntfs_debug("Done."); 491 - return err; 492 - } 493 - 494 - /** 495 - * ntfs_write_mst_block - write a @page to the backing store 496 - * @page: page cache page to write out 497 - * @wbc: writeback control structure 498 - * 499 - * This function is for writing pages belonging to non-resident, mst protected 500 - * attributes to their backing store. The only supported attributes are index 501 - * allocation and $MFT/$DATA. Both directory inodes and index inodes are 502 - * supported for the index allocation case. 503 - * 504 - * The page must remain locked for the duration of the write because we apply 505 - * the mst fixups, write, and then undo the fixups, so if we were to unlock the 506 - * page before undoing the fixups, any other user of the page will see the 507 - * page contents as corrupt. 508 - * 509 - * We clear the page uptodate flag for the duration of the function to ensure 510 - * exclusion for the $MFT/$DATA case against someone mapping an mft record we 511 - * are about to apply the mst fixups to. 512 - * 513 - * Return 0 on success and -errno on error. 514 - * 515 - * Based on ntfs_write_block(), ntfs_mft_writepage(), and 516 - * write_mft_record_nolock(). 
517 - */ 518 - static int ntfs_write_mst_block(struct page *page, 519 - struct writeback_control *wbc) 520 - { 521 - sector_t block, dblock, rec_block; 522 - struct inode *vi = page->mapping->host; 523 - ntfs_inode *ni = NTFS_I(vi); 524 - ntfs_volume *vol = ni->vol; 525 - u8 *kaddr; 526 - unsigned int rec_size = ni->itype.index.block_size; 527 - ntfs_inode *locked_nis[PAGE_SIZE / NTFS_BLOCK_SIZE]; 528 - struct buffer_head *bh, *head, *tbh, *rec_start_bh; 529 - struct buffer_head *bhs[MAX_BUF_PER_PAGE]; 530 - runlist_element *rl; 531 - int i, nr_locked_nis, nr_recs, nr_bhs, max_bhs, bhs_per_rec, err, err2; 532 - unsigned bh_size, rec_size_bits; 533 - bool sync, is_mft, page_is_dirty, rec_is_dirty; 534 - unsigned char bh_size_bits; 535 - 536 - if (WARN_ON(rec_size < NTFS_BLOCK_SIZE)) 537 - return -EINVAL; 538 - 539 - ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index " 540 - "0x%lx.", vi->i_ino, ni->type, page->index); 541 - BUG_ON(!NInoNonResident(ni)); 542 - BUG_ON(!NInoMstProtected(ni)); 543 - is_mft = (S_ISREG(vi->i_mode) && !vi->i_ino); 544 - /* 545 - * NOTE: ntfs_write_mst_block() would be called for $MFTMirr if a page 546 - * in its page cache were to be marked dirty. However this should 547 - * never happen with the current driver and considering we do not 548 - * handle this case here we do want to BUG(), at least for now. 549 - */ 550 - BUG_ON(!(is_mft || S_ISDIR(vi->i_mode) || 551 - (NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION))); 552 - bh_size = vol->sb->s_blocksize; 553 - bh_size_bits = vol->sb->s_blocksize_bits; 554 - max_bhs = PAGE_SIZE / bh_size; 555 - BUG_ON(!max_bhs); 556 - BUG_ON(max_bhs > MAX_BUF_PER_PAGE); 557 - 558 - /* Were we called for sync purposes? */ 559 - sync = (wbc->sync_mode == WB_SYNC_ALL); 560 - 561 - /* Make sure we have mapped buffers. 
*/ 562 - bh = head = page_buffers(page); 563 - BUG_ON(!bh); 564 - 565 - rec_size_bits = ni->itype.index.block_size_bits; 566 - BUG_ON(!(PAGE_SIZE >> rec_size_bits)); 567 - bhs_per_rec = rec_size >> bh_size_bits; 568 - BUG_ON(!bhs_per_rec); 569 - 570 - /* The first block in the page. */ 571 - rec_block = block = (sector_t)page->index << 572 - (PAGE_SHIFT - bh_size_bits); 573 - 574 - /* The first out of bounds block for the data size. */ 575 - dblock = (i_size_read(vi) + bh_size - 1) >> bh_size_bits; 576 - 577 - rl = NULL; 578 - err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0; 579 - page_is_dirty = rec_is_dirty = false; 580 - rec_start_bh = NULL; 581 - do { 582 - bool is_retry = false; 583 - 584 - if (likely(block < rec_block)) { 585 - if (unlikely(block >= dblock)) { 586 - clear_buffer_dirty(bh); 587 - set_buffer_uptodate(bh); 588 - continue; 589 - } 590 - /* 591 - * This block is not the first one in the record. We 592 - * ignore the buffer's dirty state because we could 593 - * have raced with a parallel mark_ntfs_record_dirty(). 594 - */ 595 - if (!rec_is_dirty) 596 - continue; 597 - if (unlikely(err2)) { 598 - if (err2 != -ENOMEM) 599 - clear_buffer_dirty(bh); 600 - continue; 601 - } 602 - } else /* if (block == rec_block) */ { 603 - BUG_ON(block > rec_block); 604 - /* This block is the first one in the record. */ 605 - rec_block += bhs_per_rec; 606 - err2 = 0; 607 - if (unlikely(block >= dblock)) { 608 - clear_buffer_dirty(bh); 609 - continue; 610 - } 611 - if (!buffer_dirty(bh)) { 612 - /* Clean records are not written out. */ 613 - rec_is_dirty = false; 614 - continue; 615 - } 616 - rec_is_dirty = true; 617 - rec_start_bh = bh; 618 - } 619 - /* Need to map the buffer if it is not mapped already. */ 620 - if (unlikely(!buffer_mapped(bh))) { 621 - VCN vcn; 622 - LCN lcn; 623 - unsigned int vcn_ofs; 624 - 625 - bh->b_bdev = vol->sb->s_bdev; 626 - /* Obtain the vcn and offset of the current block. 
*/ 627 - vcn = (VCN)block << bh_size_bits; 628 - vcn_ofs = vcn & vol->cluster_size_mask; 629 - vcn >>= vol->cluster_size_bits; 630 - if (!rl) { 631 - lock_retry_remap: 632 - down_read(&ni->runlist.lock); 633 - rl = ni->runlist.rl; 634 - } 635 - if (likely(rl != NULL)) { 636 - /* Seek to element containing target vcn. */ 637 - while (rl->length && rl[1].vcn <= vcn) 638 - rl++; 639 - lcn = ntfs_rl_vcn_to_lcn(rl, vcn); 640 - } else 641 - lcn = LCN_RL_NOT_MAPPED; 642 - /* Successful remap. */ 643 - if (likely(lcn >= 0)) { 644 - /* Setup buffer head to correct block. */ 645 - bh->b_blocknr = ((lcn << 646 - vol->cluster_size_bits) + 647 - vcn_ofs) >> bh_size_bits; 648 - set_buffer_mapped(bh); 649 - } else { 650 - /* 651 - * Remap failed. Retry to map the runlist once 652 - * unless we are working on $MFT which always 653 - * has the whole of its runlist in memory. 654 - */ 655 - if (!is_mft && !is_retry && 656 - lcn == LCN_RL_NOT_MAPPED) { 657 - is_retry = true; 658 - /* 659 - * Attempt to map runlist, dropping 660 - * lock for the duration. 661 - */ 662 - up_read(&ni->runlist.lock); 663 - err2 = ntfs_map_runlist(ni, vcn); 664 - if (likely(!err2)) 665 - goto lock_retry_remap; 666 - if (err2 == -ENOMEM) 667 - page_is_dirty = true; 668 - lcn = err2; 669 - } else { 670 - err2 = -EIO; 671 - if (!rl) 672 - up_read(&ni->runlist.lock); 673 - } 674 - /* Hard error. Abort writing this record. 
*/ 675 - if (!err || err == -ENOMEM) 676 - err = err2; 677 - bh->b_blocknr = -1; 678 - ntfs_error(vol->sb, "Cannot write ntfs record " 679 - "0x%llx (inode 0x%lx, " 680 - "attribute type 0x%x) because " 681 - "its location on disk could " 682 - "not be determined (error " 683 - "code %lli).", 684 - (long long)block << 685 - bh_size_bits >> 686 - vol->mft_record_size_bits, 687 - ni->mft_no, ni->type, 688 - (long long)lcn); 689 - /* 690 - * If this is not the first buffer, remove the 691 - * buffers in this record from the list of 692 - * buffers to write and clear their dirty bit 693 - * if not error -ENOMEM. 694 - */ 695 - if (rec_start_bh != bh) { 696 - while (bhs[--nr_bhs] != rec_start_bh) 697 - ; 698 - if (err2 != -ENOMEM) { 699 - do { 700 - clear_buffer_dirty( 701 - rec_start_bh); 702 - } while ((rec_start_bh = 703 - rec_start_bh-> 704 - b_this_page) != 705 - bh); 706 - } 707 - } 708 - continue; 709 - } 710 - } 711 - BUG_ON(!buffer_uptodate(bh)); 712 - BUG_ON(nr_bhs >= max_bhs); 713 - bhs[nr_bhs++] = bh; 714 - } while (block++, (bh = bh->b_this_page) != head); 715 - if (unlikely(rl)) 716 - up_read(&ni->runlist.lock); 717 - /* If there were no dirty buffers, we are done. */ 718 - if (!nr_bhs) 719 - goto done; 720 - /* Map the page so we can access its contents. */ 721 - kaddr = kmap(page); 722 - /* Clear the page uptodate flag whilst the mst fixups are applied. */ 723 - BUG_ON(!PageUptodate(page)); 724 - ClearPageUptodate(page); 725 - for (i = 0; i < nr_bhs; i++) { 726 - unsigned int ofs; 727 - 728 - /* Skip buffers which are not at the beginning of records. */ 729 - if (i % bhs_per_rec) 730 - continue; 731 - tbh = bhs[i]; 732 - ofs = bh_offset(tbh); 733 - if (is_mft) { 734 - ntfs_inode *tni; 735 - unsigned long mft_no; 736 - 737 - /* Get the mft record number. */ 738 - mft_no = (((s64)page->index << PAGE_SHIFT) + ofs) 739 - >> rec_size_bits; 740 - /* Check whether to write this mft record. 
*/ 741 - tni = NULL; 742 - if (!ntfs_may_write_mft_record(vol, mft_no, 743 - (MFT_RECORD*)(kaddr + ofs), &tni)) { 744 - /* 745 - * The record should not be written. This 746 - * means we need to redirty the page before 747 - * returning. 748 - */ 749 - page_is_dirty = true; 750 - /* 751 - * Remove the buffers in this mft record from 752 - * the list of buffers to write. 753 - */ 754 - do { 755 - bhs[i] = NULL; 756 - } while (++i % bhs_per_rec); 757 - continue; 758 - } 759 - /* 760 - * The record should be written. If a locked ntfs 761 - * inode was returned, add it to the array of locked 762 - * ntfs inodes. 763 - */ 764 - if (tni) 765 - locked_nis[nr_locked_nis++] = tni; 766 - } 767 - /* Apply the mst protection fixups. */ 768 - err2 = pre_write_mst_fixup((NTFS_RECORD*)(kaddr + ofs), 769 - rec_size); 770 - if (unlikely(err2)) { 771 - if (!err || err == -ENOMEM) 772 - err = -EIO; 773 - ntfs_error(vol->sb, "Failed to apply mst fixups " 774 - "(inode 0x%lx, attribute type 0x%x, " 775 - "page index 0x%lx, page offset 0x%x)!" 776 - " Unmount and run chkdsk.", vi->i_ino, 777 - ni->type, page->index, ofs); 778 - /* 779 - * Mark all the buffers in this record clean as we do 780 - * not want to write corrupt data to disk. 781 - */ 782 - do { 783 - clear_buffer_dirty(bhs[i]); 784 - bhs[i] = NULL; 785 - } while (++i % bhs_per_rec); 786 - continue; 787 - } 788 - nr_recs++; 789 - } 790 - /* If no records are to be written out, we are done. */ 791 - if (!nr_recs) 792 - goto unm_done; 793 - flush_dcache_page(page); 794 - /* Lock buffers and start synchronous write i/o on them. */ 795 - for (i = 0; i < nr_bhs; i++) { 796 - tbh = bhs[i]; 797 - if (!tbh) 798 - continue; 799 - if (!trylock_buffer(tbh)) 800 - BUG(); 801 - /* The buffer dirty state is now irrelevant, just clean it. 
*/ 802 - clear_buffer_dirty(tbh); 803 - BUG_ON(!buffer_uptodate(tbh)); 804 - BUG_ON(!buffer_mapped(tbh)); 805 - get_bh(tbh); 806 - tbh->b_end_io = end_buffer_write_sync; 807 - submit_bh(REQ_OP_WRITE, tbh); 808 - } 809 - /* Synchronize the mft mirror now if not @sync. */ 810 - if (is_mft && !sync) 811 - goto do_mirror; 812 - do_wait: 813 - /* Wait on i/o completion of buffers. */ 814 - for (i = 0; i < nr_bhs; i++) { 815 - tbh = bhs[i]; 816 - if (!tbh) 817 - continue; 818 - wait_on_buffer(tbh); 819 - if (unlikely(!buffer_uptodate(tbh))) { 820 - ntfs_error(vol->sb, "I/O error while writing ntfs " 821 - "record buffer (inode 0x%lx, " 822 - "attribute type 0x%x, page index " 823 - "0x%lx, page offset 0x%lx)! Unmount " 824 - "and run chkdsk.", vi->i_ino, ni->type, 825 - page->index, bh_offset(tbh)); 826 - if (!err || err == -ENOMEM) 827 - err = -EIO; 828 - /* 829 - * Set the buffer uptodate so the page and buffer 830 - * states do not become out of sync. 831 - */ 832 - set_buffer_uptodate(tbh); 833 - } 834 - } 835 - /* If @sync, now synchronize the mft mirror. */ 836 - if (is_mft && sync) { 837 - do_mirror: 838 - for (i = 0; i < nr_bhs; i++) { 839 - unsigned long mft_no; 840 - unsigned int ofs; 841 - 842 - /* 843 - * Skip buffers which are not at the beginning of 844 - * records. 845 - */ 846 - if (i % bhs_per_rec) 847 - continue; 848 - tbh = bhs[i]; 849 - /* Skip removed buffers (and hence records). */ 850 - if (!tbh) 851 - continue; 852 - ofs = bh_offset(tbh); 853 - /* Get the mft record number. */ 854 - mft_no = (((s64)page->index << PAGE_SHIFT) + ofs) 855 - >> rec_size_bits; 856 - if (mft_no < vol->mftmirr_size) 857 - ntfs_sync_mft_mirror(vol, mft_no, 858 - (MFT_RECORD*)(kaddr + ofs), 859 - sync); 860 - } 861 - if (!sync) 862 - goto do_wait; 863 - } 864 - /* Remove the mst protection fixups again. 
*/ 865 - for (i = 0; i < nr_bhs; i++) { 866 - if (!(i % bhs_per_rec)) { 867 - tbh = bhs[i]; 868 - if (!tbh) 869 - continue; 870 - post_write_mst_fixup((NTFS_RECORD*)(kaddr + 871 - bh_offset(tbh))); 872 - } 873 - } 874 - flush_dcache_page(page); 875 - unm_done: 876 - /* Unlock any locked inodes. */ 877 - while (nr_locked_nis-- > 0) { 878 - ntfs_inode *tni, *base_tni; 879 - 880 - tni = locked_nis[nr_locked_nis]; 881 - /* Get the base inode. */ 882 - mutex_lock(&tni->extent_lock); 883 - if (tni->nr_extents >= 0) 884 - base_tni = tni; 885 - else { 886 - base_tni = tni->ext.base_ntfs_ino; 887 - BUG_ON(!base_tni); 888 - } 889 - mutex_unlock(&tni->extent_lock); 890 - ntfs_debug("Unlocking %s inode 0x%lx.", 891 - tni == base_tni ? "base" : "extent", 892 - tni->mft_no); 893 - mutex_unlock(&tni->mrec_lock); 894 - atomic_dec(&tni->count); 895 - iput(VFS_I(base_tni)); 896 - } 897 - SetPageUptodate(page); 898 - kunmap(page); 899 - done: 900 - if (unlikely(err && err != -ENOMEM)) { 901 - /* 902 - * Set page error if there is only one ntfs record in the page. 903 - * Otherwise we would loose per-record granularity. 904 - */ 905 - if (ni->itype.index.block_size == PAGE_SIZE) 906 - SetPageError(page); 907 - NVolSetErrors(vol); 908 - } 909 - if (page_is_dirty) { 910 - ntfs_debug("Page still contains one or more dirty ntfs " 911 - "records. Redirtying the page starting at " 912 - "record 0x%lx.", page->index << 913 - (PAGE_SHIFT - rec_size_bits)); 914 - redirty_page_for_writepage(wbc, page); 915 - unlock_page(page); 916 - } else { 917 - /* 918 - * Keep the VM happy. This must be done otherwise the 919 - * radix-tree tag PAGECACHE_TAG_DIRTY remains set even though 920 - * the page is clean. 
921 - */ 922 - BUG_ON(PageWriteback(page)); 923 - set_page_writeback(page); 924 - unlock_page(page); 925 - end_page_writeback(page); 926 - } 927 - if (likely(!err)) 928 - ntfs_debug("Done."); 929 - return err; 930 - } 931 - 932 - /** 933 - * ntfs_writepage - write a @page to the backing store 934 - * @page: page cache page to write out 935 - * @wbc: writeback control structure 936 - * 937 - * This is called from the VM when it wants to have a dirty ntfs page cache 938 - * page cleaned. The VM has already locked the page and marked it clean. 939 - * 940 - * For non-resident attributes, ntfs_writepage() writes the @page by calling 941 - * the ntfs version of the generic block_write_full_folio() function, 942 - * ntfs_write_block(), which in turn if necessary creates and writes the 943 - * buffers associated with the page asynchronously. 944 - * 945 - * For resident attributes, OTOH, ntfs_writepage() writes the @page by copying 946 - * the data to the mft record (which at this stage is most likely in memory). 947 - * The mft record is then marked dirty and written out asynchronously via the 948 - * vfs inode dirty code path for the inode the mft record belongs to or via the 949 - * vm page dirty code path for the page the mft record is in. 950 - * 951 - * Based on ntfs_read_folio() and fs/buffer.c::block_write_full_folio(). 952 - * 953 - * Return 0 on success and -errno on error. 954 - */ 955 - static int ntfs_writepage(struct page *page, struct writeback_control *wbc) 956 - { 957 - struct folio *folio = page_folio(page); 958 - loff_t i_size; 959 - struct inode *vi = folio->mapping->host; 960 - ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi); 961 - char *addr; 962 - ntfs_attr_search_ctx *ctx = NULL; 963 - MFT_RECORD *m = NULL; 964 - u32 attr_len; 965 - int err; 966 - 967 - retry_writepage: 968 - BUG_ON(!folio_test_locked(folio)); 969 - i_size = i_size_read(vi); 970 - /* Is the folio fully outside i_size? 
(truncate in progress) */ 971 - if (unlikely(folio->index >= (i_size + PAGE_SIZE - 1) >> 972 - PAGE_SHIFT)) { 973 - /* 974 - * The folio may have dirty, unmapped buffers. Make them 975 - * freeable here, so the page does not leak. 976 - */ 977 - block_invalidate_folio(folio, 0, folio_size(folio)); 978 - folio_unlock(folio); 979 - ntfs_debug("Write outside i_size - truncated?"); 980 - return 0; 981 - } 982 - /* 983 - * Only $DATA attributes can be encrypted and only unnamed $DATA 984 - * attributes can be compressed. Index root can have the flags set but 985 - * this means to create compressed/encrypted files, not that the 986 - * attribute is compressed/encrypted. Note we need to check for 987 - * AT_INDEX_ALLOCATION since this is the type of both directory and 988 - * index inodes. 989 - */ 990 - if (ni->type != AT_INDEX_ALLOCATION) { 991 - /* If file is encrypted, deny access, just like NT4. */ 992 418 if (NInoEncrypted(ni)) { 993 419 folio_unlock(folio); 994 - BUG_ON(ni->type != AT_DATA); 995 - ntfs_debug("Denying write access to encrypted file."); 996 - return -EACCES; 420 + return -EOPNOTSUPP; 997 421 } 998 422 /* Compressed data streams are handled in compress.c. */ 999 - if (NInoNonResident(ni) && NInoCompressed(ni)) { 1000 - BUG_ON(ni->type != AT_DATA); 1001 - BUG_ON(ni->name_len); 1002 - // TODO: Implement and replace this with 1003 - // return ntfs_write_compressed_block(page); 1004 - folio_unlock(folio); 1005 - ntfs_error(vi->i_sb, "Writing to compressed files is " 1006 - "not supported yet. Sorry."); 1007 - return -EOPNOTSUPP; 1008 - } 1009 - // TODO: Implement and remove this check. 1010 - if (NInoNonResident(ni) && NInoSparse(ni)) { 1011 - folio_unlock(folio); 1012 - ntfs_error(vi->i_sb, "Writing to sparse files is not " 1013 - "supported yet. 
Sorry."); 1014 - return -EOPNOTSUPP; 1015 - } 423 + if (NInoNonResident(ni) && NInoCompressed(ni)) 424 + return ntfs_read_compressed_block(folio); 1016 425 } 1017 - /* NInoNonResident() == NInoIndexAllocPresent() */ 1018 - if (NInoNonResident(ni)) { 1019 - /* We have to zero every time due to mmap-at-end-of-file. */ 1020 - if (folio->index >= (i_size >> PAGE_SHIFT)) { 1021 - /* The folio straddles i_size. */ 1022 - unsigned int ofs = i_size & (folio_size(folio) - 1); 1023 - folio_zero_segment(folio, ofs, folio_size(folio)); 1024 - } 1025 - /* Handle mst protected attributes. */ 1026 - if (NInoMstProtected(ni)) 1027 - return ntfs_write_mst_block(page, wbc); 1028 - /* Normal, non-resident data stream. */ 1029 - return ntfs_write_block(folio, wbc); 1030 - } 1031 - /* 1032 - * Attribute is resident, implying it is not compressed, encrypted, or 1033 - * mst protected. This also means the attribute is smaller than an mft 1034 - * record and hence smaller than a folio, so can simply return error on 1035 - * any folios with index above 0. Note the attribute can actually be 1036 - * marked compressed but if it is resident the actual data is not 1037 - * compressed so we are ok to ignore the compressed flag here. 1038 - */ 1039 - BUG_ON(folio_buffers(folio)); 1040 - BUG_ON(!folio_test_uptodate(folio)); 1041 - if (unlikely(folio->index > 0)) { 1042 - ntfs_error(vi->i_sb, "BUG()! folio->index (0x%lx) > 0. " 1043 - "Aborting write.", folio->index); 1044 - BUG_ON(folio_test_writeback(folio)); 1045 - folio_start_writeback(folio); 1046 - folio_unlock(folio); 1047 - folio_end_writeback(folio); 1048 - return -EIO; 1049 - } 1050 - if (!NInoAttr(ni)) 1051 - base_ni = ni; 1052 - else 1053 - base_ni = ni->ext.base_ntfs_ino; 1054 - /* Map, pin, and lock the mft record. 
*/ 1055 - m = map_mft_record(base_ni); 1056 - if (IS_ERR(m)) { 1057 - err = PTR_ERR(m); 1058 - m = NULL; 1059 - ctx = NULL; 1060 - goto err_out; 1061 - } 1062 - /* 1063 - * If a parallel write made the attribute non-resident, drop the mft 1064 - * record and retry the writepage. 1065 - */ 1066 - if (unlikely(NInoNonResident(ni))) { 1067 - unmap_mft_record(base_ni); 1068 - goto retry_writepage; 1069 - } 1070 - ctx = ntfs_attr_get_search_ctx(base_ni, m); 1071 - if (unlikely(!ctx)) { 1072 - err = -ENOMEM; 1073 - goto err_out; 1074 - } 1075 - err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 1076 - CASE_SENSITIVE, 0, NULL, 0, ctx); 1077 - if (unlikely(err)) 1078 - goto err_out; 1079 - /* 1080 - * Keep the VM happy. This must be done otherwise 1081 - * PAGECACHE_TAG_DIRTY remains set even though the folio is clean. 1082 - */ 1083 - BUG_ON(folio_test_writeback(folio)); 1084 - folio_start_writeback(folio); 1085 - folio_unlock(folio); 1086 - attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); 1087 - i_size = i_size_read(vi); 1088 - if (unlikely(attr_len > i_size)) { 1089 - /* Race with shrinking truncate or a failed truncate. */ 1090 - attr_len = i_size; 1091 - /* 1092 - * If the truncate failed, fix it up now. If a concurrent 1093 - * truncate, we do its job, so it does not have to do anything. 1094 - */ 1095 - err = ntfs_resident_attr_value_resize(ctx->mrec, ctx->attr, 1096 - attr_len); 1097 - /* Shrinking cannot fail. */ 1098 - BUG_ON(err); 1099 - } 1100 - addr = kmap_local_folio(folio, 0); 1101 - /* Copy the data from the folio to the mft record. */ 1102 - memcpy((u8*)ctx->attr + 1103 - le16_to_cpu(ctx->attr->data.resident.value_offset), 1104 - addr, attr_len); 1105 - /* Zero out of bounds area in the page cache folio. */ 1106 - memset(addr + attr_len, 0, folio_size(folio) - attr_len); 1107 - kunmap_local(addr); 1108 - flush_dcache_folio(folio); 1109 - flush_dcache_mft_record_page(ctx->ntfs_ino); 1110 - /* We are done with the folio. 
*/ 1111 - folio_end_writeback(folio); 1112 - /* Finally, mark the mft record dirty, so it gets written back. */ 1113 - mark_mft_record_dirty(ctx->ntfs_ino); 1114 - ntfs_attr_put_search_ctx(ctx); 1115 - unmap_mft_record(base_ni); 426 + 427 + iomap_bio_read_folio(folio, &ntfs_read_iomap_ops); 1116 428 return 0; 1117 - err_out: 1118 - if (err == -ENOMEM) { 1119 - ntfs_warning(vi->i_sb, "Error allocating memory. Redirtying " 1120 - "page so we try again later."); 1121 - /* 1122 - * Put the folio back on mapping->dirty_pages, but leave its 1123 - * buffers' dirty state as-is. 1124 - */ 1125 - folio_redirty_for_writepage(wbc, folio); 1126 - err = 0; 1127 - } else { 1128 - ntfs_error(vi->i_sb, "Resident attribute write failed with " 1129 - "error %i.", err); 1130 - folio_set_error(folio); 1131 - NVolSetErrors(ni->vol); 1132 - } 1133 - folio_unlock(folio); 1134 - if (ctx) 1135 - ntfs_attr_put_search_ctx(ctx); 1136 - if (m) 1137 - unmap_mft_record(base_ni); 1138 - return err; 1139 429 } 1140 430 1141 - #endif /* NTFS_RW */ 1142 - 1143 - /** 431 + /* 1144 432 * ntfs_bmap - map logical file block to physical device block 1145 433 * @mapping: address space mapping to which the block to be mapped belongs 1146 434 * @block: logical block to map to its physical device block ··· 89 1533 { 90 1534 s64 ofs, size; 91 1535 loff_t i_size; 92 - LCN lcn; 1536 + s64 lcn; 93 1537 unsigned long blocksize, flags; 94 - ntfs_inode *ni = NTFS_I(mapping->host); 95 - ntfs_volume *vol = ni->vol; 96 - unsigned delta; 97 - unsigned char blocksize_bits, cluster_size_shift; 1538 + struct ntfs_inode *ni = NTFS_I(mapping->host); 1539 + struct ntfs_volume *vol = ni->vol; 1540 + unsigned int delta; 1541 + unsigned char blocksize_bits; 98 1542 99 1543 ntfs_debug("Entering for mft_no 0x%lx, logical block 0x%llx.", 100 1544 ni->mft_no, (unsigned long long)block); 101 - if (ni->type != AT_DATA || !NInoNonResident(ni) || NInoEncrypted(ni)) { 102 - ntfs_error(vol->sb, "BMAP does not make sense for %s " 103 - 
"attributes, returning 0.", 1545 + if (ni->type != AT_DATA || !NInoNonResident(ni) || NInoEncrypted(ni) || 1546 + NInoMstProtected(ni)) { 1547 + ntfs_error(vol->sb, "BMAP does not make sense for %s attributes, returning 0.", 104 1548 (ni->type != AT_DATA) ? "non-data" : 105 1549 (!NInoNonResident(ni) ? "resident" : 106 1550 "encrypted")); 107 1551 return 0; 108 1552 } 109 1553 /* None of these can happen. */ 110 - BUG_ON(NInoCompressed(ni)); 111 - BUG_ON(NInoMstProtected(ni)); 112 1554 blocksize = vol->sb->s_blocksize; 113 1555 blocksize_bits = vol->sb->s_blocksize_bits; 114 1556 ofs = (s64)block << blocksize_bits; ··· 121 1567 */ 122 1568 if (unlikely(ofs >= size || (ofs + blocksize > size && size < i_size))) 123 1569 goto hole; 124 - cluster_size_shift = vol->cluster_size_bits; 125 1570 down_read(&ni->runlist.lock); 126 - lcn = ntfs_attr_vcn_to_lcn_nolock(ni, ofs >> cluster_size_shift, false); 1571 + lcn = ntfs_attr_vcn_to_lcn_nolock(ni, ntfs_bytes_to_cluster(vol, ofs), 1572 + false); 127 1573 up_read(&ni->runlist.lock); 128 1574 if (unlikely(lcn < LCN_HOLE)) { 129 1575 /* ··· 143 1589 */ 144 1590 goto hole; 145 1591 case LCN_ENOMEM: 146 - ntfs_error(vol->sb, "Not enough memory to complete " 147 - "mapping for inode 0x%lx. " 148 - "Returning 0.", ni->mft_no); 1592 + ntfs_error(vol->sb, 1593 + "Not enough memory to complete mapping for inode 0x%lx. Returning 0.", 1594 + ni->mft_no); 149 1595 break; 150 1596 default: 151 - ntfs_error(vol->sb, "Failed to complete mapping for " 152 - "inode 0x%lx. Run chkdsk. " 153 - "Returning 0.", ni->mft_no); 1597 + ntfs_error(vol->sb, 1598 + "Failed to complete mapping for inode 0x%lx. Run chkdsk. 
Returning 0.", 1599 + ni->mft_no); 154 1600 break; 155 1601 } 156 1602 return 0; ··· 167 1613 */ 168 1614 delta = ofs & vol->cluster_size_mask; 169 1615 if (unlikely(sizeof(block) < sizeof(lcn))) { 170 - block = lcn = ((lcn << cluster_size_shift) + delta) >> 1616 + block = lcn = (ntfs_cluster_to_bytes(vol, lcn) + delta) >> 171 1617 blocksize_bits; 172 1618 /* If the block number was truncated return 0. */ 173 1619 if (unlikely(block != lcn)) { 174 - ntfs_error(vol->sb, "Physical block 0x%llx is too " 175 - "large to be returned, returning 0.", 176 - (long long)lcn); 1620 + ntfs_error(vol->sb, 1621 + "Physical block 0x%llx is too large to be returned, returning 0.", 1622 + (long long)lcn); 177 1623 return 0; 178 1624 } 179 1625 } else 180 - block = ((lcn << cluster_size_shift) + delta) >> 1626 + block = (ntfs_cluster_to_bytes(vol, lcn) + delta) >> 181 1627 blocksize_bits; 182 1628 ntfs_debug("Done (returning block 0x%llx).", (unsigned long long)lcn); 183 1629 return block; 184 1630 } 185 1631 186 - /* 187 - * ntfs_normal_aops - address space operations for normal inodes and attributes 188 - * 189 - * Note these are not used for compressed or mst protected inodes and 190 - * attributes. 
191 - */ 192 - const struct address_space_operations ntfs_normal_aops = { 193 - .read_folio = ntfs_read_folio, 194 - #ifdef NTFS_RW 195 - .writepage = ntfs_writepage, 196 - .dirty_folio = block_dirty_folio, 197 - #endif /* NTFS_RW */ 198 - .bmap = ntfs_bmap, 199 - .migrate_folio = buffer_migrate_folio, 200 - .is_partially_uptodate = block_is_partially_uptodate, 201 - .error_remove_folio = generic_error_remove_folio, 202 - }; 1632 + static void ntfs_readahead(struct readahead_control *rac) 1633 + { 1634 + struct address_space *mapping = rac->mapping; 1635 + struct inode *inode = mapping->host; 1636 + struct ntfs_inode *ni = NTFS_I(inode); 203 1637 204 - /* 205 - * ntfs_compressed_aops - address space operations for compressed inodes 206 - */ 207 - const struct address_space_operations ntfs_compressed_aops = { 208 - .read_folio = ntfs_read_folio, 209 - #ifdef NTFS_RW 210 - .writepage = ntfs_writepage, 211 - .dirty_folio = block_dirty_folio, 212 - #endif /* NTFS_RW */ 213 - .migrate_folio = buffer_migrate_folio, 214 - .is_partially_uptodate = block_is_partially_uptodate, 215 - .error_remove_folio = generic_error_remove_folio, 216 - }; 217 - 218 - /* 219 - * ntfs_mst_aops - general address space operations for mst protecteed inodes 220 - * and attributes 221 - */ 222 - const struct address_space_operations ntfs_mst_aops = { 223 - .read_folio = ntfs_read_folio, /* Fill page with data. */ 224 - #ifdef NTFS_RW 225 - .writepage = ntfs_writepage, /* Write dirty page to disk. 
*/ 226 - .dirty_folio = filemap_dirty_folio, 227 - #endif /* NTFS_RW */ 228 - .migrate_folio = buffer_migrate_folio, 229 - .is_partially_uptodate = block_is_partially_uptodate, 230 - .error_remove_folio = generic_error_remove_folio, 231 - }; 232 - 233 - #ifdef NTFS_RW 234 - 235 - /** 236 - * mark_ntfs_record_dirty - mark an ntfs record dirty 237 - * @page: page containing the ntfs record to mark dirty 238 - * @ofs: byte offset within @page at which the ntfs record begins 239 - * 240 - * Set the buffers and the page in which the ntfs record is located dirty. 241 - * 242 - * The latter also marks the vfs inode the ntfs record belongs to dirty 243 - * (I_DIRTY_PAGES only). 244 - * 245 - * If the page does not have buffers, we create them and set them uptodate. 246 - * The page may not be locked which is why we need to handle the buffers under 247 - * the mapping->i_private_lock. Once the buffers are marked dirty we no longer 248 - * need the lock since try_to_free_buffers() does not free dirty buffers. 
249 - */ 250 - void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) { 251 - struct address_space *mapping = page->mapping; 252 - ntfs_inode *ni = NTFS_I(mapping->host); 253 - struct buffer_head *bh, *head, *buffers_to_free = NULL; 254 - unsigned int end, bh_size, bh_ofs; 255 - 256 - BUG_ON(!PageUptodate(page)); 257 - end = ofs + ni->itype.index.block_size; 258 - bh_size = VFS_I(ni)->i_sb->s_blocksize; 259 - spin_lock(&mapping->i_private_lock); 260 - if (unlikely(!page_has_buffers(page))) { 261 - spin_unlock(&mapping->i_private_lock); 262 - bh = head = alloc_page_buffers(page, bh_size, true); 263 - spin_lock(&mapping->i_private_lock); 264 - if (likely(!page_has_buffers(page))) { 265 - struct buffer_head *tail; 266 - 267 - do { 268 - set_buffer_uptodate(bh); 269 - tail = bh; 270 - bh = bh->b_this_page; 271 - } while (bh); 272 - tail->b_this_page = head; 273 - attach_page_private(page, head); 274 - } else 275 - buffers_to_free = bh; 276 - } 277 - bh = head = page_buffers(page); 278 - BUG_ON(!bh); 279 - do { 280 - bh_ofs = bh_offset(bh); 281 - if (bh_ofs + bh_size <= ofs) 282 - continue; 283 - if (unlikely(bh_ofs >= end)) 284 - break; 285 - set_buffer_dirty(bh); 286 - } while ((bh = bh->b_this_page) != head); 287 - spin_unlock(&mapping->i_private_lock); 288 - filemap_dirty_folio(mapping, page_folio(page)); 289 - if (unlikely(buffers_to_free)) { 290 - do { 291 - bh = buffers_to_free->b_this_page; 292 - free_buffer_head(buffers_to_free); 293 - buffers_to_free = bh; 294 - } while (buffers_to_free); 295 - } 1638 + /* 1639 + * Resident files are not cached in the page cache, 1640 + * and readahead is not implemented for compressed files. 
1641 + */ 1642 + if (!NInoNonResident(ni) || NInoCompressed(ni)) 1643 + return; 1644 + iomap_bio_readahead(rac, &ntfs_read_iomap_ops); 296 1645 } 297 1646 298 - #endif /* NTFS_RW */ 1647 + static int ntfs_writepages(struct address_space *mapping, 1648 + struct writeback_control *wbc) 1649 + { 1650 + struct inode *inode = mapping->host; 1651 + struct ntfs_inode *ni = NTFS_I(inode); 1652 + struct iomap_writepage_ctx wpc = { 1653 + .inode = mapping->host, 1654 + .wbc = wbc, 1655 + .ops = &ntfs_writeback_ops, 1656 + }; 1657 + 1658 + if (NVolShutdown(ni->vol)) 1659 + return -EIO; 1660 + 1661 + if (!NInoNonResident(ni)) 1662 + return 0; 1663 + 1664 + /* 1665 + * EFS-encrypted files are not supported. 1666 + * (decryption/encryption is not implemented yet) 1667 + */ 1668 + if (NInoEncrypted(ni)) { 1669 + ntfs_debug("Encrypted I/O not supported"); 1670 + return -EOPNOTSUPP; 1671 + } 1672 + 1673 + return iomap_writepages(&wpc); 1674 + } 1675 + 1676 + static int ntfs_swap_activate(struct swap_info_struct *sis, 1677 + struct file *swap_file, sector_t *span) 1678 + { 1679 + return iomap_swapfile_activate(sis, swap_file, span, 1680 + &ntfs_read_iomap_ops); 1681 + } 1682 + 1683 + const struct address_space_operations ntfs_aops = { 1684 + .read_folio = ntfs_read_folio, 1685 + .readahead = ntfs_readahead, 1686 + .writepages = ntfs_writepages, 1687 + .direct_IO = noop_direct_IO, 1688 + .dirty_folio = iomap_dirty_folio, 1689 + .bmap = ntfs_bmap, 1690 + .migrate_folio = filemap_migrate_folio, 1691 + .is_partially_uptodate = iomap_is_partially_uptodate, 1692 + .error_remove_folio = generic_error_remove_folio, 1693 + .release_folio = iomap_release_folio, 1694 + .invalidate_folio = iomap_invalidate_folio, 1695 + .swap_activate = ntfs_swap_activate, 1696 + }; 1697 + 1698 + const struct address_space_operations ntfs_mft_aops = { 1699 + .read_folio = ntfs_read_folio, 1700 + .readahead = ntfs_readahead, 1701 + .writepages = ntfs_mft_writepages, 1702 + .dirty_folio = iomap_dirty_folio, 1703 
+ .bmap = ntfs_bmap, 1704 + .migrate_folio = filemap_migrate_folio, 1705 + .is_partially_uptodate = iomap_is_partially_uptodate, 1706 + .error_remove_folio = generic_error_remove_folio, 1707 + .release_folio = iomap_release_folio, 1708 + .invalidate_folio = iomap_invalidate_folio, 1709 + };
+870
fs/ntfs/iomap.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * iomap callback functions 4 + * 5 + * Copyright (c) 2025 LG Electronics Co., Ltd. 6 + */ 7 + 8 + #include <linux/writeback.h> 9 + 10 + #include "attrib.h" 11 + #include "mft.h" 12 + #include "ntfs.h" 13 + #include "iomap.h" 14 + 15 + static void ntfs_iomap_put_folio_non_resident(struct inode *inode, loff_t pos, 16 + unsigned int len, struct folio *folio) 17 + { 18 + struct ntfs_inode *ni = NTFS_I(inode); 19 + unsigned long sector_size = 1UL << inode->i_blkbits; 20 + loff_t start_down, end_up, init; 21 + 22 + start_down = round_down(pos, sector_size); 23 + end_up = (pos + len - 1) | (sector_size - 1); 24 + init = ni->initialized_size; 25 + 26 + if (init >= start_down && init <= end_up) { 27 + if (init < pos) { 28 + loff_t offset = offset_in_folio(folio, pos + len); 29 + 30 + if (offset == 0) 31 + offset = folio_size(folio); 32 + folio_zero_segments(folio, 33 + offset_in_folio(folio, init), 34 + offset_in_folio(folio, pos), 35 + offset, 36 + folio_size(folio)); 37 + 38 + } else { 39 + loff_t offset = max_t(loff_t, pos + len, init); 40 + 41 + offset = offset_in_folio(folio, offset); 42 + if (offset == 0) 43 + offset = folio_size(folio); 44 + folio_zero_segment(folio, 45 + offset, 46 + folio_size(folio)); 47 + } 48 + } else if (init <= pos) { 49 + loff_t offset = 0, offset2 = offset_in_folio(folio, pos + len); 50 + 51 + if ((init >> folio_shift(folio)) == (pos >> folio_shift(folio))) 52 + offset = offset_in_folio(folio, init); 53 + if (offset2 == 0) 54 + offset2 = folio_size(folio); 55 + folio_zero_segments(folio, 56 + offset, 57 + offset_in_folio(folio, pos), 58 + offset2, 59 + folio_size(folio)); 60 + } 61 + folio_unlock(folio); 62 + folio_put(folio); 63 + } 64 + 65 + /* 66 + * When iomap_zero_range is called for an area beyond the initialized size, 67 + * garbage values can be read, so zeroing out is needed. 
68 + */ 69 + static void ntfs_iomap_put_folio(struct inode *inode, loff_t pos, 70 + unsigned int len, struct folio *folio) 71 + { 72 + if (NInoNonResident(NTFS_I(inode))) 73 + return ntfs_iomap_put_folio_non_resident(inode, pos, 74 + len, folio); 75 + folio_unlock(folio); 76 + folio_put(folio); 77 + } 78 + 79 + const struct iomap_write_ops ntfs_iomap_folio_ops = { 80 + .put_folio = ntfs_iomap_put_folio, 81 + }; 82 + 83 + static int ntfs_read_iomap_begin_resident(struct inode *inode, loff_t offset, loff_t length, 84 + unsigned int flags, struct iomap *iomap) 85 + { 86 + struct ntfs_inode *base_ni, *ni = NTFS_I(inode); 87 + struct ntfs_attr_search_ctx *ctx; 88 + loff_t i_size; 89 + u32 attr_len; 90 + int err = 0; 91 + char *kattr; 92 + 93 + if (NInoAttr(ni)) 94 + base_ni = ni->ext.base_ntfs_ino; 95 + else 96 + base_ni = ni; 97 + 98 + ctx = ntfs_attr_get_search_ctx(base_ni, NULL); 99 + if (!ctx) { 100 + err = -ENOMEM; 101 + goto out; 102 + } 103 + 104 + err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 105 + CASE_SENSITIVE, 0, NULL, 0, ctx); 106 + if (unlikely(err)) 107 + goto out; 108 + 109 + attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); 110 + if (unlikely(attr_len > ni->initialized_size)) 111 + attr_len = ni->initialized_size; 112 + i_size = i_size_read(inode); 113 + 114 + if (unlikely(attr_len > i_size)) { 115 + /* Race with shrinking truncate. 
*/ 116 + attr_len = i_size; 117 + } 118 + 119 + if (offset >= attr_len) { 120 + if (flags & IOMAP_REPORT) 121 + err = -ENOENT; 122 + else { 123 + iomap->type = IOMAP_HOLE; 124 + iomap->offset = offset; 125 + iomap->length = length; 126 + } 127 + goto out; 128 + } 129 + 130 + kattr = (u8 *)ctx->attr + le16_to_cpu(ctx->attr->data.resident.value_offset); 131 + 132 + iomap->inline_data = kmemdup(kattr, attr_len, GFP_KERNEL); 133 + if (!iomap->inline_data) { 134 + err = -ENOMEM; 135 + goto out; 136 + } 137 + 138 + iomap->type = IOMAP_INLINE; 139 + iomap->offset = 0; 140 + iomap->length = attr_len; 141 + 142 + out: 143 + if (ctx) 144 + ntfs_attr_put_search_ctx(ctx); 145 + 146 + return err; 147 + } 148 + 149 + /* 150 + * ntfs_read_iomap_begin_non_resident - map non-resident NTFS file data 151 + * @inode: inode to map 152 + * @offset: file offset to map 153 + * @length: length of mapping 154 + * @flags: IOMAP flags 155 + * @iomap: iomap structure to fill 156 + * @need_unwritten: true if UNWRITTEN extent type is needed 157 + * 158 + * Map a range of a non-resident NTFS file to an iomap extent. 159 + * 160 + * NTFS UNWRITTEN extent handling: 161 + * ================================ 162 + * The concept of an unwritten extent in NTFS is slightly different from 163 + * that of other filesystems. NTFS conceptually manages only a single 164 + * continuous unwritten region, which is strictly defined based on 165 + * initialized_size. 166 + * 167 + * File offset layout: 168 + * 0 initialized_size i_size(EOF) 169 + * |----------#0----------|----------#1----------|----------#2----------| 170 + * | Actual data | Pre-allocated | Pre-allocated | 171 + * | (user written) | (within initialized) | (initialized ~ EOF) | 172 + * |----------------------|----------------------|----------------------| 173 + * MAPPED MAPPED UNWRITTEN (conditionally) 174 + * 175 + * Region #0: User-written data, initialized and valid. 
176 + * Region #1: Pre-allocated within initialized_size, must be zero-initialized 177 + * by the filesystem before exposure to userspace. 178 + * Region #2: Pre-allocated beyond initialized_size, does not need initialization. 179 + * 180 + * The @need_unwritten parameter controls whether region #2 is mapped as 181 + * IOMAP_UNWRITTEN or IOMAP_MAPPED: 182 + * - For seek operations (SEEK_DATA/SEEK_HOLE): IOMAP_MAPPED is needed to 183 + * prevent iomap_seek_data from incorrectly interpreting pre-allocated 184 + * space as a hole. Since NTFS does not support multiple unwritten extents, 185 + * all pre-allocated regions should be treated as data, not holes. 186 + * - For zero_range operations: IOMAP_MAPPED is needed to be zeroed out. 187 + * 188 + * Return: 0 on success, negative error code on failure. 189 + */ 190 + static int ntfs_read_iomap_begin_non_resident(struct inode *inode, loff_t offset, 191 + loff_t length, unsigned int flags, struct iomap *iomap, 192 + bool need_unwritten) 193 + { 194 + struct ntfs_inode *ni = NTFS_I(inode); 195 + s64 vcn; 196 + s64 lcn; 197 + struct runlist_element *rl; 198 + struct ntfs_volume *vol = ni->vol; 199 + loff_t vcn_ofs; 200 + loff_t rl_length; 201 + 202 + vcn = ntfs_bytes_to_cluster(vol, offset); 203 + vcn_ofs = ntfs_bytes_to_cluster_off(vol, offset); 204 + 205 + down_write(&ni->runlist.lock); 206 + rl = ntfs_attr_vcn_to_rl(ni, vcn, &lcn); 207 + if (IS_ERR(rl)) { 208 + up_write(&ni->runlist.lock); 209 + return PTR_ERR(rl); 210 + } 211 + 212 + if (flags & IOMAP_REPORT) { 213 + if (lcn < LCN_HOLE) { 214 + up_write(&ni->runlist.lock); 215 + return -ENOENT; 216 + } 217 + } else if (lcn < LCN_ENOENT) { 218 + up_write(&ni->runlist.lock); 219 + return -EINVAL; 220 + } 221 + 222 + iomap->bdev = inode->i_sb->s_bdev; 223 + iomap->offset = offset; 224 + 225 + if (lcn <= LCN_DELALLOC) { 226 + if (lcn == LCN_DELALLOC) 227 + iomap->type = IOMAP_DELALLOC; 228 + else 229 + iomap->type = IOMAP_HOLE; 230 + iomap->addr = IOMAP_NULL_ADDR; 231 + } 
else { 232 + if (need_unwritten && offset >= ni->initialized_size) 233 + iomap->type = IOMAP_UNWRITTEN; 234 + else 235 + iomap->type = IOMAP_MAPPED; 236 + iomap->addr = ntfs_cluster_to_bytes(vol, lcn) + vcn_ofs; 237 + } 238 + 239 + rl_length = ntfs_cluster_to_bytes(vol, rl->length - (vcn - rl->vcn)); 240 + 241 + if (rl_length == 0 && rl->lcn > LCN_DELALLOC) { 242 + ntfs_error(inode->i_sb, 243 + "runlist(vcn : %lld, length : %lld, lcn : %lld) is corrupted\n", 244 + rl->vcn, rl->length, rl->lcn); 245 + up_write(&ni->runlist.lock); 246 + return -EIO; 247 + } 248 + 249 + if (rl_length && length > rl_length - vcn_ofs) 250 + iomap->length = rl_length - vcn_ofs; 251 + else 252 + iomap->length = length; 253 + up_write(&ni->runlist.lock); 254 + 255 + if (!(flags & IOMAP_ZERO) && 256 + iomap->type == IOMAP_MAPPED && 257 + iomap->offset < ni->initialized_size && 258 + iomap->offset + iomap->length > ni->initialized_size) { 259 + iomap->length = round_up(ni->initialized_size, 1 << inode->i_blkbits) - 260 + iomap->offset; 261 + } 262 + iomap->flags |= IOMAP_F_MERGED; 263 + 264 + return 0; 265 + } 266 + 267 + static int __ntfs_read_iomap_begin(struct inode *inode, loff_t offset, loff_t length, 268 + unsigned int flags, struct iomap *iomap, struct iomap *srcmap, 269 + bool need_unwritten) 270 + { 271 + if (NInoNonResident(NTFS_I(inode))) 272 + return ntfs_read_iomap_begin_non_resident(inode, offset, length, 273 + flags, iomap, need_unwritten); 274 + return ntfs_read_iomap_begin_resident(inode, offset, length, 275 + flags, iomap); 276 + } 277 + 278 + static int ntfs_read_iomap_begin(struct inode *inode, loff_t offset, loff_t length, 279 + unsigned int flags, struct iomap *iomap, struct iomap *srcmap) 280 + { 281 + return __ntfs_read_iomap_begin(inode, offset, length, flags, iomap, 282 + srcmap, true); 283 + } 284 + 285 + static int ntfs_read_iomap_end(struct inode *inode, loff_t pos, loff_t length, 286 + ssize_t written, unsigned int flags, struct iomap *iomap) 287 + { 288 + if 
(iomap->type == IOMAP_INLINE) 289 + kfree(iomap->inline_data); 290 + 291 + return written; 292 + } 293 + 294 + const struct iomap_ops ntfs_read_iomap_ops = { 295 + .iomap_begin = ntfs_read_iomap_begin, 296 + .iomap_end = ntfs_read_iomap_end, 297 + }; 298 + 299 + /* 300 + * Check that the cached iomap still matches the NTFS runlist before 301 + * iomap_zero_range() is called. if the runlist changes while iomap is 302 + * iterating a cached iomap, iomap_zero_range() may overwrite folios 303 + * that have been already written with valid data. 304 + */ 305 + static bool ntfs_iomap_valid(struct inode *inode, const struct iomap *iomap) 306 + { 307 + struct ntfs_inode *ni = NTFS_I(inode); 308 + struct runlist_element *rl; 309 + s64 vcn, lcn; 310 + 311 + if (!NInoNonResident(ni)) 312 + return false; 313 + 314 + vcn = iomap->offset >> ni->vol->cluster_size_bits; 315 + 316 + down_read(&ni->runlist.lock); 317 + rl = __ntfs_attr_find_vcn_nolock(&ni->runlist, vcn); 318 + if (IS_ERR(rl)) { 319 + up_read(&ni->runlist.lock); 320 + return false; 321 + } 322 + lcn = ntfs_rl_vcn_to_lcn(rl, vcn); 323 + up_read(&ni->runlist.lock); 324 + return lcn == LCN_DELALLOC; 325 + } 326 + 327 + static const struct iomap_write_ops ntfs_zero_iomap_folio_ops = { 328 + .put_folio = ntfs_iomap_put_folio, 329 + .iomap_valid = ntfs_iomap_valid, 330 + }; 331 + 332 + static int ntfs_seek_iomap_begin(struct inode *inode, loff_t offset, loff_t length, 333 + unsigned int flags, struct iomap *iomap, struct iomap *srcmap) 334 + { 335 + return __ntfs_read_iomap_begin(inode, offset, length, flags, iomap, 336 + srcmap, false); 337 + } 338 + 339 + static int ntfs_zero_read_iomap_end(struct inode *inode, loff_t pos, loff_t length, 340 + ssize_t written, unsigned int flags, struct iomap *iomap) 341 + { 342 + if ((flags & IOMAP_ZERO) && (iomap->flags & IOMAP_F_STALE)) 343 + return -EPERM; 344 + return written; 345 + } 346 + 347 + static const struct iomap_ops ntfs_zero_read_iomap_ops = { 348 + .iomap_begin = 
ntfs_seek_iomap_begin, 349 + .iomap_end = ntfs_zero_read_iomap_end, 350 + }; 351 + 352 + const struct iomap_ops ntfs_seek_iomap_ops = { 353 + .iomap_begin = ntfs_seek_iomap_begin, 354 + .iomap_end = ntfs_read_iomap_end, 355 + }; 356 + 357 + int ntfs_dio_zero_range(struct inode *inode, loff_t offset, loff_t length) 358 + { 359 + if ((offset | length) & (SECTOR_SIZE - 1)) 360 + return -EINVAL; 361 + 362 + return blkdev_issue_zeroout(inode->i_sb->s_bdev, 363 + offset >> SECTOR_SHIFT, 364 + length >> SECTOR_SHIFT, 365 + GFP_NOFS, 366 + BLKDEV_ZERO_NOUNMAP); 367 + } 368 + 369 + static int ntfs_zero_range(struct inode *inode, loff_t offset, loff_t length) 370 + { 371 + return iomap_zero_range(inode, 372 + offset, length, 373 + NULL, 374 + &ntfs_zero_read_iomap_ops, 375 + &ntfs_zero_iomap_folio_ops, 376 + NULL); 377 + } 378 + 379 + static int ntfs_write_simple_iomap_begin_non_resident(struct inode *inode, loff_t offset, 380 + loff_t length, struct iomap *iomap) 381 + { 382 + struct ntfs_inode *ni = NTFS_I(inode); 383 + struct ntfs_volume *vol = ni->vol; 384 + loff_t vcn_ofs, rl_length; 385 + struct runlist_element *rl, *rlc; 386 + bool is_retry = false; 387 + int err; 388 + s64 vcn, lcn; 389 + s64 max_clu_count = 390 + ntfs_bytes_to_cluster(vol, round_up(length, vol->cluster_size)); 391 + 392 + vcn = ntfs_bytes_to_cluster(vol, offset); 393 + vcn_ofs = ntfs_bytes_to_cluster_off(vol, offset); 394 + 395 + down_read(&ni->runlist.lock); 396 + rl = ni->runlist.rl; 397 + if (!rl) { 398 + up_read(&ni->runlist.lock); 399 + err = ntfs_map_runlist(ni, vcn); 400 + if (err) { 401 + mutex_unlock(&ni->mrec_lock); 402 + return -ENOENT; 403 + } 404 + down_read(&ni->runlist.lock); 405 + rl = ni->runlist.rl; 406 + } 407 + up_read(&ni->runlist.lock); 408 + 409 + down_write(&ni->runlist.lock); 410 + remap_rl: 411 + /* Seek to element containing target vcn. 
*/ 412 + rl = __ntfs_attr_find_vcn_nolock(&ni->runlist, vcn); 413 + if (IS_ERR(rl)) { 414 + up_write(&ni->runlist.lock); 415 + mutex_unlock(&ni->mrec_lock); 416 + return -EIO; 417 + } 418 + lcn = ntfs_rl_vcn_to_lcn(rl, vcn); 419 + 420 + if (lcn <= LCN_RL_NOT_MAPPED && is_retry == false) { 421 + is_retry = true; 422 + if (!ntfs_map_runlist_nolock(ni, vcn, NULL)) { 423 + rl = ni->runlist.rl; 424 + goto remap_rl; 425 + } 426 + } 427 + 428 + max_clu_count = min(max_clu_count, rl->length - (vcn - rl->vcn)); 429 + if (max_clu_count == 0) { 430 + ntfs_error(inode->i_sb, 431 + "runlist(vcn : %lld, length : %lld) is corrupted\n", 432 + rl->vcn, rl->length); 433 + up_write(&ni->runlist.lock); 434 + mutex_unlock(&ni->mrec_lock); 435 + return -EIO; 436 + } 437 + 438 + iomap->bdev = inode->i_sb->s_bdev; 439 + iomap->offset = offset; 440 + 441 + if (lcn <= LCN_DELALLOC) { 442 + if (lcn < LCN_DELALLOC) { 443 + max_clu_count = 444 + ntfs_available_clusters_count(vol, max_clu_count); 445 + if (max_clu_count < 0) { 446 + err = max_clu_count; 447 + up_write(&ni->runlist.lock); 448 + mutex_unlock(&ni->mrec_lock); 449 + return err; 450 + } 451 + } 452 + 453 + iomap->type = IOMAP_DELALLOC; 454 + iomap->addr = IOMAP_NULL_ADDR; 455 + 456 + if (lcn <= LCN_HOLE) { 457 + size_t new_rl_count; 458 + 459 + rlc = kmalloc(sizeof(struct runlist_element) * 2, 460 + GFP_NOFS); 461 + if (!rlc) { 462 + up_write(&ni->runlist.lock); 463 + mutex_unlock(&ni->mrec_lock); 464 + return -ENOMEM; 465 + } 466 + 467 + rlc->vcn = vcn; 468 + rlc->lcn = LCN_DELALLOC; 469 + rlc->length = max_clu_count; 470 + 471 + rlc[1].vcn = vcn + max_clu_count; 472 + rlc[1].lcn = LCN_RL_NOT_MAPPED; 473 + rlc[1].length = 0; 474 + 475 + rl = ntfs_runlists_merge(&ni->runlist, rlc, 0, 476 + &new_rl_count); 477 + if (IS_ERR(rl)) { 478 + ntfs_error(vol->sb, "Failed to merge runlists"); 479 + up_write(&ni->runlist.lock); 480 + mutex_unlock(&ni->mrec_lock); 481 + kvfree(rlc); 482 + return PTR_ERR(rl); 483 + } 484 + 485 + ni->runlist.rl = 
rl; 486 + ni->runlist.count = new_rl_count; 487 + ni->i_dealloc_clusters += max_clu_count; 488 + } 489 + up_write(&ni->runlist.lock); 490 + mutex_unlock(&ni->mrec_lock); 491 + 492 + if (lcn < LCN_DELALLOC) 493 + ntfs_hold_dirty_clusters(vol, max_clu_count); 494 + 495 + rl_length = ntfs_cluster_to_bytes(vol, max_clu_count); 496 + if (length > rl_length - vcn_ofs) 497 + iomap->length = rl_length - vcn_ofs; 498 + else 499 + iomap->length = length; 500 + 501 + iomap->flags = IOMAP_F_NEW; 502 + if (lcn <= LCN_HOLE) { 503 + loff_t end = offset + length; 504 + 505 + if (vcn_ofs || ((vol->cluster_size > iomap->length) && 506 + end < ni->initialized_size)) { 507 + loff_t z_start, z_end; 508 + 509 + z_start = vcn << vol->cluster_size_bits; 510 + z_end = min_t(loff_t, z_start + vol->cluster_size, 511 + i_size_read(inode)); 512 + if (z_end > z_start) 513 + err = ntfs_zero_range(inode, 514 + z_start, 515 + z_end - z_start); 516 + } 517 + if ((!err || err == -EPERM) && 518 + max_clu_count > 1 && 519 + (iomap->length & vol->cluster_size_mask && 520 + end < ni->initialized_size)) { 521 + loff_t z_start, z_end; 522 + 523 + z_start = (vcn + max_clu_count - 1) << 524 + vol->cluster_size_bits; 525 + z_end = min_t(loff_t, z_start + vol->cluster_size, 526 + i_size_read(inode)); 527 + if (z_end > z_start) 528 + err = ntfs_zero_range(inode, 529 + z_start, 530 + z_end - z_start); 531 + } 532 + 533 + if (err == -EPERM) 534 + err = 0; 535 + if (err) { 536 + ntfs_release_dirty_clusters(vol, max_clu_count); 537 + return err; 538 + } 539 + } 540 + } else { 541 + up_write(&ni->runlist.lock); 542 + mutex_unlock(&ni->mrec_lock); 543 + 544 + iomap->type = IOMAP_MAPPED; 545 + iomap->addr = ntfs_cluster_to_bytes(vol, lcn) + vcn_ofs; 546 + 547 + rl_length = ntfs_cluster_to_bytes(vol, max_clu_count); 548 + if (length > rl_length - vcn_ofs) 549 + iomap->length = rl_length - vcn_ofs; 550 + else 551 + iomap->length = length; 552 + } 553 + 554 + return 0; 555 + } 556 + 557 + #define NTFS_IOMAP_FLAGS_BEGIN 
BIT(1) 558 + #define NTFS_IOMAP_FLAGS_DIO BIT(2) 559 + #define NTFS_IOMAP_FLAGS_MKWRITE BIT(3) 560 + #define NTFS_IOMAP_FLAGS_WRITEBACK BIT(4) 561 + 562 + static int ntfs_write_da_iomap_begin_non_resident(struct inode *inode, 563 + loff_t offset, loff_t length, unsigned int flags, 564 + struct iomap *iomap, int ntfs_iomap_flags) 565 + { 566 + struct ntfs_inode *ni = NTFS_I(inode); 567 + struct ntfs_volume *vol = ni->vol; 568 + loff_t vcn_ofs, rl_length; 569 + s64 vcn, start_lcn, lcn_count; 570 + bool balloc = false, update_mp; 571 + int err; 572 + s64 max_clu_count = 573 + ntfs_bytes_to_cluster(vol, round_up(length, vol->cluster_size)); 574 + 575 + vcn = ntfs_bytes_to_cluster(vol, offset); 576 + vcn_ofs = ntfs_bytes_to_cluster_off(vol, offset); 577 + 578 + update_mp = ntfs_iomap_flags & (NTFS_IOMAP_FLAGS_DIO | NTFS_IOMAP_FLAGS_MKWRITE) || 579 + NInoAttr(ni) || ni->mft_no < FILE_first_user; 580 + down_write(&ni->runlist.lock); 581 + err = ntfs_attr_map_cluster(ni, vcn, &start_lcn, &lcn_count, 582 + max_clu_count, &balloc, update_mp, 583 + ntfs_iomap_flags & NTFS_IOMAP_FLAGS_WRITEBACK); 584 + up_write(&ni->runlist.lock); 585 + mutex_unlock(&ni->mrec_lock); 586 + if (err) { 587 + ni->i_dealloc_clusters = 0; 588 + return err; 589 + } 590 + 591 + iomap->bdev = inode->i_sb->s_bdev; 592 + iomap->offset = offset; 593 + 594 + rl_length = ntfs_cluster_to_bytes(vol, lcn_count); 595 + if (length > rl_length - vcn_ofs) 596 + iomap->length = rl_length - vcn_ofs; 597 + else 598 + iomap->length = length; 599 + 600 + if (start_lcn == LCN_HOLE) 601 + iomap->type = IOMAP_HOLE; 602 + else 603 + iomap->type = IOMAP_MAPPED; 604 + if (balloc == true) 605 + iomap->flags = IOMAP_F_NEW; 606 + 607 + iomap->addr = ntfs_cluster_to_bytes(vol, start_lcn) + vcn_ofs; 608 + 609 + if (balloc == true) { 610 + if (flags & IOMAP_DIRECT || 611 + ntfs_iomap_flags & NTFS_IOMAP_FLAGS_MKWRITE) { 612 + loff_t end = offset + length; 613 + 614 + if (vcn_ofs || ((vol->cluster_size > iomap->length) && 615 + end 
< ni->initialized_size)) 616 + err = ntfs_dio_zero_range(inode, 617 + start_lcn << 618 + vol->cluster_size_bits, 619 + vol->cluster_size); 620 + if (!err && lcn_count > 1 && 621 + (iomap->length & vol->cluster_size_mask && 622 + end < ni->initialized_size)) 623 + err = ntfs_dio_zero_range(inode, 624 + (start_lcn + lcn_count - 1) << 625 + vol->cluster_size_bits, 626 + vol->cluster_size); 627 + } else { 628 + if (lcn_count > ni->i_dealloc_clusters) 629 + ni->i_dealloc_clusters = 0; 630 + else 631 + ni->i_dealloc_clusters -= lcn_count; 632 + } 633 + if (err < 0) 634 + return err; 635 + } 636 + 637 + if (ntfs_iomap_flags & NTFS_IOMAP_FLAGS_MKWRITE && 638 + iomap->offset + iomap->length > ni->initialized_size) { 639 + err = ntfs_attr_set_initialized_size(ni, iomap->offset + 640 + iomap->length); 641 + } 642 + 643 + return err; 644 + } 645 + 646 + static int ntfs_write_iomap_begin_resident(struct inode *inode, loff_t offset, 647 + struct iomap *iomap) 648 + { 649 + struct ntfs_inode *ni = NTFS_I(inode); 650 + struct attr_record *a; 651 + struct ntfs_attr_search_ctx *ctx; 652 + u32 attr_len; 653 + int err = 0; 654 + char *kattr; 655 + 656 + ctx = ntfs_attr_get_search_ctx(ni, NULL); 657 + if (!ctx) { 658 + err = -ENOMEM; 659 + goto out; 660 + } 661 + 662 + err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 663 + CASE_SENSITIVE, 0, NULL, 0, ctx); 664 + if (err) { 665 + if (err == -ENOENT) 666 + err = -EIO; 667 + goto out; 668 + } 669 + 670 + a = ctx->attr; 671 + /* The total length of the attribute value. 
*/ 672 + attr_len = le32_to_cpu(a->data.resident.value_length); 673 + kattr = (u8 *)a + le16_to_cpu(a->data.resident.value_offset); 674 + 675 + iomap->inline_data = kmemdup(kattr, attr_len, GFP_KERNEL); 676 + if (!iomap->inline_data) { 677 + err = -ENOMEM; 678 + goto out; 679 + } 680 + 681 + iomap->type = IOMAP_INLINE; 682 + iomap->offset = 0; 683 + /* iomap requires there is only one INLINE_DATA extent */ 684 + iomap->length = attr_len; 685 + 686 + out: 687 + if (ctx) 688 + ntfs_attr_put_search_ctx(ctx); 689 + mutex_unlock(&ni->mrec_lock); 690 + return err; 691 + } 692 + 693 + static int ntfs_write_iomap_begin_non_resident(struct inode *inode, loff_t offset, 694 + loff_t length, unsigned int flags, 695 + struct iomap *iomap, int ntfs_iomap_flags) 696 + { 697 + struct ntfs_inode *ni = NTFS_I(inode); 698 + 699 + if (ntfs_iomap_flags & (NTFS_IOMAP_FLAGS_BEGIN | NTFS_IOMAP_FLAGS_DIO) && 700 + offset + length > ni->initialized_size) { 701 + int ret; 702 + 703 + ret = ntfs_extend_initialized_size(inode, offset, 704 + offset + length, 705 + ntfs_iomap_flags & 706 + NTFS_IOMAP_FLAGS_DIO); 707 + if (ret < 0) 708 + return ret; 709 + } 710 + 711 + mutex_lock(&ni->mrec_lock); 712 + if (ntfs_iomap_flags & NTFS_IOMAP_FLAGS_BEGIN) 713 + return ntfs_write_simple_iomap_begin_non_resident(inode, offset, 714 + length, iomap); 715 + else 716 + return ntfs_write_da_iomap_begin_non_resident(inode, 717 + offset, length, 718 + flags, iomap, 719 + ntfs_iomap_flags); 720 + } 721 + 722 + static int __ntfs_write_iomap_begin(struct inode *inode, loff_t offset, 723 + loff_t length, unsigned int flags, 724 + struct iomap *iomap, int ntfs_iomap_flags) 725 + { 726 + struct ntfs_inode *ni = NTFS_I(inode); 727 + loff_t end = offset + length; 728 + 729 + if (NVolShutdown(ni->vol)) 730 + return -EIO; 731 + 732 + if (ntfs_iomap_flags & (NTFS_IOMAP_FLAGS_BEGIN | NTFS_IOMAP_FLAGS_DIO) && 733 + end > ni->data_size) { 734 + struct ntfs_volume *vol = ni->vol; 735 + int ret; 736 + 737 + 
mutex_lock(&ni->mrec_lock); 738 + if (end > ni->allocated_size && 739 + end < ni->allocated_size + vol->preallocated_size) 740 + ret = ntfs_attr_expand(ni, end, 741 + ni->allocated_size + vol->preallocated_size); 742 + else 743 + ret = ntfs_attr_expand(ni, end, 0); 744 + mutex_unlock(&ni->mrec_lock); 745 + if (ret) 746 + return ret; 747 + } 748 + 749 + if (!NInoNonResident(ni)) { 750 + mutex_lock(&ni->mrec_lock); 751 + return ntfs_write_iomap_begin_resident(inode, offset, iomap); 752 + } 753 + return ntfs_write_iomap_begin_non_resident(inode, offset, length, flags, 754 + iomap, ntfs_iomap_flags); 755 + } 756 + 757 + static int ntfs_write_iomap_begin(struct inode *inode, loff_t offset, 758 + loff_t length, unsigned int flags, 759 + struct iomap *iomap, struct iomap *srcmap) 760 + { 761 + return __ntfs_write_iomap_begin(inode, offset, length, flags, iomap, 762 + NTFS_IOMAP_FLAGS_BEGIN); 763 + } 764 + 765 + static int ntfs_write_iomap_end_resident(struct inode *inode, loff_t pos, 766 + loff_t length, ssize_t written, 767 + unsigned int flags, struct iomap *iomap) 768 + { 769 + struct ntfs_inode *ni = NTFS_I(inode); 770 + struct ntfs_attr_search_ctx *ctx; 771 + u32 attr_len; 772 + int err; 773 + char *kattr; 774 + 775 + mutex_lock(&ni->mrec_lock); 776 + ctx = ntfs_attr_get_search_ctx(ni, NULL); 777 + if (!ctx) { 778 + written = -ENOMEM; 779 + mutex_unlock(&ni->mrec_lock); 780 + return written; 781 + } 782 + 783 + err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 784 + CASE_SENSITIVE, 0, NULL, 0, ctx); 785 + if (err) { 786 + if (err == -ENOENT) 787 + err = -EIO; 788 + written = err; 789 + goto err_out; 790 + } 791 + 792 + /* The total length of the attribute value. 
*/ 793 + attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); 794 + if (pos >= attr_len || pos + written > attr_len) 795 + goto err_out; 796 + 797 + kattr = (u8 *)ctx->attr + le16_to_cpu(ctx->attr->data.resident.value_offset); 798 + memcpy(kattr + pos, iomap_inline_data(iomap, pos), written); 799 + mark_mft_record_dirty(ctx->ntfs_ino); 800 + err_out: 801 + ntfs_attr_put_search_ctx(ctx); 802 + kfree(iomap->inline_data); 803 + mutex_unlock(&ni->mrec_lock); 804 + return written; 805 + 806 + } 807 + 808 + static int ntfs_write_iomap_end(struct inode *inode, loff_t pos, loff_t length, 809 + ssize_t written, unsigned int flags, 810 + struct iomap *iomap) 811 + { 812 + if (iomap->type == IOMAP_INLINE) 813 + return ntfs_write_iomap_end_resident(inode, pos, length, 814 + written, flags, iomap); 815 + return written; 816 + } 817 + 818 + const struct iomap_ops ntfs_write_iomap_ops = { 819 + .iomap_begin = ntfs_write_iomap_begin, 820 + .iomap_end = ntfs_write_iomap_end, 821 + }; 822 + 823 + static int ntfs_page_mkwrite_iomap_begin(struct inode *inode, loff_t offset, 824 + loff_t length, unsigned int flags, 825 + struct iomap *iomap, struct iomap *srcmap) 826 + { 827 + return __ntfs_write_iomap_begin(inode, offset, length, flags, iomap, 828 + NTFS_IOMAP_FLAGS_MKWRITE); 829 + } 830 + 831 + const struct iomap_ops ntfs_page_mkwrite_iomap_ops = { 832 + .iomap_begin = ntfs_page_mkwrite_iomap_begin, 833 + .iomap_end = ntfs_write_iomap_end, 834 + }; 835 + 836 + static int ntfs_dio_iomap_begin(struct inode *inode, loff_t offset, 837 + loff_t length, unsigned int flags, 838 + struct iomap *iomap, struct iomap *srcmap) 839 + { 840 + return __ntfs_write_iomap_begin(inode, offset, length, flags, iomap, 841 + NTFS_IOMAP_FLAGS_DIO); 842 + } 843 + 844 + const struct iomap_ops ntfs_dio_iomap_ops = { 845 + .iomap_begin = ntfs_dio_iomap_begin, 846 + .iomap_end = ntfs_write_iomap_end, 847 + }; 848 + 849 + static ssize_t ntfs_writeback_range(struct iomap_writepage_ctx *wpc, 850 + struct 
folio *folio, u64 offset, unsigned int len, u64 end_pos) 851 + { 852 + if (offset < wpc->iomap.offset || 853 + offset >= wpc->iomap.offset + wpc->iomap.length) { 854 + int error; 855 + 856 + error = __ntfs_write_iomap_begin(wpc->inode, offset, 857 + NTFS_I(wpc->inode)->allocated_size - offset, 858 + IOMAP_WRITE, &wpc->iomap, 859 + NTFS_IOMAP_FLAGS_WRITEBACK); 860 + if (error) 861 + return error; 862 + } 863 + 864 + return iomap_add_to_ioend(wpc, folio, offset, end_pos, len); 865 + } 866 + 867 + const struct iomap_writeback_ops ntfs_writeback_ops = { 868 + .writeback_range = ntfs_writeback_range, 869 + .writeback_submit = iomap_ioend_writeback_submit, 870 + };