Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Cluster (de)allocation code.
4 *
5 * Copyright (c) 2004-2005 Anton Altaparmakov
6 * Copyright (c) 2025 LG Electronics Co., Ltd.
7 *
8 * Part of this file is based on code from the NTFS-3G.
9 * and is copyrighted by the respective authors below:
10 * Copyright (c) 2002-2004 Anton Altaparmakov
11 * Copyright (c) 2004 Yura Pakhuchiy
12 * Copyright (c) 2004-2008 Szabolcs Szakacsits
13 * Copyright (c) 2008-2009 Jean-Pierre Andre
14 */
15
16#include <linux/blkdev.h>
17
18#include "lcnalloc.h"
19#include "bitmap.h"
20#include "ntfs.h"
21
22/*
23 * ntfs_cluster_free_from_rl_nolock - free clusters from runlist
24 * @vol: mounted ntfs volume on which to free the clusters
25 * @rl: runlist describing the clusters to free
26 *
27 * Free all the clusters described by the runlist @rl on the volume @vol. In
28 * the case of an error being returned, at least some of the clusters were not
29 * freed.
30 *
31 * Return 0 on success and -errno on error.
32 *
33 * Locking: - The volume lcn bitmap must be locked for writing on entry and is
34 * left locked on return.
35 */
36int ntfs_cluster_free_from_rl_nolock(struct ntfs_volume *vol,
37 const struct runlist_element *rl)
38{
39 struct inode *lcnbmp_vi = vol->lcnbmp_ino;
40 int ret = 0;
41 s64 nr_freed = 0;
42
43 ntfs_debug("Entering.");
44 if (!rl)
45 return 0;
46
47 if (!NVolFreeClusterKnown(vol))
48 wait_event(vol->free_waitq, NVolFreeClusterKnown(vol));
49
50 for (; rl->length; rl++) {
51 int err;
52
53 if (rl->lcn < 0)
54 continue;
55 err = ntfs_bitmap_clear_run(lcnbmp_vi, rl->lcn, rl->length);
56 if (unlikely(err && (!ret || ret == -ENOMEM) && ret != err))
57 ret = err;
58 else
59 nr_freed += rl->length;
60 }
61 ntfs_inc_free_clusters(vol, nr_freed);
62 ntfs_debug("Done.");
63 return ret;
64}
65
/*
 * max_empty_bit_range - locate the longest run of zero bits in a buffer
 * @buf:	bitmap buffer to scan
 * @size:	number of bytes in @buf to scan
 *
 * Scan @size bytes of @buf and return the bit position (relative to the
 * start of @buf) at which the longest run of clear (zero) bits begins, or
 * -1 if no clear bit was found at all.  Byte values 0x00 and 0xff are
 * handled with fast byte-at-a-time loops; mixed bytes are scanned bit by
 * bit (LSB first, matching the bitmap's bit ordering).
 */
static s64 max_empty_bit_range(unsigned char *buf, int size)
{
	int i, j, run = 0;
	int max_range = 0;
	s64 start_pos = -1;

	ntfs_debug("Entering\n");

	i = 0;
	while (i < size) {
		switch (*buf) {
		case 0:
			/* Whole byte free: extend the current run by 8 bits. */
			do {
				buf++;
				run += 8;
				i++;
			} while ((i < size) && !*buf);
			break;
		case 255:
			/*
			 * Whole byte allocated: the current run ends here.
			 * Record it if it is the longest seen so far, then
			 * skip over all consecutive fully-set bytes.
			 */
			if (run > max_range) {
				max_range = run;
				start_pos = (s64)i * 8 - run;
			}
			run = 0;
			do {
				buf++;
				i++;
			} while ((i < size) && (*buf == 255));
			break;
		default:
			/* Mixed byte: examine each bit, LSB first. */
			for (j = 0; j < 8; j++) {
				int bit = *buf & (1 << j);

				if (bit) {
					/* Set bit terminates the run. */
					if (run > max_range) {
						max_range = run;
						start_pos = (s64)i * 8 + (j - run);
					}
					run = 0;
				} else
					run++;
			}
			i++;
			buf++;
		}
	}

	/*
	 * A run still open at the end of the buffer was never recorded by
	 * the loop above; account for it now.  (max_range need not be
	 * updated - it is not used after this point.)
	 */
	if (run > max_range)
		start_pos = (s64)i * 8 - run;

	return start_pos;
}
118
119/*
120 * ntfs_cluster_alloc - allocate clusters on an ntfs volume
121 * @vol: mounted ntfs volume on which to allocate clusters
122 * @start_vcn: vcn of the first allocated cluster
123 * @count: number of clusters to allocate
124 * @start_lcn: starting lcn at which to allocate the clusters or -1 if none
125 * @zone: zone from which to allocate (MFT_ZONE or DATA_ZONE)
126 * @is_extension: if true, the caller is extending an attribute
127 * @is_contig: if true, require contiguous allocation
128 * @is_dealloc: if true, the allocation is for deallocation purposes
129 *
130 * Allocate @count clusters preferably starting at cluster @start_lcn or at the
131 * current allocator position if @start_lcn is -1, on the mounted ntfs volume
132 * @vol. @zone is either DATA_ZONE for allocation of normal clusters or
133 * MFT_ZONE for allocation of clusters for the master file table, i.e. the
134 * $MFT/$DATA attribute.
135 *
136 * @start_vcn specifies the vcn of the first allocated cluster. This makes
137 * merging the resulting runlist with the old runlist easier.
138 *
139 * If @is_extension is 'true', the caller is allocating clusters to extend an
140 * attribute and if it is 'false', the caller is allocating clusters to fill a
141 * hole in an attribute. Practically the difference is that if @is_extension
142 * is 'true' the returned runlist will be terminated with LCN_ENOENT and if
143 * @is_extension is 'false' the runlist will be terminated with
144 * LCN_RL_NOT_MAPPED.
145 *
146 * You need to check the return value with IS_ERR(). If this is false, the
147 * function was successful and the return value is a runlist describing the
148 * allocated cluster(s). If IS_ERR() is true, the function failed and
149 * PTR_ERR() gives you the error code.
150 *
151 * Notes on the allocation algorithm
152 * =================================
153 *
154 * There are two data zones. First is the area between the end of the mft zone
155 * and the end of the volume, and second is the area between the start of the
156 * volume and the start of the mft zone. On unmodified/standard NTFS 1.x
157 * volumes, the second data zone does not exist due to the mft zone being
158 * expanded to cover the start of the volume in order to reserve space for the
159 * mft bitmap attribute.
160 *
161 * This is not the prettiest function but the complexity stems from the need of
162 * implementing the mft vs data zoned approach and from the fact that we have
163 * access to the lcn bitmap in portions of up to 8192 bytes at a time, so we
164 * need to cope with crossing over boundaries of two buffers. Further, the
165 * fact that the allocator allows for caller supplied hints as to the location
166 * of where allocation should begin and the fact that the allocator keeps track
167 * of where in the data zones the next natural allocation should occur,
168 * contribute to the complexity of the function. But it should all be
169 * worthwhile, because this allocator should: 1) be a full implementation of
170 * the MFT zone approach used by Windows NT, 2) cause reduction in
171 * fragmentation, and 3) be speedy in allocations (the code is not optimized
172 * for speed, but the algorithm is, so further speed improvements are probably
173 * possible).
174 *
175 * Locking: - The volume lcn bitmap must be unlocked on entry and is unlocked
176 * on return.
177 * - This function takes the volume lcn bitmap lock for writing and
178 * modifies the bitmap contents.
179 *
180 * Return: Runlist describing the allocated cluster(s) on success, error pointer
181 * on failure.
182 */
struct runlist_element *ntfs_cluster_alloc(struct ntfs_volume *vol, const s64 start_vcn,
		const s64 count, const s64 start_lcn,
		const int zone,
		const bool is_extension,
		const bool is_contig,
		const bool is_dealloc)
{
	s64 zone_start, zone_end, bmp_pos, bmp_initial_pos, last_read_pos, lcn;
	s64 prev_lcn = 0, prev_run_len = 0, mft_zone_size;
	s64 clusters, free_clusters;
	loff_t i_size;
	struct inode *lcnbmp_vi;
	struct runlist_element *rl = NULL;
	struct address_space *mapping;
	struct folio *folio = NULL;
	u8 *buf = NULL, *byte;
	int err = 0, rlpos, rlsize, buf_size, pg_off;
	u8 pass, done_zones, search_zone, need_writeback = 0, bit;
	unsigned int memalloc_flags;
	u8 has_guess, used_zone_pos;
	pgoff_t index;

	ntfs_debug("Entering for start_vcn 0x%llx, count 0x%llx, start_lcn 0x%llx, zone %s_ZONE.",
			start_vcn, count, start_lcn,
			zone == MFT_ZONE ? "MFT" : "DATA");

	lcnbmp_vi = vol->lcnbmp_ino;
	/* Reject out-of-range vcn/lcn and unknown zones up front. */
	if (start_vcn < 0 || start_lcn < LCN_HOLE ||
			zone < FIRST_ZONE || zone > LAST_ZONE)
		return ERR_PTR(-EINVAL);

	/* Return NULL if @count is zero. */
	if (count < 0 || !count)
		return ERR_PTR(-EINVAL);

	/* Forbid fs-recursing allocations for the duration of this call. */
	memalloc_flags = memalloc_nofs_save();

	/* Wait until the mount-time free cluster scan has completed. */
	if (!NVolFreeClusterKnown(vol))
		wait_event(vol->free_waitq, NVolFreeClusterKnown(vol));
	free_clusters = atomic64_read(&vol->free_clusters);

	/* Take the lcnbmp lock for writing. */
	down_write(&vol->lcnbmp_lock);
	/*
	 * For a normal allocation, clusters already reserved as dirty are
	 * not available; for a deallocation-driven allocation they are.
	 */
	if (is_dealloc == false)
		free_clusters -= atomic64_read(&vol->dirty_clusters);

	if (free_clusters < count) {
		err = -ENOSPC;
		goto out_restore;
	}

	/*
	 * If no specific @start_lcn was requested, use the current data zone
	 * position, otherwise use the requested @start_lcn but make sure it
	 * lies outside the mft zone.  Also set done_zones to 0 (no zones done)
	 * and pass depending on whether we are starting inside a zone (1) or
	 * at the beginning of a zone (2).  If requesting from the MFT_ZONE,
	 * we either start at the current position within the mft zone or at
	 * the specified position.  If the latter is out of bounds then we start
	 * at the beginning of the MFT_ZONE.
	 */
	done_zones = 0;
	pass = 1;
	/*
	 * zone_start and zone_end are the current search range.  search_zone
	 * is 1 for mft zone, 2 for data zone 1 (end of mft zone till end of
	 * volume) and 4 for data zone 2 (start of volume till start of mft
	 * zone).
	 */
	has_guess = 1;
	zone_start = start_lcn;

	if (zone_start < 0) {
		if (zone == DATA_ZONE)
			zone_start = vol->data1_zone_pos;
		else
			zone_start = vol->mft_zone_pos;
		if (!zone_start) {
			/*
			 * Zone starts at beginning of volume which means a
			 * single pass is sufficient.
			 */
			pass = 2;
		}
		has_guess = 0;
	}

	/* Only track/update zone positions when the caller gave no hint. */
	used_zone_pos = has_guess ? 0 : 1;

	if (!zone_start || zone_start == vol->mft_zone_start ||
			zone_start == vol->mft_zone_end)
		pass = 2;

	/* Select the initial search zone from the starting lcn. */
	if (zone_start < vol->mft_zone_start) {
		zone_end = vol->mft_zone_start;
		search_zone = 4;
		/* Skip searching the mft zone. */
		done_zones |= 1;
	} else if (zone_start < vol->mft_zone_end) {
		zone_end = vol->mft_zone_end;
		search_zone = 1;
	} else {
		zone_end = vol->nr_clusters;
		search_zone = 2;
		/* Skip searching the mft zone. */
		done_zones |= 1;
	}

	/*
	 * bmp_pos is the current bit position inside the bitmap.  We use
	 * bmp_initial_pos to determine whether or not to do a zone switch.
	 */
	bmp_pos = bmp_initial_pos = zone_start;

	/* Loop until all clusters are allocated, i.e. clusters == 0. */
	clusters = count;
	rlpos = rlsize = 0;
	mapping = lcnbmp_vi->i_mapping;
	i_size = i_size_read(lcnbmp_vi);
	while (1) {
		ntfs_debug("Start of outer while loop: done_zones 0x%x, search_zone %i, pass %i, zone_start 0x%llx, zone_end 0x%llx, bmp_initial_pos 0x%llx, bmp_pos 0x%llx, rlpos %i, rlsize %i.",
				done_zones, search_zone, pass,
				zone_start, zone_end, bmp_initial_pos,
				bmp_pos, rlpos, rlsize);
		/* Loop until we run out of free clusters. */
		last_read_pos = bmp_pos >> 3;
		ntfs_debug("last_read_pos 0x%llx.", last_read_pos);
		if (last_read_pos >= i_size) {
			ntfs_debug("End of attribute reached.  Skipping to zone_pass_done.");
			goto zone_pass_done;
		}
		/* Release the previous bitmap page, flushing it if modified. */
		if (likely(folio)) {
			if (need_writeback) {
				ntfs_debug("Marking page dirty.");
				folio_mark_dirty(folio);
				need_writeback = 0;
			}
			folio_unlock(folio);
			kunmap_local(buf);
			folio_put(folio);
			folio = NULL;
		}

		index = last_read_pos >> PAGE_SHIFT;
		pg_off = last_read_pos & ~PAGE_MASK;
		buf_size = PAGE_SIZE - pg_off;
		if (unlikely(last_read_pos + buf_size > i_size))
			buf_size = i_size - last_read_pos;
		/* Convert buf_size from bytes to bits. */
		buf_size <<= 3;
		/* lcn is now the bit offset within the mapped buffer. */
		lcn = bmp_pos & 7;
		bmp_pos &= ~(s64)7;

		/* Skip pages with no free bits without reading them at all. */
		if (vol->lcn_empty_bits_per_page[index] == 0)
			goto next_bmp_pos;

		folio = read_mapping_folio(mapping, index, NULL);
		if (IS_ERR(folio)) {
			err = PTR_ERR(folio);
			ntfs_error(vol->sb, "Failed to map page.");
			goto out;
		}

		folio_lock(folio);
		buf = kmap_local_folio(folio, 0) + pg_off;
		ntfs_debug("Before inner while loop: buf_size %i, lcn 0x%llx, bmp_pos 0x%llx, need_writeback %i.",
				buf_size, lcn, bmp_pos, need_writeback);
		/* Scan this buffer bit by bit, stopping at the zone end. */
		while (lcn < buf_size && lcn + bmp_pos < zone_end) {
			byte = buf + (lcn >> 3);
			ntfs_debug("In inner while loop: buf_size %i, lcn 0x%llx, bmp_pos 0x%llx, need_writeback %i, byte ofs 0x%x, *byte 0x%x.",
					buf_size, lcn, bmp_pos, need_writeback,
					(unsigned int)(lcn >> 3),
					(unsigned int)*byte);
			bit = 1 << (lcn & 7);
			ntfs_debug("bit 0x%x.", bit);

			if (has_guess) {
				if (*byte & bit) {
					/*
					 * Guess position is in use.  For a
					 * contiguous allocation that already
					 * has at least one run we must stop
					 * here; otherwise fall back to
					 * searching for a free range.
					 */
					if (is_contig == true && prev_run_len > 0)
						goto done;

					has_guess = 0;
					break;
				}
			} else {
				/*
				 * No guess: jump to the start of the largest
				 * free bit range in this buffer and retry
				 * with that as the new guess.
				 */
				lcn = max_empty_bit_range(buf, buf_size >> 3);
				if (lcn < 0)
					break;
				has_guess = 1;
				continue;
			}
			/*
			 * Allocate more memory if needed, including space for
			 * the terminator element.
			 * kvzalloc() operates on whole pages only.
			 */
			if ((rlpos + 2) * sizeof(*rl) > rlsize) {
				struct runlist_element *rl2;

				ntfs_debug("Reallocating memory.");
				if (!rl)
					ntfs_debug("First free bit is at s64 0x%llx.",
							lcn + bmp_pos);
				rl2 = kvzalloc(rlsize + PAGE_SIZE, GFP_NOFS);
				if (unlikely(!rl2)) {
					err = -ENOMEM;
					ntfs_error(vol->sb, "Failed to allocate memory.");
					goto out;
				}
				memcpy(rl2, rl, rlsize);
				kvfree(rl);
				rl = rl2;
				rlsize += PAGE_SIZE;
				ntfs_debug("Reallocated memory, rlsize 0x%x.",
						rlsize);
			}
			/* Allocate the bitmap bit. */
			*byte |= bit;
			/* We need to write this bitmap page to disk. */
			need_writeback = 1;
			ntfs_debug("*byte 0x%x, need_writeback is set.",
					(unsigned int)*byte);
			/* Keep the cached free-space accounting in sync. */
			ntfs_dec_free_clusters(vol, 1);
			ntfs_set_lcn_empty_bits(vol, index, 1, 1);

			/*
			 * Coalesce with previous run if adjacent LCNs.
			 * Otherwise, append a new run.
			 */
			ntfs_debug("Adding run (lcn 0x%llx, len 0x%llx), prev_lcn 0x%llx, lcn 0x%llx, bmp_pos 0x%llx, prev_run_len 0x%llx, rlpos %i.",
					lcn + bmp_pos, 1ULL, prev_lcn,
					lcn, bmp_pos, prev_run_len, rlpos);
			if (prev_lcn == lcn + bmp_pos - prev_run_len && rlpos) {
				ntfs_debug("Coalescing to run (lcn 0x%llx, len 0x%llx).",
						rl[rlpos - 1].lcn,
						rl[rlpos - 1].length);
				rl[rlpos - 1].length = ++prev_run_len;
				ntfs_debug("Run now (lcn 0x%llx, len 0x%llx), prev_run_len 0x%llx.",
						rl[rlpos - 1].lcn,
						rl[rlpos - 1].length,
						prev_run_len);
			} else {
				if (likely(rlpos)) {
					ntfs_debug("Adding new run, (previous run lcn 0x%llx, len 0x%llx).",
							rl[rlpos - 1].lcn, rl[rlpos - 1].length);
					rl[rlpos].vcn = rl[rlpos - 1].vcn +
							prev_run_len;
				} else {
					ntfs_debug("Adding new run, is first run.");
					rl[rlpos].vcn = start_vcn;
				}
				rl[rlpos].lcn = prev_lcn = lcn + bmp_pos;
				rl[rlpos].length = prev_run_len = 1;
				rlpos++;
			}
			/* Done? */
			if (!--clusters) {
				s64 tc;
				/*
				 * Also jumped to from the is_contig short cut
				 * above, i.e. allocation may be partial here.
				 */
done:
				if (!used_zone_pos)
					goto out;
				/*
				 * Update the current zone position.  Positions
				 * of already scanned zones have been updated
				 * during the respective zone switches.
				 */
				tc = lcn + bmp_pos + 1;
				ntfs_debug("Done.  Updating current zone position, tc 0x%llx, search_zone %i.",
						tc, search_zone);
				switch (search_zone) {
				case 1:
					ntfs_debug("Before checks, vol->mft_zone_pos 0x%llx.",
							vol->mft_zone_pos);
					if (tc >= vol->mft_zone_end) {
						vol->mft_zone_pos =
								vol->mft_lcn;
						if (!vol->mft_zone_end)
							vol->mft_zone_pos = 0;
					} else if ((bmp_initial_pos >=
							vol->mft_zone_pos ||
							tc > vol->mft_zone_pos)
							&& tc >= vol->mft_lcn)
						vol->mft_zone_pos = tc;
					ntfs_debug("After checks, vol->mft_zone_pos 0x%llx.",
							vol->mft_zone_pos);
					break;
				case 2:
					ntfs_debug("Before checks, vol->data1_zone_pos 0x%llx.",
							vol->data1_zone_pos);
					if (tc >= vol->nr_clusters)
						vol->data1_zone_pos =
								vol->mft_zone_end;
					else if ((bmp_initial_pos >=
							vol->data1_zone_pos ||
							tc > vol->data1_zone_pos)
							&& tc >= vol->mft_zone_end)
						vol->data1_zone_pos = tc;
					ntfs_debug("After checks, vol->data1_zone_pos 0x%llx.",
							vol->data1_zone_pos);
					break;
				case 4:
					ntfs_debug("Before checks, vol->data2_zone_pos 0x%llx.",
							vol->data2_zone_pos);
					if (tc >= vol->mft_zone_start)
						vol->data2_zone_pos = 0;
					else if (bmp_initial_pos >=
							vol->data2_zone_pos ||
							tc > vol->data2_zone_pos)
						vol->data2_zone_pos = tc;
					ntfs_debug("After checks, vol->data2_zone_pos 0x%llx.",
							vol->data2_zone_pos);
					break;
				default:
					WARN_ON(1);
				}
				ntfs_debug("Finished.  Going to out.");
				goto out;
			}
			lcn++;
		}

		/*
		 * The caller-supplied guess did not pan out.  Restart the
		 * scan from the recorded zone position (first time only);
		 * otherwise just advance to the next bitmap buffer.
		 */
		if (!used_zone_pos) {
			used_zone_pos = 1;
			if (search_zone == 1)
				zone_start = vol->mft_zone_pos;
			else if (search_zone == 2)
				zone_start = vol->data1_zone_pos;
			else
				zone_start = vol->data2_zone_pos;

			if (!zone_start || zone_start == vol->mft_zone_start ||
					zone_start == vol->mft_zone_end)
				pass = 2;
			bmp_pos = zone_start;
		} else {
next_bmp_pos:
			bmp_pos += buf_size;
		}

		ntfs_debug("After inner while loop: buf_size 0x%x, lcn 0x%llx, bmp_pos 0x%llx, need_writeback %i.",
				buf_size, lcn, bmp_pos, need_writeback);
		if (bmp_pos < zone_end) {
			ntfs_debug("Continuing outer while loop, bmp_pos 0x%llx, zone_end 0x%llx.",
					bmp_pos, zone_end);
			continue;
		}
zone_pass_done:	/* Finished with the current zone pass. */
		ntfs_debug("At zone_pass_done, pass %i.", pass);
		if (pass == 1) {
			/*
			 * Now do pass 2, scanning the first part of the zone
			 * we omitted in pass 1.
			 */
			pass = 2;
			zone_end = zone_start;
			switch (search_zone) {
			case 1: /* mft_zone */
				zone_start = vol->mft_zone_start;
				break;
			case 2: /* data1_zone */
				zone_start = vol->mft_zone_end;
				break;
			case 4: /* data2_zone */
				zone_start = 0;
				break;
			default:
				WARN_ON(1);
			}
			/* Sanity check. */
			if (zone_end < zone_start)
				zone_end = zone_start;
			bmp_pos = zone_start;
			ntfs_debug("Continuing outer while loop, pass 2, zone_start 0x%llx, zone_end 0x%llx, bmp_pos 0x%llx.",
					zone_start, zone_end, bmp_pos);
			continue;
		} /* pass == 2 */
done_zones_check:
		ntfs_debug("At done_zones_check, search_zone %i, done_zones before 0x%x, done_zones after 0x%x.",
				search_zone, done_zones,
				done_zones | search_zone);
		done_zones |= search_zone;
		if (done_zones < 7) {
			ntfs_debug("Switching zone.");
			/* Now switch to the next zone we haven't done yet. */
			pass = 1;
			switch (search_zone) {
			case 1:
				ntfs_debug("Switching from mft zone to data1 zone.");
				/* Update mft zone position. */
				if (rlpos && used_zone_pos) {
					s64 tc;

					ntfs_debug("Before checks, vol->mft_zone_pos 0x%llx.",
							vol->mft_zone_pos);
					tc = rl[rlpos - 1].lcn +
							rl[rlpos - 1].length;
					if (tc >= vol->mft_zone_end) {
						vol->mft_zone_pos =
								vol->mft_lcn;
						if (!vol->mft_zone_end)
							vol->mft_zone_pos = 0;
					} else if ((bmp_initial_pos >=
							vol->mft_zone_pos ||
							tc > vol->mft_zone_pos)
							&& tc >= vol->mft_lcn)
						vol->mft_zone_pos = tc;
					ntfs_debug("After checks, vol->mft_zone_pos 0x%llx.",
							vol->mft_zone_pos);
				}
				/* Switch from mft zone to data1 zone. */
switch_to_data1_zone:		search_zone = 2;
				zone_start = bmp_initial_pos =
						vol->data1_zone_pos;
				zone_end = vol->nr_clusters;
				if (zone_start == vol->mft_zone_end)
					pass = 2;
				if (zone_start >= zone_end) {
					vol->data1_zone_pos = zone_start =
							vol->mft_zone_end;
					pass = 2;
				}
				break;
			case 2:
				ntfs_debug("Switching from data1 zone to data2 zone.");
				/* Update data1 zone position. */
				if (rlpos && used_zone_pos) {
					s64 tc;

					ntfs_debug("Before checks, vol->data1_zone_pos 0x%llx.",
							vol->data1_zone_pos);
					tc = rl[rlpos - 1].lcn +
							rl[rlpos - 1].length;
					if (tc >= vol->nr_clusters)
						vol->data1_zone_pos =
								vol->mft_zone_end;
					else if ((bmp_initial_pos >=
							vol->data1_zone_pos ||
							tc > vol->data1_zone_pos)
							&& tc >= vol->mft_zone_end)
						vol->data1_zone_pos = tc;
					ntfs_debug("After checks, vol->data1_zone_pos 0x%llx.",
							vol->data1_zone_pos);
				}
				/* Switch from data1 zone to data2 zone. */
				search_zone = 4;
				zone_start = bmp_initial_pos =
						vol->data2_zone_pos;
				zone_end = vol->mft_zone_start;
				if (!zone_start)
					pass = 2;
				if (zone_start >= zone_end) {
					vol->data2_zone_pos = zone_start =
							bmp_initial_pos = 0;
					pass = 2;
				}
				break;
			case 4:
				ntfs_debug("Switching from data2 zone to data1 zone.");
				/* Update data2 zone position. */
				if (rlpos && used_zone_pos) {
					s64 tc;

					ntfs_debug("Before checks, vol->data2_zone_pos 0x%llx.",
							vol->data2_zone_pos);
					tc = rl[rlpos - 1].lcn +
							rl[rlpos - 1].length;
					if (tc >= vol->mft_zone_start)
						vol->data2_zone_pos = 0;
					else if (bmp_initial_pos >=
							vol->data2_zone_pos ||
							tc > vol->data2_zone_pos)
						vol->data2_zone_pos = tc;
					ntfs_debug("After checks, vol->data2_zone_pos 0x%llx.",
							vol->data2_zone_pos);
				}
				/* Switch from data2 zone to data1 zone. */
				goto switch_to_data1_zone;
			default:
				WARN_ON(1);
			}
			ntfs_debug("After zone switch, search_zone %i, pass %i, bmp_initial_pos 0x%llx, zone_start 0x%llx, zone_end 0x%llx.",
					search_zone, pass,
					bmp_initial_pos,
					zone_start,
					zone_end);
			bmp_pos = zone_start;
			if (zone_start == zone_end) {
				ntfs_debug("Empty zone, going to done_zones_check.");
				/* Empty zone.  Don't bother searching it. */
				goto done_zones_check;
			}
			ntfs_debug("Continuing outer while loop.");
			continue;
		} /* done_zones == 7 */
		ntfs_debug("All zones are finished.");
		/*
		 * All zones are finished!  If DATA_ZONE, shrink mft zone.  If
		 * MFT_ZONE, we have really run out of space.
		 */
		mft_zone_size = vol->mft_zone_end - vol->mft_zone_start;
		ntfs_debug("vol->mft_zone_start 0x%llx, vol->mft_zone_end 0x%llx, mft_zone_size 0x%llx.",
				vol->mft_zone_start, vol->mft_zone_end,
				mft_zone_size);
		if (zone == MFT_ZONE || mft_zone_size <= 0) {
			ntfs_debug("No free clusters left, going to out.");
			/* Really no more space left on device. */
			err = -ENOSPC;
			goto out;
		} /* zone == DATA_ZONE && mft_zone_size > 0 */
		/* Halve the mft zone and hand the space to data zone 1. */
		ntfs_debug("Shrinking mft zone.");
		zone_end = vol->mft_zone_end;
		mft_zone_size >>= 1;
		if (mft_zone_size > 0)
			vol->mft_zone_end = vol->mft_zone_start + mft_zone_size;
		else /* mft zone and data2 zone no longer exist. */
			vol->data2_zone_pos = vol->mft_zone_start =
					vol->mft_zone_end = 0;
		if (vol->mft_zone_pos >= vol->mft_zone_end) {
			vol->mft_zone_pos = vol->mft_lcn;
			if (!vol->mft_zone_end)
				vol->mft_zone_pos = 0;
		}
		bmp_pos = zone_start = bmp_initial_pos =
				vol->data1_zone_pos = vol->mft_zone_end;
		search_zone = 2;
		pass = 2;
		done_zones &= ~2;
		ntfs_debug("After shrinking mft zone, mft_zone_size 0x%llx, vol->mft_zone_start 0x%llx, vol->mft_zone_end 0x%llx, vol->mft_zone_pos 0x%llx, search_zone 2, pass 2, dones_zones 0x%x, zone_start 0x%llx, zone_end 0x%llx, vol->data1_zone_pos 0x%llx, continuing outer while loop.",
				mft_zone_size, vol->mft_zone_start,
				vol->mft_zone_end, vol->mft_zone_pos,
				done_zones, zone_start, zone_end,
				vol->data1_zone_pos);
	}
	ntfs_debug("After outer while loop.");
out:
	ntfs_debug("At out.");
	/*
	 * Add runlist terminator element.  rl is only ever allocated after
	 * at least one run has been added, so rlpos >= 1 here when rl != NULL.
	 */
	if (likely(rl)) {
		rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length;
		rl[rlpos].lcn = is_extension ? LCN_ENOENT : LCN_RL_NOT_MAPPED;
		rl[rlpos].length = 0;
	}
	/* Release the last bitmap page, flushing it if we modified it. */
	if (!IS_ERR_OR_NULL(folio)) {
		if (need_writeback) {
			ntfs_debug("Marking page dirty.");
			folio_mark_dirty(folio);
			need_writeback = 0;
		}
		folio_unlock(folio);
		kunmap_local(buf);
		folio_put(folio);
	}
	if (likely(!err)) {
		/* Success with no runlist means something went badly wrong. */
		if (!rl) {
			err = -EIO;
			goto out_restore;
		}
		/*
		 * NOTE(review): only rl->length (the first run) is released
		 * from the dirty cluster reservation here - confirm callers
		 * only use is_dealloc allocations for single-run requests.
		 */
		if (is_dealloc == true)
			ntfs_release_dirty_clusters(vol, rl->length);
		ntfs_debug("Done.");
		goto out_restore;
	}
	if (err != -ENOSPC)
		ntfs_error(vol->sb,
			   "Failed to allocate clusters, aborting (error %i).",
			   err);
	if (rl) {
		int err2;

		if (err == -ENOSPC)
			ntfs_debug("Not enough space to complete allocation, err -ENOSPC, first free lcn 0x%llx, could allocate up to 0x%llx clusters.",
					rl[0].lcn, count - clusters);
		/* Deallocate all allocated clusters. */
		ntfs_debug("Attempting rollback...");
		err2 = ntfs_cluster_free_from_rl_nolock(vol, rl);
		if (err2) {
			ntfs_error(vol->sb,
				   "Failed to rollback (error %i).  Leaving inconsistent metadata!  Unmount and run chkdsk.",
				   err2);
			NVolSetErrors(vol);
		}
		/* Free the runlist. */
		kvfree(rl);
	} else if (err == -ENOSPC)
		ntfs_debug("No space left at all, err = -ENOSPC, first free lcn = 0x%llx.",
				vol->data1_zone_pos);
	/*
	 * NOTE(review): this drops the entire dirty cluster reservation for
	 * the volume, not just this allocation's share - verify intended.
	 */
	atomic64_set(&vol->dirty_clusters, 0);

out_restore:
	up_write(&vol->lcnbmp_lock);
	memalloc_nofs_restore(memalloc_flags);

	/* On error rl has been freed (or was never allocated). */
	return err < 0 ? ERR_PTR(err) : rl;
}
776
777/*
778 * __ntfs_cluster_free - free clusters on an ntfs volume
779 * @ni: ntfs inode whose runlist describes the clusters to free
780 * @start_vcn: vcn in the runlist of @ni at which to start freeing clusters
781 * @count: number of clusters to free or -1 for all clusters
782 * @ctx: active attribute search context if present or NULL if not
783 * @is_rollback: true if this is a rollback operation
784 *
785 * Free @count clusters starting at the cluster @start_vcn in the runlist
786 * described by the vfs inode @ni.
787 *
788 * If @count is -1, all clusters from @start_vcn to the end of the runlist are
789 * deallocated. Thus, to completely free all clusters in a runlist, use
790 * @start_vcn = 0 and @count = -1.
791 *
792 * If @ctx is specified, it is an active search context of @ni and its base mft
793 * record. This is needed when __ntfs_cluster_free() encounters unmapped
794 * runlist fragments and allows their mapping. If you do not have the mft
795 * record mapped, you can specify @ctx as NULL and __ntfs_cluster_free() will
796 * perform the necessary mapping and unmapping.
797 *
798 * Note, __ntfs_cluster_free() saves the state of @ctx on entry and restores it
799 * before returning. Thus, @ctx will be left pointing to the same attribute on
800 * return as on entry. However, the actual pointers in @ctx may point to
801 * different memory locations on return, so you must remember to reset any
802 * cached pointers from the @ctx, i.e. after the call to __ntfs_cluster_free(),
803 * you will probably want to do:
804 * m = ctx->mrec;
805 * a = ctx->attr;
806 * Assuming you cache ctx->attr in a variable @a of type attr_record * and that
807 * you cache ctx->mrec in a variable @m of type struct mft_record *.
808 *
809 * @is_rollback should always be 'false', it is for internal use to rollback
810 * errors. You probably want to use ntfs_cluster_free() instead.
811 *
812 * Note, __ntfs_cluster_free() does not modify the runlist, so you have to
813 * remove from the runlist or mark sparse the freed runs later.
814 *
815 * Return the number of deallocated clusters (not counting sparse ones) on
816 * success and -errno on error.
817 *
818 * WARNING: If @ctx is supplied, regardless of whether success or failure is
819 * returned, you need to check IS_ERR(@ctx->mrec) and if 'true' the @ctx
820 * is no longer valid, i.e. you need to either call
821 * ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
822 * In that case PTR_ERR(@ctx->mrec) will give you the error code for
823 * why the mapping of the old inode failed.
824 *
825 * Locking: - The runlist described by @ni must be locked for writing on entry
826 * and is locked on return. Note the runlist may be modified when
827 * needed runlist fragments need to be mapped.
828 * - The volume lcn bitmap must be unlocked on entry and is unlocked
829 * on return.
830 * - This function takes the volume lcn bitmap lock for writing and
831 * modifies the bitmap contents.
832 * - If @ctx is NULL, the base mft record of @ni must not be mapped on
833 * entry and it will be left unmapped on return.
834 * - If @ctx is not NULL, the base mft record must be mapped on entry
835 * and it will be left mapped on return.
836 */
s64 __ntfs_cluster_free(struct ntfs_inode *ni, const s64 start_vcn, s64 count,
		struct ntfs_attr_search_ctx *ctx, const bool is_rollback)
{
	s64 delta, to_free, total_freed, real_freed;
	struct ntfs_volume *vol;
	struct inode *lcnbmp_vi;
	struct runlist_element *rl;
	int err;
	unsigned int memalloc_flags;

	ntfs_debug("Entering for i_ino 0x%llx, start_vcn 0x%llx, count 0x%llx.%s",
			ni->mft_no, start_vcn, count,
			is_rollback ? " (rollback)" : "");
	vol = ni->vol;
	lcnbmp_vi = vol->lcnbmp_ino;
	/* count == -1 means "free everything from start_vcn onwards". */
	if (start_vcn < 0 || count < -1)
		return -EINVAL;

	/* Wait until the mount-time free cluster scan has completed. */
	if (!NVolFreeClusterKnown(vol))
		wait_event(vol->free_waitq, NVolFreeClusterKnown(vol));

	/*
	 * Lock the lcn bitmap for writing but only if not rolling back.  We
	 * must hold the lock all the way including through rollback otherwise
	 * rollback is not possible because once we have cleared a bit and
	 * dropped the lock, anyone could have set the bit again, thus
	 * allocating the cluster for another use.
	 */
	if (likely(!is_rollback)) {
		/* memalloc_flags is only valid when !is_rollback. */
		memalloc_flags = memalloc_nofs_save();
		down_write(&vol->lcnbmp_lock);
	}

	total_freed = real_freed = 0;

	rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, ctx);
	if (IS_ERR(rl)) {
		err = PTR_ERR(rl);
		/* -ENOENT: nothing mapped at @start_vcn, nothing to free. */
		if (err == -ENOENT) {
			if (likely(!is_rollback)) {
				up_write(&vol->lcnbmp_lock);
				memalloc_nofs_restore(memalloc_flags);
			}
			return 0;
		}

		if (!is_rollback)
			ntfs_error(vol->sb,
				   "Failed to find first runlist element (error %d), aborting.",
				   err);
		goto err_out;
	}
	if (unlikely(rl->lcn < LCN_HOLE)) {
		if (!is_rollback)
			ntfs_error(vol->sb, "First runlist element has invalid lcn, aborting.");
		err = -EIO;
		goto err_out;
	}
	/* Find the starting cluster inside the run that needs freeing. */
	delta = start_vcn - rl->vcn;

	/* The number of clusters in this run that need freeing. */
	to_free = rl->length - delta;
	if (count >= 0 && to_free > count)
		to_free = count;

	if (likely(rl->lcn >= 0)) {
		/*
		 * Do the actual freeing of the clusters in this run: clear
		 * the bits when freeing, re-set them when rolling back.
		 */
		err = ntfs_bitmap_set_bits_in_run(lcnbmp_vi, rl->lcn + delta,
				to_free, likely(!is_rollback) ? 0 : 1);
		if (unlikely(err)) {
			if (!is_rollback)
				ntfs_error(vol->sb,
					   "Failed to clear first run (error %i), aborting.",
					   err);
			goto err_out;
		}
		/* We have freed @to_free real clusters. */
		real_freed = to_free;
	}
	/* Go to the next run and adjust the number of clusters left to free. */
	++rl;
	if (count >= 0)
		count -= to_free;

	/* Keep track of the total "freed" clusters, including sparse ones. */
	total_freed = to_free;
	/*
	 * Loop over the remaining runs, using @count as a capping value, and
	 * free them.
	 */
	for (; rl->length && count != 0; ++rl) {
		if (unlikely(rl->lcn < LCN_HOLE)) {
			s64 vcn;

			/* Attempt to map runlist. */
			vcn = rl->vcn;
			rl = ntfs_attr_find_vcn_nolock(ni, vcn, ctx);
			if (IS_ERR(rl)) {
				err = PTR_ERR(rl);
				if (!is_rollback)
					ntfs_error(vol->sb,
						   "Failed to map runlist fragment or failed to find subsequent runlist element.");
				goto err_out;
			}
			if (unlikely(rl->lcn < LCN_HOLE)) {
				if (!is_rollback)
					ntfs_error(vol->sb,
						   "Runlist element has invalid lcn (0x%llx).",
						   rl->lcn);
				err = -EIO;
				goto err_out;
			}
		}
		/* The number of clusters in this run that need freeing. */
		to_free = rl->length;
		if (count >= 0 && to_free > count)
			to_free = count;

		if (likely(rl->lcn >= 0)) {
			/* Do the actual freeing of the clusters in the run. */
			err = ntfs_bitmap_set_bits_in_run(lcnbmp_vi, rl->lcn,
					to_free, likely(!is_rollback) ? 0 : 1);
			if (unlikely(err)) {
				if (!is_rollback)
					ntfs_error(vol->sb, "Failed to clear subsequent run.");
				goto err_out;
			}
			/* We have freed @to_free real clusters. */
			real_freed += to_free;
		}
		/* Adjust the number of clusters left to free. */
		if (count >= 0)
			count -= to_free;

		/* Update the total done clusters. */
		total_freed += to_free;
	}
	/* Note: on rollback this *increases* free_clusters by the re-set bits. */
	ntfs_inc_free_clusters(vol, real_freed);
	if (likely(!is_rollback)) {
		up_write(&vol->lcnbmp_lock);
		memalloc_nofs_restore(memalloc_flags);
	}

	/* If count > 0 the runlist ended before @count clusters were freed. */
	WARN_ON(count > 0);

	/*
	 * Best-effort discard of the freed clusters.  This runs after the
	 * lcnbmp lock has been dropped and re-walks the runlist; any error
	 * (including failing to re-find the runlist) silently stops the
	 * discard, never the free itself.
	 */
	if (NVolDiscard(vol) && !is_rollback) {
		s64 total_discarded = 0, rl_off;
		u32 gran = bdev_discard_granularity(vol->sb->s_bdev);

		rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, ctx);
		if (IS_ERR(rl))
			return real_freed;
		rl_off = start_vcn - rl->vcn;
		while (rl->length && total_discarded < total_freed) {
			s64 to_discard = rl->length - rl_off;

			if (to_discard + total_discarded > total_freed)
				to_discard = total_freed - total_discarded;
			if (rl->lcn >= 0) {
				sector_t start_sector, end_sector;
				int ret;

				/*
				 * Shrink the range inwards to the discard
				 * granularity so no partial granule is sent.
				 */
				start_sector = ALIGN(NTFS_CLU_TO_B(vol, rl->lcn + rl_off),
						gran) >> SECTOR_SHIFT;
				end_sector = ALIGN_DOWN(NTFS_CLU_TO_B(vol,
						rl->lcn + rl_off + to_discard),
						gran) >> SECTOR_SHIFT;
				if (start_sector < end_sector) {
					ret = blkdev_issue_discard(vol->sb->s_bdev, start_sector,
							end_sector - start_sector,
							GFP_NOFS);
					if (ret)
						break;
				}
			}

			total_discarded += to_discard;
			++rl;
			rl_off = 0;
		}
	}

	/* We are done.  Return the number of actually freed clusters. */
	ntfs_debug("Done.");
	return real_freed;
err_out:
	/* A failing rollback must not recurse into another rollback. */
	if (is_rollback)
		return err;
	/* If no real clusters were freed, no need to rollback. */
	if (!real_freed) {
		up_write(&vol->lcnbmp_lock);
		memalloc_nofs_restore(memalloc_flags);
		return err;
	}
	/*
	 * Attempt to rollback and if that succeeds just return the error code.
	 * If rollback fails, set the volume errors flag, emit an error
	 * message, and return the error code.
	 */
	delta = __ntfs_cluster_free(ni, start_vcn, total_freed, ctx, true);
	if (delta < 0) {
		ntfs_error(vol->sb,
			   "Failed to rollback (error %i).  Leaving inconsistent metadata!  Unmount and run chkdsk.",
			   (int)delta);
		NVolSetErrors(vol);
	}
	/*
	 * NOTE(review): if the rollback failed, delta is a negative errno
	 * here and is still passed to ntfs_dec_free_clusters(), which would
	 * move the counter the wrong way - confirm this is intended.
	 */
	ntfs_dec_free_clusters(vol, delta);
	up_write(&vol->lcnbmp_lock);
	memalloc_nofs_restore(memalloc_flags);
	ntfs_error(vol->sb, "Aborting (error %i).", err);
	return err;
}
1049}