Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
4 * Copyright (C) 2010 Red Hat, Inc.
5 * All Rights Reserved.
6 */
7#include "xfs_platform.h"
8#include "xfs_fs.h"
9#include "xfs_shared.h"
10#include "xfs_format.h"
11#include "xfs_log_format.h"
12#include "xfs_trans_resv.h"
13#include "xfs_mount.h"
14#include "xfs_da_format.h"
15#include "xfs_da_btree.h"
16#include "xfs_inode.h"
17#include "xfs_bmap_btree.h"
18#include "xfs_quota.h"
19#include "xfs_trans.h"
20#include "xfs_qm.h"
21#include "xfs_trans_space.h"
22#include "xfs_rtbitmap.h"
23#include "xfs_attr_item.h"
24#include "xfs_log.h"
25#include "xfs_defer.h"
26#include "xfs_bmap_item.h"
27#include "xfs_extfree_item.h"
28#include "xfs_rmap_item.h"
29#include "xfs_refcount_item.h"
30#include "xfs_trace.h"
31
/*
 * Flag values for xfs_calc_inode_chunk_res() below.
 * NOTE(review): leading-underscore-plus-capital names are reserved for the
 * implementation per the C standard; renaming would touch all users here,
 * so they are documented rather than changed.
 */
#define _ALLOC true
#define _FREE false
34
35/*
36 * A buffer has a format structure overhead in the log in addition
37 * to the data, so we need to take this into account when reserving
38 * space in a transaction for a buffer. Round the space required up
39 * to a multiple of 128 bytes so that we don't change the historical
40 * reservation that has been used for this overhead.
41 */
42STATIC uint
43xfs_buf_log_overhead(void)
44{
45 return round_up(sizeof(struct xlog_op_header) +
46 sizeof(struct xfs_buf_log_format), 128);
47}
48
49/*
50 * Calculate out transaction log reservation per item in bytes.
51 *
52 * The nbufs argument is used to indicate the number of items that
53 * will be changed in a transaction. size is used to tell how many
54 * bytes should be reserved per item.
55 */
56STATIC uint
57xfs_calc_buf_res(
58 uint nbufs,
59 uint size)
60{
61 return nbufs * (size + xfs_buf_log_overhead());
62}
63
64/*
65 * Per-extent log reservation for the btree changes involved in freeing or
66 * allocating an extent. In classic XFS there were two trees that will be
67 * modified (bnobt + cntbt). With rmap enabled, there are three trees
68 * (rmapbt). The number of blocks reserved is based on the formula:
69 *
70 * num trees * ((2 blocks/level * max depth) - 1)
71 *
72 * Keep in mind that max depth is calculated separately for each type of tree.
73 */
74uint
75xfs_allocfree_block_count(
76 struct xfs_mount *mp,
77 uint num_ops)
78{
79 uint blocks;
80
81 blocks = num_ops * 2 * (2 * mp->m_alloc_maxlevels - 1);
82 if (xfs_has_rmapbt(mp))
83 blocks += num_ops * (2 * mp->m_rmap_maxlevels - 1);
84
85 return blocks;
86}
87
88/*
89 * Per-extent log reservation for refcount btree changes. These are never done
90 * in the same transaction as an allocation or a free, so we compute them
91 * separately.
92 */
93static unsigned int
94xfs_refcountbt_block_count(
95 struct xfs_mount *mp,
96 unsigned int num_ops)
97{
98 return num_ops * (2 * mp->m_refc_maxlevels - 1);
99}
100
101static unsigned int
102xfs_rtrefcountbt_block_count(
103 struct xfs_mount *mp,
104 unsigned int num_ops)
105{
106 return num_ops * (2 * mp->m_rtrefc_maxlevels - 1);
107}
108
109/*
110 * Logging inodes is really tricksy. They are logged in memory format,
111 * which means that what we write into the log doesn't directly translate into
112 * the amount of space they use on disk.
113 *
114 * Case in point - btree format forks in memory format use more space than the
115 * on-disk format. In memory, the buffer contains a normal btree block header so
116 * the btree code can treat it as though it is just another generic buffer.
117 * However, when we write it to the inode fork, we don't write all of this
118 * header as it isn't needed. e.g. the root is only ever in the inode, so
119 * there's no need for sibling pointers which would waste 16 bytes of space.
120 *
121 * Hence when we have an inode with a maximally sized btree format fork, then
122 * amount of information we actually log is greater than the size of the inode
123 * on disk. Hence we need an inode reservation function that calculates all this
124 * correctly. So, we log:
125 *
126 * - 4 log op headers for object
127 * - for the ilf, the inode core and 2 forks
128 * - inode log format object
129 * - the inode core
130 * - two inode forks containing bmap btree root blocks.
131 * - the btree data contained by both forks will fit into the inode size,
132 * hence when combined with the inode core above, we have a total of the
133 * actual inode size.
134 * - the BMBT headers need to be accounted separately, as they are
135 * additional to the records and pointers that fit inside the inode
136 * forks.
137 */
138STATIC uint
139xfs_calc_inode_res(
140 struct xfs_mount *mp,
141 uint ninodes)
142{
143 return ninodes *
144 (4 * sizeof(struct xlog_op_header) +
145 sizeof(struct xfs_inode_log_format) +
146 mp->m_sb.sb_inodesize +
147 2 * xfs_bmbt_block_len(mp));
148}
149
150/*
151 * Inode btree record insertion/removal modifies the inode btree and free space
152 * btrees (since the inobt does not use the agfl). This requires the following
153 * reservation:
154 *
155 * the inode btree: max depth * blocksize
156 * the allocation btrees: 2 trees * (max depth - 1) * block size
157 *
158 * The caller must account for SB and AG header modifications, etc.
159 */
160STATIC uint
161xfs_calc_inobt_res(
162 struct xfs_mount *mp)
163{
164 return xfs_calc_buf_res(M_IGEO(mp)->inobt_maxlevels,
165 XFS_FSB_TO_B(mp, 1)) +
166 xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
167 XFS_FSB_TO_B(mp, 1));
168}
169
170/*
171 * The free inode btree is a conditional feature. The behavior differs slightly
172 * from that of the traditional inode btree in that the finobt tracks records
173 * for inode chunks with at least one free inode. A record can be removed from
174 * the tree during individual inode allocation. Therefore the finobt
175 * reservation is unconditional for both the inode chunk allocation and
176 * individual inode allocation (modify) cases.
177 *
178 * Behavior aside, the reservation for finobt modification is equivalent to the
179 * traditional inobt: cover a full finobt shape change plus block allocation.
180 */
181STATIC uint
182xfs_calc_finobt_res(
183 struct xfs_mount *mp)
184{
185 if (!xfs_has_finobt(mp))
186 return 0;
187
188 return xfs_calc_inobt_res(mp);
189}
190
191/*
192 * Calculate the reservation required to allocate or free an inode chunk. This
193 * includes:
194 *
195 * the allocation btrees: 2 trees * (max depth - 1) * block size
196 * the inode chunk: m_ino_geo.ialloc_blks * N
197 *
198 * The size N of the inode chunk reservation depends on whether it is for
199 * allocation or free and which type of create transaction is in use. An inode
200 * chunk free always invalidates the buffers and only requires reservation for
201 * headers (N == 0). An inode chunk allocation requires a chunk sized
202 * reservation on v4 and older superblocks to initialize the chunk. No chunk
203 * reservation is required for allocation on v5 supers, which use ordered
204 * buffers to initialize.
205 */
206STATIC uint
207xfs_calc_inode_chunk_res(
208 struct xfs_mount *mp,
209 bool alloc)
210{
211 uint res, size = 0;
212
213 res = xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
214 XFS_FSB_TO_B(mp, 1));
215 if (alloc) {
216 /* icreate tx uses ordered buffers */
217 if (xfs_has_v3inodes(mp))
218 return res;
219 size = XFS_FSB_TO_B(mp, 1);
220 }
221
222 res += xfs_calc_buf_res(M_IGEO(mp)->ialloc_blks, size);
223 return res;
224}
225
226/*
227 * Per-extent log reservation for the btree changes involved in freeing or
228 * allocating a realtime extent. We have to be able to log as many rtbitmap
229 * blocks as needed to mark inuse XFS_BMBT_MAX_EXTLEN blocks' worth of realtime
230 * extents, as well as the realtime summary block (t1). Realtime rmap btree
231 * operations happen in a second transaction, so factor in a couple of rtrmapbt
232 * splits (t2).
233 */
234static unsigned int
235xfs_rtalloc_block_count(
236 struct xfs_mount *mp,
237 unsigned int num_ops)
238{
239 unsigned int rtbmp_blocks;
240 xfs_rtxlen_t rtxlen;
241 unsigned int t1, t2 = 0;
242
243 rtxlen = xfs_extlen_to_rtxlen(mp, XFS_MAX_BMBT_EXTLEN);
244 rtbmp_blocks = xfs_rtbitmap_blockcount_len(mp, rtxlen);
245 t1 = (rtbmp_blocks + 1) * num_ops;
246
247 if (xfs_has_rmapbt(mp))
248 t2 = num_ops * (2 * mp->m_rtrmap_maxlevels - 1);
249
250 return max(t1, t2);
251}
252
253/*
254 * Various log reservation values.
255 *
256 * These are based on the size of the file system block because that is what
257 * most transactions manipulate. Each adds in an additional 128 bytes per
258 * item logged to try to account for the overhead of the transaction mechanism.
259 *
260 * Note: Most of the reservations underestimate the number of allocation
261 * groups into which they could free extents in the xfs_defer_finish() call.
262 * This is because the number in the worst case is quite high and quite
263 * unusual. In order to fix this we need to change xfs_defer_finish() to free
264 * extents in only a single AG at a time. This will require changes to the
265 * EFI code as well, however, so that the EFI for the extents not freed is
266 * logged again in each transaction. See SGI PV #261917.
267 *
268 * Reservation functions here avoid a huge stack in xfs_trans_init due to
269 * register overflow from temporaries in the calculations.
270 */
271
272/*
273 * Finishing a data device refcount updates (t1):
274 * the agfs of the ags containing the blocks: nr_ops * sector size
275 * the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
276 */
277inline unsigned int
278xfs_calc_finish_cui_reservation(
279 struct xfs_mount *mp,
280 unsigned int nr_ops)
281{
282 if (!xfs_has_reflink(mp))
283 return 0;
284
285 return xfs_calc_buf_res(nr_ops, mp->m_sb.sb_sectsize) +
286 xfs_calc_buf_res(xfs_refcountbt_block_count(mp, nr_ops),
287 mp->m_sb.sb_blocksize);
288}
289
290/*
291 * Realtime refcount updates (t2);
292 * the rt refcount inode
293 * the rtrefcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
294 */
295inline unsigned int
296xfs_calc_finish_rt_cui_reservation(
297 struct xfs_mount *mp,
298 unsigned int nr_ops)
299{
300 if (!xfs_has_rtreflink(mp))
301 return 0;
302
303 return xfs_calc_inode_res(mp, 1) +
304 xfs_calc_buf_res(xfs_rtrefcountbt_block_count(mp, nr_ops),
305 mp->m_sb.sb_blocksize);
306}
307
308/*
309 * Compute the log reservation required to handle the refcount update
310 * transaction. Refcount updates are always done via deferred log items.
311 *
312 * This is calculated as the max of:
313 * Data device refcount updates (t1):
314 * the agfs of the ags containing the blocks: nr_ops * sector size
315 * the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
316 * Realtime refcount updates (t2);
317 * the rt refcount inode
318 * the rtrefcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
319 */
static unsigned int
xfs_calc_refcountbt_reservation(
	struct xfs_mount *mp,
	unsigned int	nr_ops)
{
	unsigned int	datadev;
	unsigned int	rtdev;

	datadev = xfs_calc_finish_cui_reservation(mp, nr_ops);
	rtdev = xfs_calc_finish_rt_cui_reservation(mp, nr_ops);

	return max(datadev, rtdev);
}
332
333/*
334 * In a write transaction we can allocate a maximum of 2
335 * extents. This gives (t1):
336 * the inode getting the new extents: inode size
337 * the inode's bmap btree: max depth * block size
338 * the agfs of the ags from which the extents are allocated: 2 * sector
339 * the superblock free block counter: sector size
340 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
341 * Or, if we're writing to a realtime file (t2):
342 * the inode getting the new extents: inode size
343 * the inode's bmap btree: max depth * block size
344 * the agfs of the ags from which the extents are allocated: 2 * sector
345 * the superblock free block counter: sector size
346 * the realtime bitmap: ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes
347 * the realtime summary: 1 block
348 * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
349 * And the bmap_finish transaction can free bmap blocks in a join (t3):
350 * the agfs of the ags containing the blocks: 2 * sector size
351 * the agfls of the ags containing the blocks: 2 * sector size
352 * the super block free block counter: sector size
353 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
354 * And any refcount updates that happen in a separate transaction (t4).
355 */
STATIC uint
xfs_calc_write_reservation(
	struct xfs_mount	*mp,
	bool			for_minlogsize)
{
	unsigned int		t1, t2, t3, t4;
	unsigned int		blksz = XFS_FSB_TO_B(mp, 1);

	/* t1: data device allocation of up to 2 extents */
	t1 = xfs_calc_inode_res(mp, 1) +
	     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), blksz) +
	     xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
	     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz);

	/* t2: realtime device allocation, only with the rt feature */
	if (xfs_has_realtime(mp)) {
		t2 = xfs_calc_inode_res(mp, 1) +
		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
				     blksz) +
		     xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
		     xfs_calc_buf_res(xfs_rtalloc_block_count(mp, 1), blksz) +
		     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1), blksz);
	} else {
		t2 = 0;
	}

	/* t3: bmap_finish freeing of bmap blocks */
	t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
	     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz);

	/*
	 * In the early days of reflink, we included enough reservation to log
	 * two refcountbt splits for each transaction. The codebase runs
	 * refcountbt updates in separate transactions now, so to compute the
	 * minimum log size, add the refcountbtree splits back to t1 and t3 and
	 * do not account them separately as t4. Reflink did not support
	 * realtime when the reservations were established, so no adjustment to
	 * t2 is needed.
	 */
	if (for_minlogsize) {
		unsigned int	adj = 0;

		if (xfs_has_reflink(mp))
			adj = xfs_calc_buf_res(
					xfs_refcountbt_block_count(mp, 2),
					blksz);
		t1 += adj;
		t3 += adj;
		return XFS_DQUOT_LOGRES + max3(t1, t2, t3);
	}

	/* t4: deferred refcount updates, done in their own transaction */
	t4 = xfs_calc_refcountbt_reservation(mp, 1);
	return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3));
}
407
/* Compute the minimum-log-size variant of the write reservation. */
unsigned int
xfs_calc_write_reservation_minlogsize(
	struct xfs_mount	*mp)
{
	return xfs_calc_write_reservation(mp, true);
}
414
415/*
416 * Finishing an EFI can free the blocks and bmap blocks (t2):
417 * the agf for each of the ags: nr * sector size
418 * the agfl for each of the ags: nr * sector size
419 * the super block to reflect the freed blocks: sector size
420 * worst case split in allocation btrees per extent assuming nr extents:
421 * nr exts * 2 trees * (2 * max depth - 1) * block size
422 */
423inline unsigned int
424xfs_calc_finish_efi_reservation(
425 struct xfs_mount *mp,
426 unsigned int nr)
427{
428 return xfs_calc_buf_res((2 * nr) + 1, mp->m_sb.sb_sectsize) +
429 xfs_calc_buf_res(xfs_allocfree_block_count(mp, nr),
430 mp->m_sb.sb_blocksize);
431}
432
433/*
434 * Or, if it's a realtime file (t3):
435 * the agf for each of the ags: 2 * sector size
436 * the agfl for each of the ags: 2 * sector size
437 * the super block to reflect the freed blocks: sector size
438 * the realtime bitmap:
439 * 2 exts * ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes
440 * the realtime summary: 2 exts * 1 block
441 * worst case split in allocation btrees per extent assuming 2 extents:
442 * 2 exts * 2 trees * (2 * max depth - 1) * block size
443 */
444inline unsigned int
445xfs_calc_finish_rt_efi_reservation(
446 struct xfs_mount *mp,
447 unsigned int nr)
448{
449 if (!xfs_has_realtime(mp))
450 return 0;
451
452 return xfs_calc_buf_res((2 * nr) + 1, mp->m_sb.sb_sectsize) +
453 xfs_calc_buf_res(xfs_rtalloc_block_count(mp, nr),
454 mp->m_sb.sb_blocksize) +
455 xfs_calc_buf_res(xfs_allocfree_block_count(mp, nr),
456 mp->m_sb.sb_blocksize);
457}
458
/*
 * Finishing an RUI is the same as an EFI.  We can split the rmap btree
 * twice on each end of the record, and that can cause the AGFL to be
 * refilled or emptied out.
 */
inline unsigned int
xfs_calc_finish_rui_reservation(
	struct xfs_mount *mp,
	unsigned int	nr)
{
	if (xfs_has_rmapbt(mp))
		return xfs_calc_finish_efi_reservation(mp, nr);
	return 0;
}
473
/*
 * Finishing a realtime RUI is the same as a realtime EFI.  We can split the
 * rt rmap btree twice on each end of the record, and that can cause the
 * AGFL to be refilled or emptied out.
 */
inline unsigned int
xfs_calc_finish_rt_rui_reservation(
	struct xfs_mount *mp,
	unsigned int	nr)
{
	if (xfs_has_rtrmapbt(mp))
		return xfs_calc_finish_rt_efi_reservation(mp, nr);
	return 0;
}
488
489/*
490 * In finishing a BUI, we can modify:
491 * the inode being truncated: inode size
492 * dquots
493 * the inode's bmap btree: (max depth + 1) * block size
494 */
495inline unsigned int
496xfs_calc_finish_bui_reservation(
497 struct xfs_mount *mp,
498 unsigned int nr)
499{
500 return xfs_calc_inode_res(mp, 1) + XFS_DQUOT_LOGRES +
501 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
502 mp->m_sb.sb_blocksize);
503}
504
505/*
506 * In truncating a file we free up to two extents at once. We can modify (t1):
507 * the inode being truncated: inode size
508 * the inode's bmap btree: (max depth + 1) * block size
509 * And the bmap_finish transaction can free the blocks and bmap blocks (t2):
510 * the agf for each of the ags: 4 * sector size
511 * the agfl for each of the ags: 4 * sector size
512 * the super block to reflect the freed blocks: sector size
513 * worst case split in allocation btrees per extent assuming 4 extents:
514 * 4 exts * 2 trees * (2 * max depth - 1) * block size
515 * Or, if it's a realtime file (t3):
516 * the agf for each of the ags: 2 * sector size
517 * the agfl for each of the ags: 2 * sector size
518 * the super block to reflect the freed blocks: sector size
519 * the realtime bitmap:
520 * 2 exts * ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes
521 * the realtime summary: 2 exts * 1 block
522 * worst case split in allocation btrees per extent assuming 2 extents:
523 * 2 exts * 2 trees * (2 * max depth - 1) * block size
524 * And any refcount updates that happen in a separate transaction (t4).
525 */
STATIC uint
xfs_calc_itruncate_reservation(
	struct xfs_mount	*mp,
	bool			for_minlogsize)
{
	unsigned int		t1, t2, t3, t4;
	unsigned int		blksz = XFS_FSB_TO_B(mp, 1);

	/* t1: the inode being truncated plus a full bmap btree split */
	t1 = xfs_calc_inode_res(mp, 1) +
	     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz);

	/* t2: freeing up to 4 data device extents */
	t2 = xfs_calc_finish_efi_reservation(mp, 4);
	/* t3: freeing up to 2 realtime extents */
	t3 = xfs_calc_finish_rt_efi_reservation(mp, 2);

	/*
	 * In the early days of reflink, we included enough reservation to log
	 * four refcountbt splits in the same transaction as bnobt/cntbt
	 * updates. The codebase runs refcountbt updates in separate
	 * transactions now, so to compute the minimum log size, add the
	 * refcount btree splits back here and do not compute them separately
	 * as t4. Reflink did not support realtime when the reservations were
	 * established, so do not adjust t3.
	 */
	if (for_minlogsize) {
		if (xfs_has_reflink(mp))
			t2 += xfs_calc_buf_res(
					xfs_refcountbt_block_count(mp, 4),
					blksz);

		return XFS_DQUOT_LOGRES + max3(t1, t2, t3);
	}

	/* t4: deferred refcount updates for up to 2 extents */
	t4 = xfs_calc_refcountbt_reservation(mp, 2);
	return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3));
}
561
/* Compute the minimum-log-size variant of the itruncate reservation. */
unsigned int
xfs_calc_itruncate_reservation_minlogsize(
	struct xfs_mount	*mp)
{
	return xfs_calc_itruncate_reservation(mp, true);
}
568
569static inline unsigned int xfs_calc_pptr_link_overhead(void)
570{
571 return sizeof(struct xfs_attri_log_format) +
572 xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
573 xlog_calc_iovec_len(MAXNAMELEN - 1);
574}
575static inline unsigned int xfs_calc_pptr_unlink_overhead(void)
576{
577 return sizeof(struct xfs_attri_log_format) +
578 xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
579 xlog_calc_iovec_len(MAXNAMELEN - 1);
580}
581static inline unsigned int xfs_calc_pptr_replace_overhead(void)
582{
583 return sizeof(struct xfs_attri_log_format) +
584 xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
585 xlog_calc_iovec_len(MAXNAMELEN - 1) +
586 xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
587 xlog_calc_iovec_len(MAXNAMELEN - 1);
588}
589
590/*
591 * In renaming a files we can modify:
592 * the five inodes involved: 5 * inode size
593 * the two directory btrees: 2 * (max depth + v2) * dir block size
594 * the two directory bmap btrees: 2 * max depth * block size
595 * And the bmap_finish transaction can free dir and bmap blocks (two sets
596 * of bmap blocks) giving (t2):
597 * the agf for the ags in which the blocks live: 3 * sector size
598 * the agfl for the ags in which the blocks live: 3 * sector size
599 * the superblock for the free block count: sector size
600 * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
601 * If parent pointers are enabled (t3), then each transaction in the chain
602 * must be capable of setting or removing the extended attribute
603 * containing the parent information. It must also be able to handle
604 * the three xattr intent items that track the progress of the parent
605 * pointer update.
606 */
STATIC uint
xfs_calc_rename_reservation(
	struct xfs_mount	*mp)
{
	unsigned int		overhead = XFS_DQUOT_LOGRES;
	struct xfs_trans_resv	*resp = M_RES(mp);
	unsigned int		t1, t2, t3 = 0;

	/* t1: the five inodes plus both directories' btree/bmap blocks */
	t1 = xfs_calc_inode_res(mp, 5) +
	     xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
			XFS_FSB_TO_B(mp, 1));

	/* t2: bmap_finish freeing dir and bmap blocks, up to 3 extents */
	t2 = xfs_calc_finish_efi_reservation(mp, 3);

	if (xfs_has_parent(mp)) {
		unsigned int	rename_overhead, exchange_overhead;

		/* t3: one xattr set or remove per transaction roll */
		t3 = max(resp->tr_attrsetm.tr_logres,
			 resp->tr_attrrm.tr_logres);

		/*
		 * For a standard rename, the three xattr intent log items
		 * are (1) replacing the pptr for the source file; (2)
		 * removing the pptr on the dest file; and (3) adding a
		 * pptr for the whiteout file in the src dir.
		 *
		 * For an RENAME_EXCHANGE, there are two xattr intent
		 * items to replace the pptr for both src and dest
		 * files. Link counts don't change and there is no
		 * whiteout.
		 *
		 * In the worst case we can end up relogging all log
		 * intent items to allow the log tail to move ahead, so
		 * they become overhead added to each transaction in a
		 * processing chain.
		 */
		rename_overhead = xfs_calc_pptr_replace_overhead() +
				  xfs_calc_pptr_unlink_overhead() +
				  xfs_calc_pptr_link_overhead();
		exchange_overhead = 2 * xfs_calc_pptr_replace_overhead();

		overhead += max(rename_overhead, exchange_overhead);
	}

	return overhead + max3(t1, t2, t3);
}
653
654static inline unsigned int
655xfs_rename_log_count(
656 struct xfs_mount *mp,
657 struct xfs_trans_resv *resp)
658{
659 /* One for the rename, one more for freeing blocks */
660 unsigned int ret = XFS_RENAME_LOG_COUNT;
661
662 /*
663 * Pre-reserve enough log reservation to handle the transaction
664 * rolling needed to remove or add one parent pointer.
665 */
666 if (xfs_has_parent(mp))
667 ret += max(resp->tr_attrsetm.tr_logcount,
668 resp->tr_attrrm.tr_logcount);
669
670 return ret;
671}
672
673/*
674 * For removing an inode from unlinked list at first, we can modify:
675 * the agi hash list and counters: sector size
676 * the on disk inode before ours in the agi hash list: inode cluster size
677 * the on disk inode in the agi hash list: inode cluster size
678 */
679STATIC uint
680xfs_calc_iunlink_remove_reservation(
681 struct xfs_mount *mp)
682{
683 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
684 2 * M_IGEO(mp)->inode_cluster_size;
685}
686
687static inline unsigned int
688xfs_link_log_count(
689 struct xfs_mount *mp,
690 struct xfs_trans_resv *resp)
691{
692 unsigned int ret = XFS_LINK_LOG_COUNT;
693
694 /*
695 * Pre-reserve enough log reservation to handle the transaction
696 * rolling needed to add one parent pointer.
697 */
698 if (xfs_has_parent(mp))
699 ret += resp->tr_attrsetm.tr_logcount;
700
701 return ret;
702}
703
704/*
705 * For creating a link to an inode:
706 * the parent directory inode: inode size
707 * the linked inode: inode size
708 * the directory btree could split: (max depth + v2) * dir block size
709 * the directory bmap btree could join or split: (max depth + v2) * blocksize
710 * And the bmap_finish transaction can free some bmap blocks giving:
711 * the agf for the ag in which the blocks live: sector size
712 * the agfl for the ag in which the blocks live: sector size
713 * the superblock for the free block count: sector size
714 * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
715 */
716STATIC uint
717xfs_calc_link_reservation(
718 struct xfs_mount *mp)
719{
720 unsigned int overhead = XFS_DQUOT_LOGRES;
721 struct xfs_trans_resv *resp = M_RES(mp);
722 unsigned int t1, t2, t3 = 0;
723
724 overhead += xfs_calc_iunlink_remove_reservation(mp);
725 t1 = xfs_calc_inode_res(mp, 2) +
726 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
727 t2 = xfs_calc_finish_efi_reservation(mp, 1);
728
729 if (xfs_has_parent(mp)) {
730 t3 = resp->tr_attrsetm.tr_logres;
731 overhead += xfs_calc_pptr_link_overhead();
732 }
733
734 return overhead + max3(t1, t2, t3);
735}
736
737/*
738 * For adding an inode to unlinked list we can modify:
739 * the agi hash list: sector size
740 * the on disk inode: inode cluster size
741 */
742STATIC uint
743xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
744{
745 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
746 M_IGEO(mp)->inode_cluster_size;
747}
748
749static inline unsigned int
750xfs_remove_log_count(
751 struct xfs_mount *mp,
752 struct xfs_trans_resv *resp)
753{
754 unsigned int ret = XFS_REMOVE_LOG_COUNT;
755
756 /*
757 * Pre-reserve enough log reservation to handle the transaction
758 * rolling needed to add one parent pointer.
759 */
760 if (xfs_has_parent(mp))
761 ret += resp->tr_attrrm.tr_logcount;
762
763 return ret;
764}
765
766/*
767 * For removing a directory entry we can modify:
768 * the parent directory inode: inode size
769 * the removed inode: inode size
770 * the directory btree could join: (max depth + v2) * dir block size
771 * the directory bmap btree could join or split: (max depth + v2) * blocksize
772 * And the bmap_finish transaction can free the dir and bmap blocks giving:
773 * the agf for the ag in which the blocks live: 2 * sector size
774 * the agfl for the ag in which the blocks live: 2 * sector size
775 * the superblock for the free block count: sector size
776 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
777 */
778STATIC uint
779xfs_calc_remove_reservation(
780 struct xfs_mount *mp)
781{
782 unsigned int overhead = XFS_DQUOT_LOGRES;
783 struct xfs_trans_resv *resp = M_RES(mp);
784 unsigned int t1, t2, t3 = 0;
785
786 overhead += xfs_calc_iunlink_add_reservation(mp);
787
788 t1 = xfs_calc_inode_res(mp, 2) +
789 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
790 t2 = xfs_calc_finish_efi_reservation(mp, 2);
791
792 if (xfs_has_parent(mp)) {
793 t3 = resp->tr_attrrm.tr_logres;
794 overhead += xfs_calc_pptr_unlink_overhead();
795 }
796
797 return overhead + max3(t1, t2, t3);
798}
799
800/*
801 * For create, break it in to the two cases that the transaction
802 * covers. We start with the modify case - allocation done by modification
803 * of the state of existing inodes - and the allocation case.
804 */
805
806/*
807 * For create we can modify:
808 * the parent directory inode: inode size
809 * the new inode: inode size
810 * the inode btree entry: block size
811 * the superblock for the nlink flag: sector size
812 * the directory btree: (max depth + v2) * dir block size
813 * the directory inode's bmap btree: (max depth + v2) * block size
814 * the finobt (record modification and allocation btrees)
815 */
816STATIC uint
817xfs_calc_create_resv_modify(
818 struct xfs_mount *mp)
819{
820 return xfs_calc_inode_res(mp, 2) +
821 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
822 (uint)XFS_FSB_TO_B(mp, 1) +
823 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) +
824 xfs_calc_finobt_res(mp);
825}
826
827/*
828 * For icreate we can allocate some inodes giving:
829 * the agi and agf of the ag getting the new inodes: 2 * sectorsize
830 * the superblock for the nlink flag: sector size
831 * the inode chunk (allocation, optional init)
832 * the inobt (record insertion)
833 * the finobt (optional, record insertion)
834 */
835STATIC uint
836xfs_calc_icreate_resv_alloc(
837 struct xfs_mount *mp)
838{
839 return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
840 mp->m_sb.sb_sectsize +
841 xfs_calc_inode_chunk_res(mp, _ALLOC) +
842 xfs_calc_inobt_res(mp) +
843 xfs_calc_finobt_res(mp);
844}
845
846static inline unsigned int
847xfs_icreate_log_count(
848 struct xfs_mount *mp,
849 struct xfs_trans_resv *resp)
850{
851 unsigned int ret = XFS_CREATE_LOG_COUNT;
852
853 /*
854 * Pre-reserve enough log reservation to handle the transaction
855 * rolling needed to add one parent pointer.
856 */
857 if (xfs_has_parent(mp))
858 ret += resp->tr_attrsetm.tr_logcount;
859
860 return ret;
861}
862
863STATIC uint
864xfs_calc_icreate_reservation(
865 struct xfs_mount *mp)
866{
867 struct xfs_trans_resv *resp = M_RES(mp);
868 unsigned int overhead = XFS_DQUOT_LOGRES;
869 unsigned int t1, t2, t3 = 0;
870
871 t1 = xfs_calc_icreate_resv_alloc(mp);
872 t2 = xfs_calc_create_resv_modify(mp);
873
874 if (xfs_has_parent(mp)) {
875 t3 = resp->tr_attrsetm.tr_logres;
876 overhead += xfs_calc_pptr_link_overhead();
877 }
878
879 return overhead + max3(t1, t2, t3);
880}
881
882STATIC uint
883xfs_calc_create_tmpfile_reservation(
884 struct xfs_mount *mp)
885{
886 uint res = XFS_DQUOT_LOGRES;
887
888 res += xfs_calc_icreate_resv_alloc(mp);
889 return res + xfs_calc_iunlink_add_reservation(mp);
890}
891
892static inline unsigned int
893xfs_mkdir_log_count(
894 struct xfs_mount *mp,
895 struct xfs_trans_resv *resp)
896{
897 unsigned int ret = XFS_MKDIR_LOG_COUNT;
898
899 /*
900 * Pre-reserve enough log reservation to handle the transaction
901 * rolling needed to add one parent pointer.
902 */
903 if (xfs_has_parent(mp))
904 ret += resp->tr_attrsetm.tr_logcount;
905
906 return ret;
907}
908
/*
 * Making a new directory is the same as creating a new file.
 */
STATIC uint
xfs_calc_mkdir_reservation(
	struct xfs_mount	*mp)
{
	/* Same space reservation as icreate; only the log count differs. */
	return xfs_calc_icreate_reservation(mp);
}
918
919static inline unsigned int
920xfs_symlink_log_count(
921 struct xfs_mount *mp,
922 struct xfs_trans_resv *resp)
923{
924 unsigned int ret = XFS_SYMLINK_LOG_COUNT;
925
926 /*
927 * Pre-reserve enough log reservation to handle the transaction
928 * rolling needed to add one parent pointer.
929 */
930 if (xfs_has_parent(mp))
931 ret += resp->tr_attrsetm.tr_logcount;
932
933 return ret;
934}
935
936/*
937 * Making a new symplink is the same as creating a new file, but
938 * with the added blocks for remote symlink data which can be up to 1kB in
939 * length (XFS_SYMLINK_MAXLEN).
940 */
941STATIC uint
942xfs_calc_symlink_reservation(
943 struct xfs_mount *mp)
944{
945 return xfs_calc_icreate_reservation(mp) +
946 xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN);
947}
948
949/*
950 * In freeing an inode we can modify:
951 * the inode being freed: inode size
952 * the super block free inode counter, AGF and AGFL: sector size
953 * the on disk inode (agi unlinked list removal)
954 * the inode chunk (invalidated, headers only)
955 * the inode btree
956 * the finobt (record insertion, removal or modification)
957 *
958 * Note that the inode chunk res. includes an allocfree res. for freeing of the
959 * inode chunk. This is technically extraneous because the inode chunk free is
960 * deferred (it occurs after a transaction roll). Include the extra reservation
961 * anyways since we've had reports of ifree transaction overruns due to too many
962 * agfl fixups during inode chunk frees.
963 */
964STATIC uint
965xfs_calc_ifree_reservation(
966 struct xfs_mount *mp)
967{
968 return XFS_DQUOT_LOGRES +
969 xfs_calc_inode_res(mp, 1) +
970 xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
971 xfs_calc_iunlink_remove_reservation(mp) +
972 xfs_calc_inode_chunk_res(mp, _FREE) +
973 xfs_calc_inobt_res(mp) +
974 xfs_calc_finobt_res(mp);
975}
976
977/*
978 * When only changing the inode we log the inode and possibly the superblock
979 * We also add a bit of slop for the transaction stuff.
980 */
981STATIC uint
982xfs_calc_ichange_reservation(
983 struct xfs_mount *mp)
984{
985 return XFS_DQUOT_LOGRES +
986 xfs_calc_inode_res(mp, 1) +
987 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
988
989}
990
991/*
992 * Growing the data section of the filesystem.
993 * superblock
994 * agi and agf
995 * allocation btrees
996 */
997STATIC uint
998xfs_calc_growdata_reservation(
999 struct xfs_mount *mp)
1000{
1001 return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
1002 xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
1003 XFS_FSB_TO_B(mp, 1));
1004}
1005
1006/*
1007 * Growing the rt section of the filesystem.
1008 * In the first set of transactions (ALLOC) we allocate space to the
1009 * bitmap or summary files.
1010 * superblock: sector size
1011 * agf of the ag from which the extent is allocated: sector size
1012 * bmap btree for bitmap/summary inode: max depth * blocksize
1013 * bitmap/summary inode: inode size
1014 * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
1015 */
1016STATIC uint
1017xfs_calc_growrtalloc_reservation(
1018 struct xfs_mount *mp)
1019{
1020 return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
1021 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
1022 XFS_FSB_TO_B(mp, 1)) +
1023 xfs_calc_inode_res(mp, 1) +
1024 xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
1025 XFS_FSB_TO_B(mp, 1));
1026}
1027
/*
 * Growing the rt section of the filesystem.
 * In the second set of transactions (ZERO) we zero the new metadata blocks,
 * one block at a time:
 *    one bitmap/summary block: blocksize
 */
STATIC uint
xfs_calc_growrtzero_reservation(
	struct xfs_mount	*mp)
{
	return xfs_calc_buf_res(1, mp->m_sb.sb_blocksize);
}
1039
1040/*
1041 * Growing the rt section of the filesystem.
1042 * In the third set of transactions (FREE) we update metadata without
1043 * allocating any new blocks.
1044 * superblock: sector size
1045 * bitmap inode: inode size
1046 * summary inode: inode size
1047 * one bitmap block: blocksize
1048 * summary blocks: new summary size
1049 */
1050STATIC uint
1051xfs_calc_growrtfree_reservation(
1052 struct xfs_mount *mp)
1053{
1054 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
1055 xfs_calc_inode_res(mp, 2) +
1056 xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) +
1057 xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, mp->m_rsumblocks));
1058}
1059
/*
 * Logging the inode modification timestamp on a synchronous write.
 * Only the inode itself is logged:
 *    inode: inode size
 */
STATIC uint
xfs_calc_swrite_reservation(
	struct xfs_mount	*mp)
{
	return xfs_calc_inode_res(mp, 1);
}
1070
/*
 * Logging the inode mode bits when writing a setuid/setgid file.
 * Only the inode itself is logged:
 *    inode: inode size
 */
STATIC uint
xfs_calc_writeid_reservation(
	struct xfs_mount	*mp)
{
	return xfs_calc_inode_res(mp, 1);
}
1081
1082/*
1083 * Converting the inode from non-attributed to attributed.
1084 * the inode being converted: inode size
1085 * agf block and superblock (for block allocation)
1086 * the new block (directory sized)
1087 * bmap blocks for the new directory block
1088 * allocation btrees
1089 */
1090STATIC uint
1091xfs_calc_addafork_reservation(
1092 struct xfs_mount *mp)
1093{
1094 return XFS_DQUOT_LOGRES +
1095 xfs_calc_inode_res(mp, 1) +
1096 xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
1097 xfs_calc_buf_res(1, mp->m_dir_geo->blksize) +
1098 xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
1099 XFS_FSB_TO_B(mp, 1)) +
1100 xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
1101 XFS_FSB_TO_B(mp, 1));
1102}
1103
1104/*
1105 * Removing the attribute fork of a file
1106 * the inode being truncated: inode size
1107 * the inode's bmap btree: max depth * block size
1108 * And the bmap_finish transaction can free the blocks and bmap blocks:
1109 * the agf for each of the ags: 4 * sector size
1110 * the agfl for each of the ags: 4 * sector size
1111 * the super block to reflect the freed blocks: sector size
1112 * worst case split in allocation btrees per extent assuming 4 extents:
1113 * 4 exts * 2 trees * (2 * max depth - 1) * block size
1114 */
1115STATIC uint
1116xfs_calc_attrinval_reservation(
1117 struct xfs_mount *mp)
1118{
1119 return max((xfs_calc_inode_res(mp, 1) +
1120 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
1121 XFS_FSB_TO_B(mp, 1))),
1122 (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
1123 xfs_calc_buf_res(xfs_allocfree_block_count(mp, 4),
1124 XFS_FSB_TO_B(mp, 1))));
1125}
1126
1127/*
1128 * Setting an attribute at mount time.
1129 * the inode getting the attribute
1130 * the superblock for allocations
1131 * the agfs extents are allocated from
1132 * the attribute btree * max depth
1133 * the inode allocation btree
1134 * Since attribute transaction space is dependent on the size of the attribute,
1135 * the calculation is done partially at mount time and partially at runtime(see
1136 * below).
1137 */
1138STATIC uint
1139xfs_calc_attrsetm_reservation(
1140 struct xfs_mount *mp)
1141{
1142 return XFS_DQUOT_LOGRES +
1143 xfs_calc_inode_res(mp, 1) +
1144 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
1145 xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1));
1146}
1147
/*
 * Setting an attribute at runtime, transaction space unit per block.
 *    the superblock for allocations: sector size
 *    the inode bmap btree could join or split: max depth * block size
 * Since the runtime attribute transaction space is dependent on the total
 * blocks needed for the 1st bmap, here we calculate out the space unit for
 * one block so that the caller could figure out the total space according
 * to the attribute extent length in blocks by:
 *	ext * M_RES(mp)->tr_attrsetrt.tr_logres
 */
STATIC uint
xfs_calc_attrsetrt_reservation(
	struct xfs_mount	*mp)
{
	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
		xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
				XFS_FSB_TO_B(mp, 1));
}
1166
1167/*
1168 * Removing an attribute.
1169 * the inode: inode size
1170 * the attribute btree could join: max depth * block size
1171 * the inode bmap btree could join or split: max depth * block size
1172 * And the bmap_finish transaction can free the attr blocks freed giving:
1173 * the agf for the ag in which the blocks live: 2 * sector size
1174 * the agfl for the ag in which the blocks live: 2 * sector size
1175 * the superblock for the free block count: sector size
1176 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
1177 */
1178STATIC uint
1179xfs_calc_attrrm_reservation(
1180 struct xfs_mount *mp)
1181{
1182 return XFS_DQUOT_LOGRES +
1183 max((xfs_calc_inode_res(mp, 1) +
1184 xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH,
1185 XFS_FSB_TO_B(mp, 1)) +
1186 (uint)XFS_FSB_TO_B(mp,
1187 XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
1188 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)),
1189 (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
1190 xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2),
1191 XFS_FSB_TO_B(mp, 1))));
1192}
1193
/*
 * Clearing a bad agino number in an agi hash bucket.  Only the AGI buffer
 * itself is logged:
 *    agi: sector size
 */
STATIC uint
xfs_calc_clear_agi_bucket_reservation(
	struct xfs_mount	*mp)
{
	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
}
1203
/*
 * Adjusting quota limits.
 *    the disk quota buffer: sizeof(struct xfs_disk_dquot)
 * No mount geometry involved, hence no mp argument.
 */
STATIC uint
xfs_calc_qm_setqlim_reservation(void)
{
	return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot));
}
1213
/*
 * Allocating quota on disk if needed.
 *    the write transaction log space for quota file extent allocation
 *    the unit of quota allocation: one system block size
 *
 * NOTE(review): the buffer reservation is one dquot cluster's worth of
 * blocks minus one byte — presumably to preserve the historical reservation
 * size; confirm before changing.
 */
STATIC uint
xfs_calc_qm_dqalloc_reservation(
	struct xfs_mount	*mp,
	bool			for_minlogsize)
{
	return xfs_calc_write_reservation(mp, for_minlogsize) +
		xfs_calc_buf_res(1,
			XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
}
1228
/*
 * Quota allocation reservation as used by the minimum log size calculation:
 * the same computation as tr_qm_dqalloc but with the for_minlogsize variant
 * of the write reservation.
 */
unsigned int
xfs_calc_qm_dqalloc_reservation_minlogsize(
	struct xfs_mount	*mp)
{
	return xfs_calc_qm_dqalloc_reservation(mp, true);
}
1235
/*
 * Syncing the incore super block changes to disk.
 *    the super block to reflect the changes: sector size
 */
STATIC uint
xfs_calc_sb_reservation(
	struct xfs_mount	*mp)
{
	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
}
1246
1247/*
1248 * Namespace reservations.
1249 *
1250 * These get tricky when parent pointers are enabled as we have attribute
1251 * modifications occurring from within these transactions. Rather than confuse
1252 * each of these reservation calculations with the conditional attribute
1253 * reservations, add them here in a clear and concise manner. This requires that
1254 * the attribute reservations have already been calculated.
1255 *
1256 * Note that we only include the static attribute reservation here; the runtime
1257 * reservation will have to be modified by the size of the attributes being
1258 * added/removed/modified. See the comments on the attribute reservation
1259 * calculations for more details.
1260 */
1261STATIC void
1262xfs_calc_namespace_reservations(
1263 struct xfs_mount *mp,
1264 struct xfs_trans_resv *resp)
1265{
1266 ASSERT(resp->tr_attrsetm.tr_logres > 0);
1267
1268 resp->tr_rename.tr_logres = xfs_calc_rename_reservation(mp);
1269 resp->tr_rename.tr_logcount = xfs_rename_log_count(mp, resp);
1270 resp->tr_rename.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
1271
1272 resp->tr_link.tr_logres = xfs_calc_link_reservation(mp);
1273 resp->tr_link.tr_logcount = xfs_link_log_count(mp, resp);
1274 resp->tr_link.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
1275
1276 resp->tr_remove.tr_logres = xfs_calc_remove_reservation(mp);
1277 resp->tr_remove.tr_logcount = xfs_remove_log_count(mp, resp);
1278 resp->tr_remove.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
1279
1280 resp->tr_symlink.tr_logres = xfs_calc_symlink_reservation(mp);
1281 resp->tr_symlink.tr_logcount = xfs_symlink_log_count(mp, resp);
1282 resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
1283
1284 resp->tr_create.tr_logres = xfs_calc_icreate_reservation(mp);
1285 resp->tr_create.tr_logcount = xfs_icreate_log_count(mp, resp);
1286 resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
1287
1288 resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
1289 resp->tr_mkdir.tr_logcount = xfs_mkdir_log_count(mp, resp);
1290 resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
1291}
1292
/*
 * Default the atomic write ioend reservation to the itruncate reservation.
 */
STATIC void
xfs_calc_default_atomic_ioend_reservation(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	/* Pick a default that will scale reasonably for the log size. */
	resp->tr_atomic_ioend = resp->tr_itruncate;
}
1301
/*
 * Compute the log reservation sizes and log counts for all transaction
 * types.
 *
 * Ordering matters: the attr reservations (tr_attrsetm in particular) are
 * computed before xfs_calc_namespace_reservations() because the namespace
 * log counts are derived from tr_attrsetm when parent pointers are enabled.
 */
void
xfs_trans_resv_calc(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	/* Extra log-count rolls needed for deferred intent items; see below. */
	int			logcount_adj = 0;

	/*
	 * The following transactions are logged in physical format and
	 * require a permanent reservation on space.
	 */
	resp->tr_write.tr_logres = xfs_calc_write_reservation(mp, false);
	resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
	resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp, false);
	resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
	resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_create_tmpfile.tr_logres =
			xfs_calc_create_tmpfile_reservation(mp);
	resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT;
	resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_ifree.tr_logres = xfs_calc_ifree_reservation(mp);
	resp->tr_ifree.tr_logcount = XFS_INACTIVE_LOG_COUNT;
	resp->tr_ifree.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_addafork.tr_logres = xfs_calc_addafork_reservation(mp);
	resp->tr_addafork.tr_logcount = XFS_ADDAFORK_LOG_COUNT;
	resp->tr_addafork.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_attrinval.tr_logres = xfs_calc_attrinval_reservation(mp);
	resp->tr_attrinval.tr_logcount = XFS_ATTRINVAL_LOG_COUNT;
	resp->tr_attrinval.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_attrsetm.tr_logres = xfs_calc_attrsetm_reservation(mp);
	resp->tr_attrsetm.tr_logcount = XFS_ATTRSET_LOG_COUNT;
	resp->tr_attrsetm.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_attrrm.tr_logres = xfs_calc_attrrm_reservation(mp);
	resp->tr_attrrm.tr_logcount = XFS_ATTRRM_LOG_COUNT;
	resp->tr_attrrm.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_growrtalloc.tr_logres = xfs_calc_growrtalloc_reservation(mp);
	resp->tr_growrtalloc.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT;
	resp->tr_growrtalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp,
			false);
	resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
	resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	/* Needs tr_attrsetm above; see xfs_calc_namespace_reservations(). */
	xfs_calc_namespace_reservations(mp, resp);

	/*
	 * The following transactions are logged in logical format with
	 * a default log count.
	 */
	resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation();
	resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT;

	resp->tr_sb.tr_logres = xfs_calc_sb_reservation(mp);
	resp->tr_sb.tr_logcount = XFS_DEFAULT_LOG_COUNT;

	/* growdata requires permanent res; it can free space to the last AG */
	resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp);
	resp->tr_growdata.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT;
	resp->tr_growdata.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	/* The following transaction are logged in logical format */
	resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp);
	resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp);
	resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp);
	resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp);
	resp->tr_clearagi.tr_logres = xfs_calc_clear_agi_bucket_reservation(mp);
	resp->tr_growrtzero.tr_logres = xfs_calc_growrtzero_reservation(mp);
	resp->tr_growrtfree.tr_logres = xfs_calc_growrtfree_reservation(mp);

	/*
	 * Add one logcount for BUI items that appear with rmap or reflink,
	 * one logcount for refcount intent items, and one logcount for rmap
	 * intent items.
	 */
	if (xfs_has_reflink(mp) || xfs_has_rmapbt(mp))
		logcount_adj++;
	if (xfs_has_reflink(mp))
		logcount_adj++;
	if (xfs_has_rmapbt(mp))
		logcount_adj++;

	resp->tr_itruncate.tr_logcount += logcount_adj;
	resp->tr_write.tr_logcount += logcount_adj;
	resp->tr_qm_dqalloc.tr_logcount += logcount_adj;

	/*
	 * Now that we've finished computing the static reservations, we can
	 * compute the dynamic reservation for atomic writes.
	 */
	xfs_calc_default_atomic_ioend_reservation(mp, resp);
}
1403
/*
 * Return the per-extent and fixed transaction reservation sizes needed to
 * complete an atomic write.
 *
 * The return value is the worst-case log space consumed per extent across
 * the whole intent chain; *step_size is set to the largest single-step
 * completion reservation among the four intent item types.
 */
STATIC unsigned int
xfs_calc_atomic_write_ioend_geometry(
	struct xfs_mount	*mp,
	unsigned int		*step_size)
{
	/* Log space for one intent/done item of each relevant type. */
	const unsigned int	efi = xfs_efi_log_space(1);
	const unsigned int	efd = xfs_efd_log_space(1);
	const unsigned int	rui = xfs_rui_log_space(1);
	const unsigned int	rud = xfs_rud_log_space();
	const unsigned int	cui = xfs_cui_log_space(1);
	const unsigned int	cud = xfs_cud_log_space();
	const unsigned int	bui = xfs_bui_log_space(1);
	const unsigned int	bud = xfs_bud_log_space();

	/*
	 * Maximum overhead to complete an atomic write ioend in software:
	 * remove data fork extent + remove cow fork extent + map extent into
	 * data fork.
	 *
	 * tx0: Creates a BUI and a CUI and that's all it needs.
	 *
	 * tx1: Roll to finish the BUI.  Need space for the BUD, an RUI, and
	 * enough space to relog the CUI (== CUI + CUD).
	 *
	 * tx2: Roll again to finish the RUI.  Need space for the RUD and space
	 * to relog the CUI.
	 *
	 * tx3: Roll again, need space for the CUD and possibly a new EFI.
	 *
	 * tx4: Roll again, need space for an EFD.
	 *
	 * If the extent referenced by the pair of BUI/CUI items is not the one
	 * being currently processed, then we need to reserve space to relog
	 * both items.
	 */
	const unsigned int	tx0 = bui + cui;
	const unsigned int	tx1 = bud + rui + cui + cud;
	const unsigned int	tx2 = rud + cui + cud;
	const unsigned int	tx3 = cud + efi;
	const unsigned int	tx4 = efd;
	const unsigned int	relog = bui + bud + cui + cud;

	/* Worst single transaction in the chain bounds the per-extent cost. */
	const unsigned int	per_intent = max(max3(tx0, tx1, tx2),
						 max3(tx3, tx4, relog));

	/* Overhead to finish one step of each intent item type */
	const unsigned int	f1 = xfs_calc_finish_efi_reservation(mp, 1);
	const unsigned int	f2 = xfs_calc_finish_rui_reservation(mp, 1);
	const unsigned int	f3 = xfs_calc_finish_cui_reservation(mp, 1);
	const unsigned int	f4 = xfs_calc_finish_bui_reservation(mp, 1);

	/* We only finish one item per transaction in a chain */
	*step_size = max(f4, max3(f1, f2, f3));

	return per_intent;
}
1464
1465/*
1466 * Compute the maximum size (in fsblocks) of atomic writes that we can complete
1467 * given the existing log reservations.
1468 */
1469xfs_extlen_t
1470xfs_calc_max_atomic_write_fsblocks(
1471 struct xfs_mount *mp)
1472{
1473 const struct xfs_trans_res *resv = &M_RES(mp)->tr_atomic_ioend;
1474 unsigned int per_intent = 0;
1475 unsigned int step_size = 0;
1476 unsigned int ret = 0;
1477
1478 if (resv->tr_logres > 0) {
1479 per_intent = xfs_calc_atomic_write_ioend_geometry(mp,
1480 &step_size);
1481
1482 if (resv->tr_logres >= step_size)
1483 ret = (resv->tr_logres - step_size) / per_intent;
1484 }
1485
1486 trace_xfs_calc_max_atomic_write_fsblocks(mp, per_intent, step_size,
1487 resv->tr_logres, ret);
1488
1489 return ret;
1490}
1491
/*
 * Compute the log blocks and transaction reservation needed to complete an
 * atomic write of a given number of blocks.  Worst case, each block requires
 * separate handling.  A return value of 0 means something went wrong.
 *
 * On success, *new_logres holds the candidate tr_atomic_ioend reservation
 * and the return value is the minimum log size (in blocks) it requires.
 */
xfs_extlen_t
xfs_calc_atomic_write_log_geometry(
	struct xfs_mount	*mp,
	xfs_extlen_t		blockcount,
	unsigned int		*new_logres)
{
	struct xfs_trans_res	*curr_res = &M_RES(mp)->tr_atomic_ioend;
	uint			old_logres = curr_res->tr_logres;
	unsigned int		per_intent, step_size;
	unsigned int		logres;
	xfs_extlen_t		min_logblocks;

	ASSERT(blockcount > 0);

	/* Reset tr_atomic_ioend to the filesystem default first. */
	xfs_calc_default_atomic_ioend_reservation(mp, M_RES(mp));

	per_intent = xfs_calc_atomic_write_ioend_geometry(mp, &step_size);

	/* Check for overflows */
	if (check_mul_overflow(blockcount, per_intent, &logres) ||
	    check_add_overflow(logres, step_size, &logres))
		return 0;

	/*
	 * Temporarily install the candidate reservation to compute the
	 * minimum log size it would need, then restore the previous value.
	 */
	curr_res->tr_logres = logres;
	min_logblocks = xfs_log_calc_minimum_size(mp);
	curr_res->tr_logres = old_logres;

	trace_xfs_calc_max_atomic_write_log_geometry(mp, per_intent, step_size,
			blockcount, min_logblocks, logres);

	*new_logres = logres;
	return min_logblocks;
}
1530
/*
 * Compute the transaction reservation needed to complete an out of place
 * atomic write of a given number of blocks.
 *
 * Returns 0 on success, or -EINVAL if the requested size cannot be
 * accommodated by the current log.  On success tr_atomic_ioend is updated.
 */
int
xfs_calc_atomic_write_reservation(
	struct xfs_mount	*mp,
	xfs_extlen_t		blockcount)
{
	unsigned int		new_logres;
	xfs_extlen_t		min_logblocks;

	/*
	 * If the caller doesn't ask for a specific atomic write size, then
	 * use the defaults.
	 */
	if (blockcount == 0) {
		xfs_calc_default_atomic_ioend_reservation(mp, M_RES(mp));
		return 0;
	}

	/* Reject sizes whose reservation overflows or exceeds the log. */
	min_logblocks = xfs_calc_atomic_write_log_geometry(mp, blockcount,
			&new_logres);
	if (!min_logblocks || min_logblocks > mp->m_sb.sb_logblocks)
		return -EINVAL;

	M_RES(mp)->tr_atomic_ioend.tr_logres = new_logres;
	return 0;
}