Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
4 * Copyright (C) 2010 Red Hat, Inc.
5 * All Rights Reserved.
6 */
7#include "xfs_platform.h"
8#include "xfs_fs.h"
9#include "xfs_shared.h"
10#include "xfs_format.h"
11#include "xfs_log_format.h"
12#include "xfs_trans_resv.h"
13#include "xfs_mount.h"
14#include "xfs_da_format.h"
15#include "xfs_da_btree.h"
16#include "xfs_inode.h"
17#include "xfs_bmap_btree.h"
18#include "xfs_quota.h"
19#include "xfs_trans.h"
20#include "xfs_qm.h"
21#include "xfs_trans_space.h"
22#include "xfs_rtbitmap.h"
23#include "xfs_attr_item.h"
24#include "xfs_log.h"
25#include "xfs_defer.h"
26#include "xfs_bmap_item.h"
27#include "xfs_extfree_item.h"
28#include "xfs_rmap_item.h"
29#include "xfs_refcount_item.h"
30#include "xfs_trace.h"
31
/*
 * Flag values for xfs_calc_inode_chunk_res() below.
 * NOTE(review): leading-underscore-plus-capital names are reserved for the
 * implementation per the C standard; renaming would touch all users here,
 * so they are documented rather than changed.
 */
#define _ALLOC true
#define _FREE false
34
35/*
36 * A buffer has a format structure overhead in the log in addition
37 * to the data, so we need to take this into account when reserving
38 * space in a transaction for a buffer. Round the space required up
39 * to a multiple of 128 bytes so that we don't change the historical
40 * reservation that has been used for this overhead.
41 */
42STATIC uint
43xfs_buf_log_overhead(void)
44{
45 return round_up(sizeof(struct xlog_op_header) +
46 sizeof(struct xfs_buf_log_format), 128);
47}
48
49/*
50 * Calculate out transaction log reservation per item in bytes.
51 *
52 * The nbufs argument is used to indicate the number of items that
53 * will be changed in a transaction. size is used to tell how many
54 * bytes should be reserved per item.
55 */
56STATIC uint
57xfs_calc_buf_res(
58 uint nbufs,
59 uint size)
60{
61 return nbufs * (size + xfs_buf_log_overhead());
62}
63
64/*
65 * Per-extent log reservation for the btree changes involved in freeing or
66 * allocating an extent. In classic XFS there were two trees that will be
67 * modified (bnobt + cntbt). With rmap enabled, there are three trees
68 * (rmapbt). The number of blocks reserved is based on the formula:
69 *
70 * num trees * ((2 blocks/level * max depth) - 1)
71 *
72 * Keep in mind that max depth is calculated separately for each type of tree.
73 */
74uint
75xfs_allocfree_block_count(
76 struct xfs_mount *mp,
77 uint num_ops)
78{
79 uint blocks;
80
81 blocks = num_ops * 2 * (2 * mp->m_alloc_maxlevels - 1);
82 if (xfs_has_rmapbt(mp))
83 blocks += num_ops * (2 * mp->m_rmap_maxlevels - 1);
84
85 return blocks;
86}
87
88/*
89 * Per-extent log reservation for refcount btree changes. These are never done
90 * in the same transaction as an allocation or a free, so we compute them
91 * separately.
92 */
93static unsigned int
94xfs_refcountbt_block_count(
95 struct xfs_mount *mp,
96 unsigned int num_ops)
97{
98 return num_ops * (2 * mp->m_refc_maxlevels - 1);
99}
100
101static unsigned int
102xfs_rtrefcountbt_block_count(
103 struct xfs_mount *mp,
104 unsigned int num_ops)
105{
106 return num_ops * (2 * mp->m_rtrefc_maxlevels - 1);
107}
108
109/*
110 * Logging inodes is really tricksy. They are logged in memory format,
111 * which means that what we write into the log doesn't directly translate into
112 * the amount of space they use on disk.
113 *
114 * Case in point - btree format forks in memory format use more space than the
115 * on-disk format. In memory, the buffer contains a normal btree block header so
116 * the btree code can treat it as though it is just another generic buffer.
117 * However, when we write it to the inode fork, we don't write all of this
118 * header as it isn't needed. e.g. the root is only ever in the inode, so
119 * there's no need for sibling pointers which would waste 16 bytes of space.
120 *
121 * Hence when we have an inode with a maximally sized btree format fork, then
122 * amount of information we actually log is greater than the size of the inode
123 * on disk. Hence we need an inode reservation function that calculates all this
124 * correctly. So, we log:
125 *
126 * - 4 log op headers for object
127 * - for the ilf, the inode core and 2 forks
128 * - inode log format object
129 * - the inode core
130 * - two inode forks containing bmap btree root blocks.
131 * - the btree data contained by both forks will fit into the inode size,
132 * hence when combined with the inode core above, we have a total of the
133 * actual inode size.
134 * - the BMBT headers need to be accounted separately, as they are
135 * additional to the records and pointers that fit inside the inode
136 * forks.
137 */
138STATIC uint
139xfs_calc_inode_res(
140 struct xfs_mount *mp,
141 uint ninodes)
142{
143 return ninodes *
144 (4 * sizeof(struct xlog_op_header) +
145 sizeof(struct xfs_inode_log_format) +
146 mp->m_sb.sb_inodesize +
147 2 * xfs_bmbt_block_len(mp));
148}
149
150/*
151 * Inode btree record insertion/removal modifies the inode btree and free space
152 * btrees (since the inobt does not use the agfl). This requires the following
153 * reservation:
154 *
155 * the inode btree: max depth * blocksize
156 * the allocation btrees: 2 trees * (max depth - 1) * block size
157 *
158 * The caller must account for SB and AG header modifications, etc.
159 */
160STATIC uint
161xfs_calc_inobt_res(
162 struct xfs_mount *mp)
163{
164 return xfs_calc_buf_res(M_IGEO(mp)->inobt_maxlevels,
165 XFS_FSB_TO_B(mp, 1)) +
166 xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
167 XFS_FSB_TO_B(mp, 1));
168}
169
170/*
171 * The free inode btree is a conditional feature. The behavior differs slightly
172 * from that of the traditional inode btree in that the finobt tracks records
173 * for inode chunks with at least one free inode. A record can be removed from
174 * the tree during individual inode allocation. Therefore the finobt
175 * reservation is unconditional for both the inode chunk allocation and
176 * individual inode allocation (modify) cases.
177 *
178 * Behavior aside, the reservation for finobt modification is equivalent to the
179 * traditional inobt: cover a full finobt shape change plus block allocation.
180 */
181STATIC uint
182xfs_calc_finobt_res(
183 struct xfs_mount *mp)
184{
185 if (!xfs_has_finobt(mp))
186 return 0;
187
188 return xfs_calc_inobt_res(mp);
189}
190
191/*
192 * Calculate the reservation required to allocate or free an inode chunk. This
193 * includes:
194 *
195 * the allocation btrees: 2 trees * (max depth - 1) * block size
196 * the inode chunk: m_ino_geo.ialloc_blks * N
197 *
198 * The size N of the inode chunk reservation depends on whether it is for
199 * allocation or free and which type of create transaction is in use. An inode
200 * chunk free always invalidates the buffers and only requires reservation for
201 * headers (N == 0). An inode chunk allocation requires a chunk sized
202 * reservation on v4 and older superblocks to initialize the chunk. No chunk
203 * reservation is required for allocation on v5 supers, which use ordered
204 * buffers to initialize.
205 */
206STATIC uint
207xfs_calc_inode_chunk_res(
208 struct xfs_mount *mp,
209 bool alloc)
210{
211 uint res, size = 0;
212
213 res = xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
214 XFS_FSB_TO_B(mp, 1));
215 if (alloc) {
216 /* icreate tx uses ordered buffers */
217 if (xfs_has_v3inodes(mp))
218 return res;
219 size = XFS_FSB_TO_B(mp, 1);
220 }
221
222 res += xfs_calc_buf_res(M_IGEO(mp)->ialloc_blks, size);
223 return res;
224}
225
226/*
227 * Per-extent log reservation for the btree changes involved in freeing or
228 * allocating a realtime extent. We have to be able to log as many rtbitmap
229 * blocks as needed to mark inuse XFS_BMBT_MAX_EXTLEN blocks' worth of realtime
230 * extents, as well as the realtime summary block (t1). Realtime rmap btree
231 * operations happen in a second transaction, so factor in a couple of rtrmapbt
232 * splits (t2).
233 */
234static unsigned int
235xfs_rtalloc_block_count(
236 struct xfs_mount *mp,
237 unsigned int num_ops)
238{
239 unsigned int rtbmp_blocks;
240 xfs_rtxlen_t rtxlen;
241 unsigned int t1, t2 = 0;
242
243 rtxlen = xfs_extlen_to_rtxlen(mp, XFS_MAX_BMBT_EXTLEN);
244 rtbmp_blocks = xfs_rtbitmap_blockcount_len(mp, rtxlen);
245 t1 = (rtbmp_blocks + 1) * num_ops;
246
247 if (xfs_has_rmapbt(mp))
248 t2 = num_ops * (2 * mp->m_rtrmap_maxlevels - 1);
249
250 return max(t1, t2);
251}
252
253/*
254 * Various log reservation values.
255 *
256 * These are based on the size of the file system block because that is what
257 * most transactions manipulate. Each adds in an additional 128 bytes per
258 * item logged to try to account for the overhead of the transaction mechanism.
259 *
260 * Note: Most of the reservations underestimate the number of allocation
261 * groups into which they could free extents in the xfs_defer_finish() call.
262 * This is because the number in the worst case is quite high and quite
263 * unusual. In order to fix this we need to change xfs_defer_finish() to free
264 * extents in only a single AG at a time. This will require changes to the
265 * EFI code as well, however, so that the EFI for the extents not freed is
266 * logged again in each transaction. See SGI PV #261917.
267 *
268 * Reservation functions here avoid a huge stack in xfs_trans_init due to
269 * register overflow from temporaries in the calculations.
270 */
271
272/*
273 * Finishing a data device refcount updates (t1):
274 * the agfs of the ags containing the blocks: nr_ops * sector size
275 * the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
276 */
277inline unsigned int
278xfs_calc_finish_cui_reservation(
279 struct xfs_mount *mp,
280 unsigned int nr_ops)
281{
282 if (!xfs_has_reflink(mp))
283 return 0;
284
285 return xfs_calc_buf_res(nr_ops, mp->m_sb.sb_sectsize) +
286 xfs_calc_buf_res(xfs_refcountbt_block_count(mp, nr_ops),
287 mp->m_sb.sb_blocksize);
288}
289
290/*
291 * Realtime refcount updates (t2);
292 * the rt refcount inode
293 * the rtrefcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
294 */
295inline unsigned int
296xfs_calc_finish_rt_cui_reservation(
297 struct xfs_mount *mp,
298 unsigned int nr_ops)
299{
300 if (!xfs_has_rtreflink(mp))
301 return 0;
302
303 return xfs_calc_inode_res(mp, 1) +
304 xfs_calc_buf_res(xfs_rtrefcountbt_block_count(mp, nr_ops),
305 mp->m_sb.sb_blocksize);
306}
307
308/*
309 * Compute the log reservation required to handle the refcount update
310 * transaction. Refcount updates are always done via deferred log items.
311 *
312 * This is calculated as the max of:
313 * Data device refcount updates (t1):
314 * the agfs of the ags containing the blocks: nr_ops * sector size
315 * the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
316 * Realtime refcount updates (t2);
317 * the rt refcount inode
318 * the rtrefcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
319 */
static unsigned int
xfs_calc_refcountbt_reservation(
	struct xfs_mount *mp,
	unsigned int	nr_ops)
{
	unsigned int	datadev;
	unsigned int	rtdev;

	datadev = xfs_calc_finish_cui_reservation(mp, nr_ops);
	rtdev = xfs_calc_finish_rt_cui_reservation(mp, nr_ops);

	return max(datadev, rtdev);
}
332
333/*
334 * In a write transaction we can allocate a maximum of 2
335 * extents. This gives (t1):
336 * the inode getting the new extents: inode size
337 * the inode's bmap btree: max depth * block size
338 * the agfs of the ags from which the extents are allocated: 2 * sector
339 * the superblock free block counter: sector size
340 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
341 * Or, if we're writing to a realtime file (t2):
342 * the inode getting the new extents: inode size
343 * the inode's bmap btree: max depth * block size
344 * the agfs of the ags from which the extents are allocated: 2 * sector
345 * the superblock free block counter: sector size
346 * the realtime bitmap: ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes
347 * the realtime summary: 1 block
348 * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
349 * And the bmap_finish transaction can free bmap blocks in a join (t3):
350 * the agfs of the ags containing the blocks: 2 * sector size
351 * the agfls of the ags containing the blocks: 2 * sector size
352 * the super block free block counter: sector size
353 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
354 * And any refcount updates that happen in a separate transaction (t4).
355 */
STATIC uint
xfs_calc_write_reservation(
	struct xfs_mount	*mp,
	bool			for_minlogsize)
{
	unsigned int		t1, t2, t3, t4;
	unsigned int		blksz = XFS_FSB_TO_B(mp, 1);

	/* t1: data device allocation of up to 2 extents */
	t1 = xfs_calc_inode_res(mp, 1) +
	     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), blksz) +
	     xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
	     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz);

	/* t2: realtime device allocation, only with the rt feature */
	if (xfs_has_realtime(mp)) {
		t2 = xfs_calc_inode_res(mp, 1) +
		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
				     blksz) +
		     xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
		     xfs_calc_buf_res(xfs_rtalloc_block_count(mp, 1), blksz) +
		     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1), blksz);
	} else {
		t2 = 0;
	}

	/* t3: bmap_finish freeing of bmap blocks */
	t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
	     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz);

	/*
	 * In the early days of reflink, we included enough reservation to log
	 * two refcountbt splits for each transaction. The codebase runs
	 * refcountbt updates in separate transactions now, so to compute the
	 * minimum log size, add the refcountbtree splits back to t1 and t3 and
	 * do not account them separately as t4. Reflink did not support
	 * realtime when the reservations were established, so no adjustment to
	 * t2 is needed.
	 */
	if (for_minlogsize) {
		unsigned int	adj = 0;

		if (xfs_has_reflink(mp))
			adj = xfs_calc_buf_res(
					xfs_refcountbt_block_count(mp, 2),
					blksz);
		t1 += adj;
		t3 += adj;
		return XFS_DQUOT_LOGRES + max3(t1, t2, t3);
	}

	/* t4: deferred refcount updates, done in their own transaction */
	t4 = xfs_calc_refcountbt_reservation(mp, 1);
	return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3));
}
407
/* Compute the minimum-log-size variant of the write reservation. */
unsigned int
xfs_calc_write_reservation_minlogsize(
	struct xfs_mount	*mp)
{
	return xfs_calc_write_reservation(mp, true);
}
414
415/*
416 * Finishing an EFI can free the blocks and bmap blocks (t2):
417 * the agf for each of the ags: nr * sector size
418 * the agfl for each of the ags: nr * sector size
419 * the super block to reflect the freed blocks: sector size
420 * worst case split in allocation btrees per extent assuming nr extents:
421 * nr exts * 2 trees * (2 * max depth - 1) * block size
422 */
423inline unsigned int
424xfs_calc_finish_efi_reservation(
425 struct xfs_mount *mp,
426 unsigned int nr)
427{
428 return xfs_calc_buf_res((2 * nr) + 1, mp->m_sb.sb_sectsize) +
429 xfs_calc_buf_res(xfs_allocfree_block_count(mp, nr),
430 mp->m_sb.sb_blocksize);
431}
432
433/*
434 * Or, if it's a realtime file (t3):
435 * the agf for each of the ags: 2 * sector size
436 * the agfl for each of the ags: 2 * sector size
437 * the super block to reflect the freed blocks: sector size
438 * the realtime bitmap:
439 * 2 exts * ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes
440 * the realtime summary: 2 exts * 1 block
441 * worst case split in allocation btrees per extent assuming 2 extents:
442 * 2 exts * 2 trees * (2 * max depth - 1) * block size
443 */
444inline unsigned int
445xfs_calc_finish_rt_efi_reservation(
446 struct xfs_mount *mp,
447 unsigned int nr)
448{
449 if (!xfs_has_realtime(mp))
450 return 0;
451
452 return xfs_calc_buf_res((2 * nr) + 1, mp->m_sb.sb_sectsize) +
453 xfs_calc_buf_res(xfs_rtalloc_block_count(mp, nr),
454 mp->m_sb.sb_blocksize) +
455 xfs_calc_buf_res(xfs_allocfree_block_count(mp, nr),
456 mp->m_sb.sb_blocksize);
457}
458
/*
 * Finishing an RUI is the same as an EFI.  We can split the rmap btree
 * twice on each end of the record, and that can cause the AGFL to be
 * refilled or emptied out.
 */
inline unsigned int
xfs_calc_finish_rui_reservation(
	struct xfs_mount *mp,
	unsigned int	nr)
{
	if (xfs_has_rmapbt(mp))
		return xfs_calc_finish_efi_reservation(mp, nr);
	return 0;
}
473
/*
 * Finishing a realtime RUI is the same as a realtime EFI.  We can split the
 * rt rmap btree twice on each end of the record, and that can cause the
 * AGFL to be refilled or emptied out.
 */
inline unsigned int
xfs_calc_finish_rt_rui_reservation(
	struct xfs_mount *mp,
	unsigned int	nr)
{
	if (xfs_has_rtrmapbt(mp))
		return xfs_calc_finish_rt_efi_reservation(mp, nr);
	return 0;
}
488
489/*
490 * In finishing a BUI, we can modify:
491 * the inode being truncated: inode size
492 * dquots
493 * the inode's bmap btree: (max depth + 1) * block size
494 */
495inline unsigned int
496xfs_calc_finish_bui_reservation(
497 struct xfs_mount *mp,
498 unsigned int nr)
499{
500 return xfs_calc_inode_res(mp, 1) + XFS_DQUOT_LOGRES +
501 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
502 mp->m_sb.sb_blocksize);
503}
504
505/*
506 * In truncating a file we free up to two extents at once. We can modify (t1):
507 * the inode being truncated: inode size
508 * the inode's bmap btree: (max depth + 1) * block size
509 * And the bmap_finish transaction can free the blocks and bmap blocks (t2):
510 * the agf for each of the ags: 4 * sector size
511 * the agfl for each of the ags: 4 * sector size
512 * the super block to reflect the freed blocks: sector size
513 * worst case split in allocation btrees per extent assuming 4 extents:
514 * 4 exts * 2 trees * (2 * max depth - 1) * block size
515 * Or, if it's a realtime file (t3):
516 * the agf for each of the ags: 2 * sector size
517 * the agfl for each of the ags: 2 * sector size
518 * the super block to reflect the freed blocks: sector size
519 * the realtime bitmap:
520 * 2 exts * ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes
521 * the realtime summary: 2 exts * 1 block
522 * worst case split in allocation btrees per extent assuming 2 extents:
523 * 2 exts * 2 trees * (2 * max depth - 1) * block size
524 * And any refcount updates that happen in a separate transaction (t4).
525 */
STATIC uint
xfs_calc_itruncate_reservation(
	struct xfs_mount	*mp,
	bool			for_minlogsize)
{
	unsigned int		t1, t2, t3, t4;
	unsigned int		blksz = XFS_FSB_TO_B(mp, 1);

	/* t1: the inode being truncated plus a full bmap btree split */
	t1 = xfs_calc_inode_res(mp, 1) +
	     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz);

	/* t2: freeing up to 4 data device extents */
	t2 = xfs_calc_finish_efi_reservation(mp, 4);
	/* t3: freeing up to 2 realtime extents */
	t3 = xfs_calc_finish_rt_efi_reservation(mp, 2);

	/*
	 * In the early days of reflink, we included enough reservation to log
	 * four refcountbt splits in the same transaction as bnobt/cntbt
	 * updates. The codebase runs refcountbt updates in separate
	 * transactions now, so to compute the minimum log size, add the
	 * refcount btree splits back here and do not compute them separately
	 * as t4. Reflink did not support realtime when the reservations were
	 * established, so do not adjust t3.
	 */
	if (for_minlogsize) {
		if (xfs_has_reflink(mp))
			t2 += xfs_calc_buf_res(
					xfs_refcountbt_block_count(mp, 4),
					blksz);

		return XFS_DQUOT_LOGRES + max3(t1, t2, t3);
	}

	/* t4: deferred refcount updates for up to 2 extents */
	t4 = xfs_calc_refcountbt_reservation(mp, 2);
	return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3));
}
561
/* Compute the minimum-log-size variant of the itruncate reservation. */
unsigned int
xfs_calc_itruncate_reservation_minlogsize(
	struct xfs_mount	*mp)
{
	return xfs_calc_itruncate_reservation(mp, true);
}
568
569static inline unsigned int xfs_calc_pptr_link_overhead(void)
570{
571 return sizeof(struct xfs_attri_log_format) +
572 xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
573 xlog_calc_iovec_len(MAXNAMELEN - 1);
574}
575static inline unsigned int xfs_calc_pptr_unlink_overhead(void)
576{
577 return sizeof(struct xfs_attri_log_format) +
578 xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
579 xlog_calc_iovec_len(MAXNAMELEN - 1);
580}
581static inline unsigned int xfs_calc_pptr_replace_overhead(void)
582{
583 return sizeof(struct xfs_attri_log_format) +
584 xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
585 xlog_calc_iovec_len(MAXNAMELEN - 1) +
586 xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
587 xlog_calc_iovec_len(MAXNAMELEN - 1);
588}
589
590/*
591 * In renaming a files we can modify:
592 * the five inodes involved: 5 * inode size
593 * the two directory btrees: 2 * (max depth + v2) * dir block size
594 * the two directory bmap btrees: 2 * max depth * block size
595 * And the bmap_finish transaction can free dir and bmap blocks (two sets
596 * of bmap blocks) giving (t2):
597 * the agf for the ags in which the blocks live: 3 * sector size
598 * the agfl for the ags in which the blocks live: 3 * sector size
599 * the superblock for the free block count: sector size
600 * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
601 * If parent pointers are enabled (t3), then each transaction in the chain
602 * must be capable of setting or removing the extended attribute
603 * containing the parent information. It must also be able to handle
604 * the three xattr intent items that track the progress of the parent
605 * pointer update.
606 */
STATIC uint
xfs_calc_rename_reservation(
	struct xfs_mount	*mp)
{
	unsigned int		overhead = XFS_DQUOT_LOGRES;
	struct xfs_trans_resv	*resp = M_RES(mp);
	unsigned int		t1, t2, t3 = 0;

	/* t1: the five inodes plus both directories' btree/bmap blocks */
	t1 = xfs_calc_inode_res(mp, 5) +
	     xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
			XFS_FSB_TO_B(mp, 1));

	/* t2: bmap_finish freeing dir and bmap blocks, up to 3 extents */
	t2 = xfs_calc_finish_efi_reservation(mp, 3);

	if (xfs_has_parent(mp)) {
		unsigned int	rename_overhead, exchange_overhead;

		/* t3: one xattr set or remove per transaction roll */
		t3 = max(resp->tr_attrsetm.tr_logres,
			 resp->tr_attrrm.tr_logres);

		/*
		 * For a standard rename, the three xattr intent log items
		 * are (1) replacing the pptr for the source file; (2)
		 * removing the pptr on the dest file; and (3) adding a
		 * pptr for the whiteout file in the src dir.
		 *
		 * For an RENAME_EXCHANGE, there are two xattr intent
		 * items to replace the pptr for both src and dest
		 * files. Link counts don't change and there is no
		 * whiteout.
		 *
		 * In the worst case we can end up relogging all log
		 * intent items to allow the log tail to move ahead, so
		 * they become overhead added to each transaction in a
		 * processing chain.
		 */
		rename_overhead = xfs_calc_pptr_replace_overhead() +
				  xfs_calc_pptr_unlink_overhead() +
				  xfs_calc_pptr_link_overhead();
		exchange_overhead = 2 * xfs_calc_pptr_replace_overhead();

		overhead += max(rename_overhead, exchange_overhead);
	}

	return overhead + max3(t1, t2, t3);
}
653
654static inline unsigned int
655xfs_rename_log_count(
656 struct xfs_mount *mp,
657 struct xfs_trans_resv *resp)
658{
659 /* One for the rename, one more for freeing blocks */
660 unsigned int ret = XFS_RENAME_LOG_COUNT;
661
662 /*
663 * Pre-reserve enough log reservation to handle the transaction
664 * rolling needed to remove or add one parent pointer.
665 */
666 if (xfs_has_parent(mp))
667 ret += max(resp->tr_attrsetm.tr_logcount,
668 resp->tr_attrrm.tr_logcount);
669
670 return ret;
671}
672
673/*
674 * For removing an inode from unlinked list at first, we can modify:
675 * the agi hash list and counters: sector size
676 * the on disk inode before ours in the agi hash list: inode cluster size
677 * the on disk inode in the agi hash list: inode cluster size
678 */
679STATIC uint
680xfs_calc_iunlink_remove_reservation(
681 struct xfs_mount *mp)
682{
683 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
684 2 * M_IGEO(mp)->inode_cluster_size;
685}
686
687static inline unsigned int
688xfs_link_log_count(
689 struct xfs_mount *mp,
690 struct xfs_trans_resv *resp)
691{
692 unsigned int ret = XFS_LINK_LOG_COUNT;
693
694 /*
695 * Pre-reserve enough log reservation to handle the transaction
696 * rolling needed to add one parent pointer.
697 */
698 if (xfs_has_parent(mp))
699 ret += resp->tr_attrsetm.tr_logcount;
700
701 return ret;
702}
703
704/*
705 * For creating a link to an inode:
706 * the parent directory inode: inode size
707 * the linked inode: inode size
708 * the directory btree could split: (max depth + v2) * dir block size
709 * the directory bmap btree could join or split: (max depth + v2) * blocksize
710 * And the bmap_finish transaction can free some bmap blocks giving:
711 * the agf for the ag in which the blocks live: sector size
712 * the agfl for the ag in which the blocks live: sector size
713 * the superblock for the free block count: sector size
714 * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
715 */
716STATIC uint
717xfs_calc_link_reservation(
718 struct xfs_mount *mp)
719{
720 unsigned int overhead = XFS_DQUOT_LOGRES;
721 struct xfs_trans_resv *resp = M_RES(mp);
722 unsigned int t1, t2, t3 = 0;
723
724 overhead += xfs_calc_iunlink_remove_reservation(mp);
725 t1 = xfs_calc_inode_res(mp, 2) +
726 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
727 t2 = xfs_calc_finish_efi_reservation(mp, 1);
728
729 if (xfs_has_parent(mp)) {
730 t3 = resp->tr_attrsetm.tr_logres;
731 overhead += xfs_calc_pptr_link_overhead();
732 }
733
734 return overhead + max3(t1, t2, t3);
735}
736
737/*
738 * For adding an inode to unlinked list we can modify:
739 * the agi hash list: sector size
740 * the on disk inode: inode cluster size
741 */
742STATIC uint
743xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
744{
745 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
746 M_IGEO(mp)->inode_cluster_size;
747}
748
749static inline unsigned int
750xfs_remove_log_count(
751 struct xfs_mount *mp,
752 struct xfs_trans_resv *resp)
753{
754 unsigned int ret = XFS_REMOVE_LOG_COUNT;
755
756 /*
757 * Pre-reserve enough log reservation to handle the transaction
758 * rolling needed to add one parent pointer.
759 */
760 if (xfs_has_parent(mp))
761 ret += resp->tr_attrrm.tr_logcount;
762
763 return ret;
764}
765
766/*
767 * For removing a directory entry we can modify:
768 * the parent directory inode: inode size
769 * the removed inode: inode size
770 * the directory btree could join: (max depth + v2) * dir block size
771 * the directory bmap btree could join or split: (max depth + v2) * blocksize
772 * And the bmap_finish transaction can free the dir and bmap blocks giving:
773 * the agf for the ag in which the blocks live: 2 * sector size
774 * the agfl for the ag in which the blocks live: 2 * sector size
775 * the superblock for the free block count: sector size
776 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
777 */
778STATIC uint
779xfs_calc_remove_reservation(
780 struct xfs_mount *mp)
781{
782 unsigned int overhead = XFS_DQUOT_LOGRES;
783 struct xfs_trans_resv *resp = M_RES(mp);
784 unsigned int t1, t2, t3 = 0;
785
786 overhead += xfs_calc_iunlink_add_reservation(mp);
787
788 t1 = xfs_calc_inode_res(mp, 2) +
789 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
790 t2 = xfs_calc_finish_efi_reservation(mp, 2);
791
792 if (xfs_has_parent(mp)) {
793 t3 = resp->tr_attrrm.tr_logres;
794 overhead += xfs_calc_pptr_unlink_overhead();
795 }
796
797 return overhead + max3(t1, t2, t3);
798}
799
800/*
801 * For create, break it in to the two cases that the transaction
802 * covers. We start with the modify case - allocation done by modification
803 * of the state of existing inodes - and the allocation case.
804 */
805
806/*
807 * For create we can modify:
808 * the parent directory inode: inode size
809 * the new inode: inode size
810 * the inode btree entry: block size
811 * the superblock for the nlink flag: sector size
812 * the directory btree: (max depth + v2) * dir block size
813 * the directory inode's bmap btree: (max depth + v2) * block size
814 * the finobt (record modification and allocation btrees)
815 */
816STATIC uint
817xfs_calc_create_resv_modify(
818 struct xfs_mount *mp)
819{
820 return xfs_calc_inode_res(mp, 2) +
821 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
822 (uint)XFS_FSB_TO_B(mp, 1) +
823 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) +
824 xfs_calc_finobt_res(mp);
825}
826
827/*
828 * For icreate we can allocate some inodes giving:
829 * the agi and agf of the ag getting the new inodes: 2 * sectorsize
830 * the superblock for the nlink flag: sector size
831 * the inode chunk (allocation, optional init)
832 * the inobt (record insertion)
833 * the finobt (optional, record insertion)
834 */
835STATIC uint
836xfs_calc_icreate_resv_alloc(
837 struct xfs_mount *mp)
838{
839 return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
840 mp->m_sb.sb_sectsize +
841 xfs_calc_inode_chunk_res(mp, _ALLOC) +
842 xfs_calc_inobt_res(mp) +
843 xfs_calc_finobt_res(mp);
844}
845
846static inline unsigned int
847xfs_icreate_log_count(
848 struct xfs_mount *mp,
849 struct xfs_trans_resv *resp)
850{
851 unsigned int ret = XFS_CREATE_LOG_COUNT;
852
853 /*
854 * Pre-reserve enough log reservation to handle the transaction
855 * rolling needed to add one parent pointer.
856 */
857 if (xfs_has_parent(mp))
858 ret += resp->tr_attrsetm.tr_logcount;
859
860 return ret;
861}
862
863STATIC uint
864xfs_calc_icreate_reservation(
865 struct xfs_mount *mp)
866{
867 struct xfs_trans_resv *resp = M_RES(mp);
868 unsigned int overhead = XFS_DQUOT_LOGRES;
869 unsigned int t1, t2, t3 = 0;
870
871 t1 = xfs_calc_icreate_resv_alloc(mp);
872 t2 = xfs_calc_create_resv_modify(mp);
873
874 if (xfs_has_parent(mp)) {
875 t3 = resp->tr_attrsetm.tr_logres;
876 overhead += xfs_calc_pptr_link_overhead();
877 }
878
879 return overhead + max3(t1, t2, t3);
880}
881
882STATIC uint
883xfs_calc_create_tmpfile_reservation(
884 struct xfs_mount *mp)
885{
886 uint res = XFS_DQUOT_LOGRES;
887
888 res += xfs_calc_icreate_resv_alloc(mp);
889 return res + xfs_calc_iunlink_add_reservation(mp);
890}
891
892static inline unsigned int
893xfs_mkdir_log_count(
894 struct xfs_mount *mp,
895 struct xfs_trans_resv *resp)
896{
897 unsigned int ret = XFS_MKDIR_LOG_COUNT;
898
899 /*
900 * Pre-reserve enough log reservation to handle the transaction
901 * rolling needed to add one parent pointer.
902 */
903 if (xfs_has_parent(mp))
904 ret += resp->tr_attrsetm.tr_logcount;
905
906 return ret;
907}
908
/*
 * Making a new directory is the same as creating a new file.
 */
STATIC uint
xfs_calc_mkdir_reservation(
	struct xfs_mount	*mp)
{
	/* Same space reservation as icreate; only the log count differs. */
	return xfs_calc_icreate_reservation(mp);
}
918
919static inline unsigned int
920xfs_symlink_log_count(
921 struct xfs_mount *mp,
922 struct xfs_trans_resv *resp)
923{
924 unsigned int ret = XFS_SYMLINK_LOG_COUNT;
925
926 /*
927 * Pre-reserve enough log reservation to handle the transaction
928 * rolling needed to add one parent pointer.
929 */
930 if (xfs_has_parent(mp))
931 ret += resp->tr_attrsetm.tr_logcount;
932
933 return ret;
934}
935
936/*
937 * Making a new symplink is the same as creating a new file, but
938 * with the added blocks for remote symlink data which can be up to 1kB in
939 * length (XFS_SYMLINK_MAXLEN).
940 */
941STATIC uint
942xfs_calc_symlink_reservation(
943 struct xfs_mount *mp)
944{
945 return xfs_calc_icreate_reservation(mp) +
946 xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN);
947}
948
949/*
950 * In freeing an inode we can modify:
951 * the inode being freed: inode size
952 * the super block free inode counter, AGF and AGFL: sector size
953 * the on disk inode (agi unlinked list removal)
954 * the inode chunk (invalidated, headers only)
955 * the inode btree
956 * the finobt (record insertion, removal or modification)
957 *
958 * Note that the inode chunk res. includes an allocfree res. for freeing of the
959 * inode chunk. This is technically extraneous because the inode chunk free is
960 * deferred (it occurs after a transaction roll). Include the extra reservation
961 * anyways since we've had reports of ifree transaction overruns due to too many
962 * agfl fixups during inode chunk frees.
963 */
964STATIC uint
965xfs_calc_ifree_reservation(
966 struct xfs_mount *mp)
967{
968 return XFS_DQUOT_LOGRES +
969 xfs_calc_inode_res(mp, 1) +
970 xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
971 xfs_calc_iunlink_remove_reservation(mp) +
972 xfs_calc_inode_chunk_res(mp, _FREE) +
973 xfs_calc_inobt_res(mp) +
974 xfs_calc_finobt_res(mp);
975}
976
977/*
978 * When only changing the inode we log the inode and possibly the superblock
979 * We also add a bit of slop for the transaction stuff.
980 */
981STATIC uint
982xfs_calc_ichange_reservation(
983 struct xfs_mount *mp)
984{
985 return XFS_DQUOT_LOGRES +
986 xfs_calc_inode_res(mp, 1) +
987 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
988
989}
990
991/*
992 * Growing the data section of the filesystem.
993 * superblock
994 * agi and agf
995 * allocation btrees
996 */
997STATIC uint
998xfs_calc_growdata_reservation(
999 struct xfs_mount *mp)
1000{
1001 return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
1002 xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
1003 XFS_FSB_TO_B(mp, 1));
1004}
1005
1006/*
1007 * Growing the rt section of the filesystem.
1008 * In the first set of transactions (ALLOC) we allocate space to the
1009 * bitmap or summary files.
1010 * superblock: sector size
1011 * agf of the ag from which the extent is allocated: sector size
1012 * bmap btree for bitmap/summary inode: max depth * blocksize
1013 * bitmap/summary inode: inode size
1014 * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
1015 */
1016STATIC uint
1017xfs_calc_growrtalloc_reservation(
1018 struct xfs_mount *mp)
1019{
1020 return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
1021 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
1022 XFS_FSB_TO_B(mp, 1)) +
1023 xfs_calc_inode_res(mp, 1) +
1024 xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
1025 XFS_FSB_TO_B(mp, 1));
1026}
1027
/*
 * Growing the rt section of the filesystem.
 * In the second set of transactions (ZERO) we zero the new metadata blocks,
 * one block at a time:
 *    one bitmap/summary block: blocksize
 */
STATIC uint
xfs_calc_growrtzero_reservation(
	struct xfs_mount	*mp)
{
	return xfs_calc_buf_res(1, mp->m_sb.sb_blocksize);
}
1039
1040/*
1041 * Growing the rt section of the filesystem.
1042 * In the third set of transactions (FREE) we update metadata without
1043 * allocating any new blocks.
1044 * superblock: sector size
1045 * bitmap inode: inode size
1046 * summary inode: inode size
1047 * one bitmap block: blocksize
1048 * summary blocks: new summary size
1049 */
1050STATIC uint
1051xfs_calc_growrtfree_reservation(
1052 struct xfs_mount *mp)
1053{
1054 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
1055 xfs_calc_inode_res(mp, 2) +
1056 xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) +
1057 xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, mp->m_rsumblocks));
1058}
1059
/*
 * Logging the inode modification timestamp on a synchronous write.
 * Only the inode itself is logged:
 *    inode: inode size
 */
STATIC uint
xfs_calc_swrite_reservation(
	struct xfs_mount	*mp)
{
	return xfs_calc_inode_res(mp, 1);
}
1070
/*
 * Logging the inode mode bits when writing a setuid/setgid file.
 * Only the inode itself is logged:
 *    inode: inode size
 */
STATIC uint
xfs_calc_writeid_reservation(
	struct xfs_mount	*mp)
{
	return xfs_calc_inode_res(mp, 1);
}
1081
1082/*
1083 * Converting the inode from non-attributed to attributed.
1084 * the inode being converted: inode size
1085 * agf block and superblock (for block allocation)
1086 * the new block (directory sized)
1087 * bmap blocks for the new directory block
1088 * allocation btrees
1089 */
1090STATIC uint
1091xfs_calc_addafork_reservation(
1092 struct xfs_mount *mp)
1093{
1094 return XFS_DQUOT_LOGRES +
1095 xfs_calc_inode_res(mp, 1) +
1096 xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
1097 xfs_calc_buf_res(1, mp->m_dir_geo->blksize) +
1098 xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
1099 XFS_FSB_TO_B(mp, 1)) +
1100 xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
1101 XFS_FSB_TO_B(mp, 1));
1102}
1103
1104/*
1105 * Removing the attribute fork of a file
1106 * the inode being truncated: inode size
1107 * the inode's bmap btree: max depth * block size
1108 * And the bmap_finish transaction can free the blocks and bmap blocks:
1109 * the agf for each of the ags: 4 * sector size
1110 * the agfl for each of the ags: 4 * sector size
1111 * the super block to reflect the freed blocks: sector size
1112 * worst case split in allocation btrees per extent assuming 4 extents:
1113 * 4 exts * 2 trees * (2 * max depth - 1) * block size
1114 */
1115STATIC uint
1116xfs_calc_attrinval_reservation(
1117 struct xfs_mount *mp)
1118{
1119 return max((xfs_calc_inode_res(mp, 1) +
1120 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
1121 XFS_FSB_TO_B(mp, 1))),
1122 (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
1123 xfs_calc_buf_res(xfs_allocfree_block_count(mp, 4),
1124 XFS_FSB_TO_B(mp, 1))));
1125}
1126
1127/*
1128 * Setting an attribute at mount time.
1129 * the inode getting the attribute
1130 * the superblock for allocations
1131 * the agfs extents are allocated from
1132 * the attribute btree * max depth
1133 * the inode allocation btree
1134 * Since attribute transaction space is dependent on the size of the attribute,
1135 * the calculation is done partially at mount time and partially at runtime(see
1136 * below).
1137 */
1138STATIC uint
1139xfs_calc_attrsetm_reservation(
1140 struct xfs_mount *mp)
1141{
1142 return XFS_DQUOT_LOGRES +
1143 xfs_calc_inode_res(mp, 1) +
1144 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
1145 xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1));
1146}
1147
/*
 * Setting an attribute at runtime, transaction space unit per block.
 *    the superblock for allocations: sector size
 *    the inode bmap btree could join or split: max depth * block size
 * Since the runtime attribute transaction space is dependent on the total
 * blocks needed for the 1st bmap, here we calculate out the space unit for
 * one block so that the caller could figure out the total space according
 * to the attribute extent length in blocks by:
 *	ext * M_RES(mp)->tr_attrsetrt.tr_logres
 */
STATIC uint
xfs_calc_attrsetrt_reservation(
	struct xfs_mount	*mp)
{
	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
		xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
				XFS_FSB_TO_B(mp, 1));
}
1166
1167/*
1168 * Removing an attribute.
1169 * the inode: inode size
1170 * the attribute btree could join: max depth * block size
1171 * the inode bmap btree could join or split: max depth * block size
1172 * And the bmap_finish transaction can free the attr blocks freed giving:
1173 * the agf for the ag in which the blocks live: 2 * sector size
1174 * the agfl for the ag in which the blocks live: 2 * sector size
1175 * the superblock for the free block count: sector size
1176 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
1177 */
1178STATIC uint
1179xfs_calc_attrrm_reservation(
1180 struct xfs_mount *mp)
1181{
1182 return XFS_DQUOT_LOGRES +
1183 max((xfs_calc_inode_res(mp, 1) +
1184 xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH,
1185 XFS_FSB_TO_B(mp, 1)) +
1186 (uint)XFS_FSB_TO_B(mp,
1187 XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
1188 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)),
1189 (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
1190 xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2),
1191 XFS_FSB_TO_B(mp, 1))));
1192}
1193
/*
 * Clearing a bad agino number in an agi hash bucket.  Only the AGI buffer
 * itself is logged:
 *    agi: sector size
 */
STATIC uint
xfs_calc_clear_agi_bucket_reservation(
	struct xfs_mount	*mp)
{
	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
}
1203
/*
 * Adjusting quota limits.
 *    the disk quota buffer: sizeof(struct xfs_disk_dquot)
 * No mount geometry involved, hence no mp argument.
 */
STATIC uint
xfs_calc_qm_setqlim_reservation(void)
{
	return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot));
}
1213
/*
 * Allocating quota on disk if needed.
 *    the write transaction log space for quota file extent allocation
 *    the unit of quota allocation: one system block size
 *
 * NOTE(review): the buffer reservation is one dquot cluster's worth of
 * blocks minus one byte — presumably to preserve the historical reservation
 * size; confirm before changing.
 */
STATIC uint
xfs_calc_qm_dqalloc_reservation(
	struct xfs_mount	*mp,
	bool			for_minlogsize)
{
	return xfs_calc_write_reservation(mp, for_minlogsize) +
		xfs_calc_buf_res(1,
			XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
}
1228
/*
 * Quota allocation reservation as used by the minimum log size calculation:
 * the same computation as tr_qm_dqalloc but with the for_minlogsize variant
 * of the write reservation.
 */
unsigned int
xfs_calc_qm_dqalloc_reservation_minlogsize(
	struct xfs_mount	*mp)
{
	return xfs_calc_qm_dqalloc_reservation(mp, true);
}
1235
/*
 * Syncing the incore super block changes to disk.
 *    the super block to reflect the changes: sector size
 */
STATIC uint
xfs_calc_sb_reservation(
	struct xfs_mount	*mp)
{
	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
}
1246
1247/*
1248 * Namespace reservations.
1249 *
1250 * These get tricky when parent pointers are enabled as we have attribute
1251 * modifications occurring from within these transactions. Rather than confuse
1252 * each of these reservation calculations with the conditional attribute
1253 * reservations, add them here in a clear and concise manner. This requires that
1254 * the attribute reservations have already been calculated.
1255 *
1256 * Note that we only include the static attribute reservation here; the runtime
1257 * reservation will have to be modified by the size of the attributes being
1258 * added/removed/modified. See the comments on the attribute reservation
1259 * calculations for more details.
1260 */
1261STATIC void
1262xfs_calc_namespace_reservations(
1263 struct xfs_mount *mp,
1264 struct xfs_trans_resv *resp)
1265{
1266 ASSERT(resp->tr_attrsetm.tr_logres > 0);
1267
1268 resp->tr_rename.tr_logres = xfs_calc_rename_reservation(mp);
1269 resp->tr_rename.tr_logcount = xfs_rename_log_count(mp, resp);
1270 resp->tr_rename.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
1271
1272 resp->tr_link.tr_logres = xfs_calc_link_reservation(mp);
1273 resp->tr_link.tr_logcount = xfs_link_log_count(mp, resp);
1274 resp->tr_link.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
1275
1276 resp->tr_remove.tr_logres = xfs_calc_remove_reservation(mp);
1277 resp->tr_remove.tr_logcount = xfs_remove_log_count(mp, resp);
1278 resp->tr_remove.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
1279
1280 resp->tr_symlink.tr_logres = xfs_calc_symlink_reservation(mp);
1281 resp->tr_symlink.tr_logcount = xfs_symlink_log_count(mp, resp);
1282 resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
1283
1284 resp->tr_create.tr_logres = xfs_calc_icreate_reservation(mp);
1285 resp->tr_create.tr_logcount = xfs_icreate_log_count(mp, resp);
1286 resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
1287
1288 resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
1289 resp->tr_mkdir.tr_logcount = xfs_mkdir_log_count(mp, resp);
1290 resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
1291}
1292
/*
 * Default the atomic write ioend reservation to the itruncate reservation.
 */
STATIC void
xfs_calc_default_atomic_ioend_reservation(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	/* Pick a default that will scale reasonably for the log size. */
	resp->tr_atomic_ioend = resp->tr_itruncate;
}
1301
/*
 * Compute the log reservation sizes and log counts for all transaction
 * types.
 *
 * Ordering matters: the attr reservations (tr_attrsetm in particular) are
 * computed before xfs_calc_namespace_reservations() because the namespace
 * log counts are derived from tr_attrsetm when parent pointers are enabled.
 */
void
xfs_trans_resv_calc(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	/* Extra log-count rolls needed for deferred intent items; see below. */
	int			logcount_adj = 0;

	/*
	 * The following transactions are logged in physical format and
	 * require a permanent reservation on space.
	 */
	resp->tr_write.tr_logres = xfs_calc_write_reservation(mp, false);
	resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
	resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp, false);
	resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
	resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_create_tmpfile.tr_logres =
			xfs_calc_create_tmpfile_reservation(mp);
	resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT;
	resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_ifree.tr_logres = xfs_calc_ifree_reservation(mp);
	resp->tr_ifree.tr_logcount = XFS_INACTIVE_LOG_COUNT;
	resp->tr_ifree.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_addafork.tr_logres = xfs_calc_addafork_reservation(mp);
	resp->tr_addafork.tr_logcount = XFS_ADDAFORK_LOG_COUNT;
	resp->tr_addafork.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_attrinval.tr_logres = xfs_calc_attrinval_reservation(mp);
	resp->tr_attrinval.tr_logcount = XFS_ATTRINVAL_LOG_COUNT;
	resp->tr_attrinval.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_attrsetm.tr_logres = xfs_calc_attrsetm_reservation(mp);
	resp->tr_attrsetm.tr_logcount = XFS_ATTRSET_LOG_COUNT;
	resp->tr_attrsetm.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_attrrm.tr_logres = xfs_calc_attrrm_reservation(mp);
	resp->tr_attrrm.tr_logcount = XFS_ATTRRM_LOG_COUNT;
	resp->tr_attrrm.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_growrtalloc.tr_logres = xfs_calc_growrtalloc_reservation(mp);
	resp->tr_growrtalloc.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT;
	resp->tr_growrtalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp,
			false);
	resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
	resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	/* Needs tr_attrsetm above; see xfs_calc_namespace_reservations(). */
	xfs_calc_namespace_reservations(mp, resp);

	/*
	 * The following transactions are logged in logical format with
	 * a default log count.
	 */
	resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation();
	resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT;

	resp->tr_sb.tr_logres = xfs_calc_sb_reservation(mp);
	resp->tr_sb.tr_logcount = XFS_DEFAULT_LOG_COUNT;

	/* growdata requires permanent res; it can free space to the last AG */
	resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp);
	resp->tr_growdata.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT;
	resp->tr_growdata.tr_logflags |= XFS_TRANS_PERM_LOG_RES;

	/* The following transaction are logged in logical format */
	resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp);
	resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp);
	resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp);
	resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp);
	resp->tr_clearagi.tr_logres = xfs_calc_clear_agi_bucket_reservation(mp);
	resp->tr_growrtzero.tr_logres = xfs_calc_growrtzero_reservation(mp);
	resp->tr_growrtfree.tr_logres = xfs_calc_growrtfree_reservation(mp);

	/*
	 * Add one logcount for BUI items that appear with rmap or reflink,
	 * one logcount for refcount intent items, and one logcount for rmap
	 * intent items.
	 */
	if (xfs_has_reflink(mp) || xfs_has_rmapbt(mp))
		logcount_adj++;
	if (xfs_has_reflink(mp))
		logcount_adj++;
	if (xfs_has_rmapbt(mp))
		logcount_adj++;

	resp->tr_itruncate.tr_logcount += logcount_adj;
	resp->tr_write.tr_logcount += logcount_adj;
	resp->tr_qm_dqalloc.tr_logcount += logcount_adj;

	/*
	 * Now that we've finished computing the static reservations, we can
	 * compute the dynamic reservation for atomic writes.
	 */
	xfs_calc_default_atomic_ioend_reservation(mp, resp);
}
1403
/*
 * Return the per-extent and fixed transaction reservation sizes needed to
 * complete an atomic write.
 *
 * The return value is the worst-case log space consumed per extent across
 * the whole intent chain; *step_size is set to the largest single-step
 * completion reservation among the four intent item types.
 */
STATIC unsigned int
xfs_calc_atomic_write_ioend_geometry(
	struct xfs_mount	*mp,
	unsigned int		*step_size)
{
	/* Log space for one intent/done item of each relevant type. */
	const unsigned int	efi = xfs_efi_log_space(1);
	const unsigned int	efd = xfs_efd_log_space(1);
	const unsigned int	rui = xfs_rui_log_space(1);
	const unsigned int	rud = xfs_rud_log_space();
	const unsigned int	cui = xfs_cui_log_space(1);
	const unsigned int	cud = xfs_cud_log_space();
	const unsigned int	bui = xfs_bui_log_space(1);
	const unsigned int	bud = xfs_bud_log_space();

	/*
	 * Maximum overhead to complete an atomic write ioend in software:
	 * remove data fork extent + remove cow fork extent + map extent into
	 * data fork.
	 *
	 * tx0: Creates a BUI and a CUI and that's all it needs.
	 *
	 * tx1: Roll to finish the BUI.  Need space for the BUD, an RUI, and
	 * enough space to relog the CUI (== CUI + CUD).
	 *
	 * tx2: Roll again to finish the RUI.  Need space for the RUD and space
	 * to relog the CUI.
	 *
	 * tx3: Roll again, need space for the CUD and possibly a new EFI.
	 *
	 * tx4: Roll again, need space for an EFD.
	 *
	 * If the extent referenced by the pair of BUI/CUI items is not the one
	 * being currently processed, then we need to reserve space to relog
	 * both items.
	 */
	const unsigned int	tx0 = bui + cui;
	const unsigned int	tx1 = bud + rui + cui + cud;
	const unsigned int	tx2 = rud + cui + cud;
	const unsigned int	tx3 = cud + efi;
	const unsigned int	tx4 = efd;
	const unsigned int	relog = bui + bud + cui + cud;

	/* Worst single transaction in the chain bounds the per-extent cost. */
	const unsigned int	per_intent = max(max3(tx0, tx1, tx2),
						 max3(tx3, tx4, relog));

	/* Overhead to finish one step of each intent item type */
	const unsigned int	f1 = xfs_calc_finish_efi_reservation(mp, 1);
	const unsigned int	f2 = xfs_calc_finish_rui_reservation(mp, 1);
	const unsigned int	f3 = xfs_calc_finish_cui_reservation(mp, 1);
	const unsigned int	f4 = xfs_calc_finish_bui_reservation(mp, 1);

	/* We only finish one item per transaction in a chain */
	*step_size = max(f4, max3(f1, f2, f3));

	return per_intent;
}
1464
1465/*
1466 * Compute the maximum size (in fsblocks) of atomic writes that we can complete
1467 * given the existing log reservations.
1468 */
1469xfs_extlen_t
1470xfs_calc_max_atomic_write_fsblocks(
1471 struct xfs_mount *mp)
1472{
1473 const struct xfs_trans_res *resv = &M_RES(mp)->tr_atomic_ioend;
1474 unsigned int per_intent = 0;
1475 unsigned int step_size = 0;
1476 unsigned int ret = 0;
1477
1478 if (resv->tr_logres > 0) {
1479 per_intent = xfs_calc_atomic_write_ioend_geometry(mp,
1480 &step_size);
1481
1482 if (resv->tr_logres >= step_size)
1483 ret = (resv->tr_logres - step_size) / per_intent;
1484 }
1485
1486 trace_xfs_calc_max_atomic_write_fsblocks(mp, per_intent, step_size,
1487 resv->tr_logres, ret);
1488
1489 return ret;
1490}
1491
/*
 * Compute the log blocks and transaction reservation needed to complete an
 * atomic write of a given number of blocks.  Worst case, each block requires
 * separate handling.  A return value of 0 means something went wrong.
 *
 * On success, *new_logres holds the candidate tr_atomic_ioend reservation
 * and the return value is the minimum log size (in blocks) it requires.
 */
xfs_extlen_t
xfs_calc_atomic_write_log_geometry(
	struct xfs_mount	*mp,
	xfs_extlen_t		blockcount,
	unsigned int		*new_logres)
{
	struct xfs_trans_res	*curr_res = &M_RES(mp)->tr_atomic_ioend;
	uint			old_logres = curr_res->tr_logres;
	unsigned int		per_intent, step_size;
	unsigned int		logres;
	xfs_extlen_t		min_logblocks;

	ASSERT(blockcount > 0);

	/* Reset tr_atomic_ioend to the filesystem default first. */
	xfs_calc_default_atomic_ioend_reservation(mp, M_RES(mp));

	per_intent = xfs_calc_atomic_write_ioend_geometry(mp, &step_size);

	/* Check for overflows */
	if (check_mul_overflow(blockcount, per_intent, &logres) ||
	    check_add_overflow(logres, step_size, &logres))
		return 0;

	/*
	 * Temporarily install the candidate reservation to compute the
	 * minimum log size it would need, then restore the previous value.
	 */
	curr_res->tr_logres = logres;
	min_logblocks = xfs_log_calc_minimum_size(mp);
	curr_res->tr_logres = old_logres;

	trace_xfs_calc_max_atomic_write_log_geometry(mp, per_intent, step_size,
			blockcount, min_logblocks, logres);

	*new_logres = logres;
	return min_logblocks;
}
1530
/*
 * Compute the transaction reservation needed to complete an out of place
 * atomic write of a given number of blocks.
 *
 * Returns 0 on success, or -EINVAL if the requested size cannot be
 * accommodated by the current log.  On success tr_atomic_ioend is updated.
 */
int
xfs_calc_atomic_write_reservation(
	struct xfs_mount	*mp,
	xfs_extlen_t		blockcount)
{
	unsigned int		new_logres;
	xfs_extlen_t		min_logblocks;

	/*
	 * If the caller doesn't ask for a specific atomic write size, then
	 * use the defaults.
	 */
	if (blockcount == 0) {
		xfs_calc_default_atomic_ioend_reservation(mp, M_RES(mp));
		return 0;
	}

	/* Reject sizes whose reservation overflows or exceeds the log. */
	min_logblocks = xfs_calc_atomic_write_log_geometry(mp, blockcount,
			&new_logres);
	if (!min_logblocks || min_logblocks > mp->m_sb.sb_logblocks)
		return -EINVAL;

	M_RES(mp)->tr_atomic_ioend.tr_logres = new_logres;
	return 0;
}