// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. * Copyright (C) 2010 Red Hat, Inc. * All Rights Reserved.
*/ #include"xfs.h" #include"xfs_fs.h" #include"xfs_shared.h" #include"xfs_format.h" #include"xfs_log_format.h" #include"xfs_trans_resv.h" #include"xfs_mount.h" #include"xfs_da_format.h" #include"xfs_da_btree.h" #include"xfs_inode.h" #include"xfs_bmap_btree.h" #include"xfs_quota.h" #include"xfs_trans.h" #include"xfs_qm.h" #include"xfs_trans_space.h" #include"xfs_rtbitmap.h" #include"xfs_attr_item.h" #include"xfs_log.h" #include"xfs_defer.h" #include"xfs_bmap_item.h" #include"xfs_extfree_item.h" #include"xfs_rmap_item.h" #include"xfs_refcount_item.h" #include"xfs_trace.h"
/*
 * Readable names for the boolean "alloc" argument of
 * xfs_calc_inode_chunk_res(): _ALLOC selects the chunk allocation case,
 * _FREE the chunk free case.  Each directive must sit on its own line;
 * otherwise the second #define becomes part of the first macro's expansion.
 */
#define _ALLOC	true
#define _FREE	false
/*
 * Every logged buffer carries bookkeeping in addition to its data: one log op
 * header and one buffer log format structure.  Return that overhead rounded
 * up to a multiple of 128 bytes, which keeps the per-buffer value identical
 * to the reservation that has historically been used for it.
 */
STATIC uint
xfs_buf_log_overhead(void)
{
	size_t		hdr_bytes;

	hdr_bytes = sizeof(struct xlog_op_header) +
		    sizeof(struct xfs_buf_log_format);

	return round_up(hdr_bytes, 128);
}
/*
 * Compute the log reservation, in bytes, for a set of logged buffers.
 *
 * @nbufs: number of items that will be changed in the transaction
 * @size:  bytes of data to reserve per item
 *
 * Each item is charged its data size plus the fixed per-buffer log overhead.
 */
STATIC uint
xfs_calc_buf_res(
	uint		nbufs,
	uint		size)
{
	uint		per_buf = size + xfs_buf_log_overhead();

	return nbufs * per_buf;
}
/* * Per-extent log reservation for the btree changes involved in freeing or * allocating an extent. In classic XFS there were two trees that will be * modified (bnobt + cntbt). With rmap enabled, there are three trees * (rmapbt). The number of blocks reserved is based on the formula: * * num trees * ((2 blocks/level * max depth) - 1) * * Keep in mind that max depth is calculated separately for each type of tree.
*/
uint
xfs_allocfree_block_count( struct xfs_mount *mp,
uint num_ops)
{
uint blocks;
/* * Per-extent log reservation for refcount btree changes. These are never done * in the same transaction as an allocation or a free, so we compute them * separately.
*/ staticunsignedint
xfs_refcountbt_block_count( struct xfs_mount *mp, unsignedint num_ops)
{ return num_ops * (2 * mp->m_refc_maxlevels - 1);
}
/*
 * Log reservation for logging @ninodes inodes.
 *
 * Inodes are logged in their in-memory format, so the logged size does not
 * map directly onto the on-disk inode size.  In particular a btree format
 * fork carries a full btree block header in memory that is not written to the
 * on-disk fork, so a maximally sized btree fork logs more than the inode
 * occupies on disk.  Per inode we therefore account for:
 *
 *  - 4 log op headers (inode log format, inode core, and the two forks)
 *  - the inode log format object
 *  - the on-disk inode size, which covers the core plus the fork data
 *  - two BMBT block headers, which are additional to the records and
 *    pointers that fit inside the inode forks
 */
STATIC uint
xfs_calc_inode_res(
	struct xfs_mount	*mp,
	uint			ninodes)
{
	uint			per_inode;

	per_inode = 4 * sizeof(struct xlog_op_header) +
		    sizeof(struct xfs_inode_log_format) +
		    mp->m_sb.sb_inodesize +
		    2 * xfs_bmbt_block_len(mp);

	return ninodes * per_inode;
}
/*
 * Inserting or removing an inode btree record modifies the inode btree and
 * the free space btrees (the inobt does not use the agfl).  Reserve:
 *
 *	the inode btree: max depth blocks, one fs block each
 *	the allocation btrees: the block count for a single extent
 *	    allocation/free, one fs block each
 *
 * The caller must account for SB and AG header modifications, etc.
 */
STATIC uint
xfs_calc_inobt_res(
	struct xfs_mount	*mp)
{
	uint			inobt_bytes;
	uint			allocbt_bytes;

	inobt_bytes = xfs_calc_buf_res(M_IGEO(mp)->inobt_maxlevels,
				       XFS_FSB_TO_B(mp, 1));
	allocbt_bytes = xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
					 XFS_FSB_TO_B(mp, 1));

	return inobt_bytes + allocbt_bytes;
}
/*
 * The free inode btree is an optional feature that tracks records for inode
 * chunks with at least one free inode.  Because a record can be removed
 * during an individual inode allocation, callers include this reservation for
 * both the chunk allocation and the individual inode allocation (modify)
 * cases.
 *
 * The cost of a finobt modification equals that of the traditional inobt (a
 * full shape change plus block allocation); without the feature it is zero.
 */
STATIC uint
xfs_calc_finobt_res(
	struct xfs_mount	*mp)
{
	return xfs_has_finobt(mp) ? xfs_calc_inobt_res(mp) : 0;
}
/*
 * Reservation needed to allocate or free an inode chunk:
 *
 *	the allocation btrees: block count for one extent, one fs block each
 *	the inode chunk: m_ino_geo.ialloc_blks * N
 *
 * N depends on the operation.  A chunk free always invalidates the buffers,
 * so only the buffer headers are reserved (N == 0).  A chunk allocation on
 * filesystems without v3 inodes logs the initialised chunk (N == block size).
 * With v3 inodes the icreate transaction uses ordered buffers and no chunk
 * reservation is added at all.
 *
 * @alloc: true (_ALLOC) for chunk allocation, false (_FREE) for chunk free
 */
STATIC uint
xfs_calc_inode_chunk_res(
	struct xfs_mount	*mp,
	bool			alloc)
{
	uint			per_blk;
	uint			total;

	total = xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
				 XFS_FSB_TO_B(mp, 1));

	/* icreate tx uses ordered buffers */
	if (alloc && xfs_has_v3inodes(mp))
		return total;

	per_blk = alloc ? XFS_FSB_TO_B(mp, 1) : 0;

	return total + xfs_calc_buf_res(M_IGEO(mp)->ialloc_blks, per_blk);
}
/* * Per-extent log reservation for the btree changes involved in freeing or * allocating a realtime extent. We have to be able to log as many rtbitmap * blocks as needed to mark inuse XFS_BMBT_MAX_EXTLEN blocks' worth of realtime * extents, as well as the realtime summary block (t1). Realtime rmap btree * operations happen in a second transaction, so factor in a couple of rtrmapbt * splits (t2).
*/ staticunsignedint
xfs_rtalloc_block_count( struct xfs_mount *mp, unsignedint num_ops)
{ unsignedint rtbmp_blocks;
xfs_rtxlen_t rtxlen; unsignedint t1, t2 = 0;
/* * Various log reservation values. * * These are based on the size of the file system block because that is what * most transactions manipulate. Each adds in an additional 128 bytes per * item logged to try to account for the overhead of the transaction mechanism. * * Note: Most of the reservations underestimate the number of allocation * groups into which they could free extents in the xfs_defer_finish() call. * This is because the number in the worst case is quite high and quite * unusual. In order to fix this we need to change xfs_defer_finish() to free * extents in only a single AG at a time. This will require changes to the * EFI code as well, however, so that the EFI for the extents not freed is * logged again in each transaction. See SGI PV #261917. * * Reservation functions here avoid a huge stack in xfs_trans_init due to * register overflow from temporaries in the calculations.
*/
/* * Finishing data device refcount updates (t1): * the agfs of the ags containing the blocks: nr_ops * sector size * the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
*/ inlineunsignedint
xfs_calc_finish_cui_reservation( struct xfs_mount *mp, unsignedint nr_ops)
{ if (!xfs_has_reflink(mp)) return 0;
/* * Compute the log reservation required to handle the refcount update * transaction. Refcount updates are always done via deferred log items. * * This is calculated as the max of: * Data device refcount updates (t1): * the agfs of the ags containing the blocks: nr_ops * sector size * the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size * Realtime refcount updates (t2); * the rt refcount inode * the rtrefcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
*/ staticunsignedint
xfs_calc_refcountbt_reservation( struct xfs_mount *mp, unsignedint nr_ops)
{ unsignedint t1, t2;
/* * In a write transaction we can allocate a maximum of 2 * extents. This gives (t1): * the inode getting the new extents: inode size * the inode's bmap btree: max depth * block size * the agfs of the ags from which the extents are allocated: 2 * sector * the superblock free block counter: sector size * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size * Or, if we're writing to a realtime file (t2): * the inode getting the new extents: inode size * the inode's bmap btree: max depth * block size * the agfs of the ags from which the extents are allocated: 2 * sector * the superblock free block counter: sector size * the realtime bitmap: ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes * the realtime summary: 1 block * the allocation btrees: 2 trees * (2 * max depth - 1) * block size * And the bmap_finish transaction can free bmap blocks in a join (t3): * the agfs of the ags containing the blocks: 2 * sector size * the agfls of the ags containing the blocks: 2 * sector size * the super block free block counter: sector size * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size * And any refcount updates that happen in a separate transaction (t4).
*/ STATIC uint
xfs_calc_write_reservation( struct xfs_mount *mp, bool for_minlogsize)
{ unsignedint t1, t2, t3, t4; unsignedint blksz = XFS_FSB_TO_B(mp, 1);
t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz);
/* * In the early days of reflink, we included enough reservation to log * two refcountbt splits for each transaction. The codebase runs * refcountbt updates in separate transactions now, so to compute the * minimum log size, add the refcountbtree splits back to t1 and t3 and * do not account them separately as t4. Reflink did not support * realtime when the reservations were established, so no adjustment to * t2 is needed.
*/ if (for_minlogsize) { unsignedint adj = 0;
/* * Finishing an EFI can free the blocks and bmap blocks (t2): * the agf for each of the ags: nr * sector size * the agfl for each of the ags: nr * sector size * the super block to reflect the freed blocks: sector size * worst case split in allocation btrees per extent assuming nr extents: * nr exts * 2 trees * (2 * max depth - 1) * block size
*/ inlineunsignedint
xfs_calc_finish_efi_reservation( struct xfs_mount *mp, unsignedint nr)
{ return xfs_calc_buf_res((2 * nr) + 1, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_block_count(mp, nr),
mp->m_sb.sb_blocksize);
}
/* * Or, if it's a realtime file (t3): * the agf for each of the ags: 2 * sector size * the agfl for each of the ags: 2 * sector size * the super block to reflect the freed blocks: sector size * the realtime bitmap: * 2 exts * ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes * the realtime summary: 2 exts * 1 block * worst case split in allocation btrees per extent assuming 2 extents: * 2 exts * 2 trees * (2 * max depth - 1) * block size
*/ inlineunsignedint
xfs_calc_finish_rt_efi_reservation( struct xfs_mount *mp, unsignedint nr)
{ if (!xfs_has_realtime(mp)) return 0;
/*
 * Finishing an RUI is the same as an EFI.  We can split the rmap btree twice
 * on each end of the record, and that can cause the AGFL to be refilled or
 * emptied out.
 *
 * Returns 0 when the filesystem does not have the rmap btree feature.
 */
inline unsigned int
xfs_calc_finish_rui_reservation(
	struct xfs_mount	*mp,
	unsigned int		nr)
{
	if (!xfs_has_rmapbt(mp))
		return 0;

	return xfs_calc_finish_efi_reservation(mp, nr);
}
/*
 * Finishing a realtime RUI is charged the same as finishing a realtime EFI
 * for the same number of records.
 *
 * Returns 0 when the filesystem does not have the realtime rmap btree
 * feature.
 */
inline unsigned int
xfs_calc_finish_rt_rui_reservation(
	struct xfs_mount	*mp,
	unsigned int		nr)
{
	if (!xfs_has_rtrmapbt(mp))
		return 0;

	return xfs_calc_finish_rt_efi_reservation(mp, nr);
}
/* * In finishing a BUI, we can modify: * the inode being truncated: inode size * dquots * the inode's bmap btree: (max depth + 1) * block size
*/ inlineunsignedint
xfs_calc_finish_bui_reservation( struct xfs_mount *mp, unsignedint nr)
{ return xfs_calc_inode_res(mp, 1) + XFS_DQUOT_LOGRES +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
mp->m_sb.sb_blocksize);
}
/* * In truncating a file we free up to two extents at once. We can modify (t1): * the inode being truncated: inode size * the inode's bmap btree: (max depth + 1) * block size * And the bmap_finish transaction can free the blocks and bmap blocks (t2): * the agf for each of the ags: 4 * sector size * the agfl for each of the ags: 4 * sector size * the super block to reflect the freed blocks: sector size * worst case split in allocation btrees per extent assuming 4 extents: * 4 exts * 2 trees * (2 * max depth - 1) * block size * Or, if it's a realtime file (t3): * the agf for each of the ags: 2 * sector size * the agfl for each of the ags: 2 * sector size * the super block to reflect the freed blocks: sector size * the realtime bitmap: * 2 exts * ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes * the realtime summary: 2 exts * 1 block * worst case split in allocation btrees per extent assuming 2 extents: * 2 exts * 2 trees * (2 * max depth - 1) * block size * And any refcount updates that happen in a separate transaction (t4).
*/ STATIC uint
xfs_calc_itruncate_reservation( struct xfs_mount *mp, bool for_minlogsize)
{ unsignedint t1, t2, t3, t4; unsignedint blksz = XFS_FSB_TO_B(mp, 1);
t2 = xfs_calc_finish_efi_reservation(mp, 4);
t3 = xfs_calc_finish_rt_efi_reservation(mp, 2);
/* * In the early days of reflink, we included enough reservation to log * four refcountbt splits in the same transaction as bnobt/cntbt * updates. The codebase runs refcountbt updates in separate * transactions now, so to compute the minimum log size, add the * refcount btree splits back here and do not compute them separately * as t4. Reflink did not support realtime when the reservations were * established, so do not adjust t3.
*/ if (for_minlogsize) { if (xfs_has_reflink(mp))
t2 += xfs_calc_buf_res(
xfs_refcountbt_block_count(mp, 4),
blksz);
/* * In renaming files we can modify: * the five inodes involved: 5 * inode size * the two directory btrees: 2 * (max depth + v2) * dir block size * the two directory bmap btrees: 2 * max depth * block size * And the bmap_finish transaction can free dir and bmap blocks (two sets * of bmap blocks) giving (t2): * the agf for the ags in which the blocks live: 3 * sector size * the agfl for the ags in which the blocks live: 3 * sector size * the superblock for the free block count: sector size * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size * If parent pointers are enabled (t3), then each transaction in the chain * must be capable of setting or removing the extended attribute * containing the parent information. It must also be able to handle * the three xattr intent items that track the progress of the parent * pointer update.
*/ STATIC uint
xfs_calc_rename_reservation( struct xfs_mount *mp)
{ unsignedint overhead = XFS_DQUOT_LOGRES; struct xfs_trans_resv *resp = M_RES(mp); unsignedint t1, t2, t3 = 0;
if (xfs_has_parent(mp)) { unsignedint rename_overhead, exchange_overhead;
t3 = max(resp->tr_attrsetm.tr_logres,
resp->tr_attrrm.tr_logres);
/* * For a standard rename, the three xattr intent log items * are (1) replacing the pptr for the source file; (2) * removing the pptr on the dest file; and (3) adding a * pptr for the whiteout file in the src dir. * * For an RENAME_EXCHANGE, there are two xattr intent * items to replace the pptr for both src and dest * files. Link counts don't change and there is no * whiteout. * * In the worst case we can end up relogging all log * intent items to allow the log tail to move ahead, so * they become overhead added to each transaction in a * processing chain.
*/
rename_overhead = xfs_calc_pptr_replace_overhead() +
xfs_calc_pptr_unlink_overhead() +
xfs_calc_pptr_link_overhead();
exchange_overhead = 2 * xfs_calc_pptr_replace_overhead();
/*
 * Number of log reservation units (log count) for a rename transaction.
 *
 * @mp:   mount, consulted for the parent pointer feature
 * @resp: reservation table; the attr set/remove log counts must already be
 *        filled in when parent pointers are enabled
 */
static inline unsigned int
xfs_rename_log_count(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	/* One for the rename, one more for freeing blocks */
	unsigned int		ret = XFS_RENAME_LOG_COUNT;

	/*
	 * Pre-reserve enough log reservation to handle the transaction
	 * rolling needed to remove or add one parent pointer.
	 */
	if (xfs_has_parent(mp))
		ret += max(resp->tr_attrsetm.tr_logcount,
			   resp->tr_attrrm.tr_logcount);

	return ret;
}
/*
 * Taking an inode off the unlinked list can modify:
 *    the agi hash list and counters: sector size
 *    the on disk inode before ours in the agi hash list: inode cluster size
 *    the on disk inode in the agi hash list: inode cluster size
 */
STATIC uint
xfs_calc_iunlink_remove_reservation(
	struct xfs_mount	*mp)
{
	uint			agi_bytes;
	uint			cluster_bytes;

	agi_bytes = xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
	cluster_bytes = 2 * M_IGEO(mp)->inode_cluster_size;

	return agi_bytes + cluster_bytes;
}
/* * Pre-reserve enough log reservation to handle the transaction * rolling needed to add one parent pointer.
*/ if (xfs_has_parent(mp))
ret += resp->tr_attrsetm.tr_logcount;
return ret;
}
/*
 * For creating a link to an inode (t1):
 *    the parent directory inode: inode size
 *    the linked inode: inode size
 *    the directory btree could split: (max depth + v2) * dir block size
 *    the directory bmap btree could join or split: (max depth + v2) * blocksize
 * And the bmap_finish transaction can free some bmap blocks giving (t2):
 *    the agf for the ag in which the blocks live: sector size
 *    the agfl for the ag in which the blocks live: sector size
 *    the superblock for the free block count: sector size
 *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
 * If parent pointers are enabled (t3), the transaction must also be able to
 * set the parent pointer attribute, and the link pptr intent overhead is
 * added to every transaction in the chain.
 *
 * Returns the fixed overhead plus the largest of t1, t2 and t3.
 */
STATIC uint
xfs_calc_link_reservation(
	struct xfs_mount	*mp)
{
	unsigned int		overhead = XFS_DQUOT_LOGRES;
	struct xfs_trans_resv	*resp = M_RES(mp);
	unsigned int		t1, t2, t3 = 0;

	/*
	 * NOTE(review): presumably the linked inode may have to come off the
	 * AGI unlinked list first -- confirm against the callers.
	 */
	overhead += xfs_calc_iunlink_remove_reservation(mp);

	/* t1 and t2 must be assigned before max3() reads them. */
	t1 = xfs_calc_inode_res(mp, 2) +
	     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
	t2 = xfs_calc_finish_efi_reservation(mp, 1);

	if (xfs_has_parent(mp)) {
		t3 = resp->tr_attrsetm.tr_logres;
		overhead += xfs_calc_pptr_link_overhead();
	}

	return overhead + max3(t1, t2, t3);
}
/*
 * Putting an inode on the unlinked list can modify:
 *    the agi hash list: sector size
 *    the on disk inode: inode cluster size
 */
STATIC uint
xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
{
	uint		agi_bytes = xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);

	return agi_bytes + M_IGEO(mp)->inode_cluster_size;
}
/* * Pre-reserve enough log reservation to handle the transaction * rolling needed to remove one parent pointer.
*/ if (xfs_has_parent(mp))
ret += resp->tr_attrrm.tr_logcount;
return ret;
}
/*
 * For removing a directory entry we can modify (t1):
 *    the parent directory inode: inode size
 *    the removed inode: inode size
 *    the directory btree could join: (max depth + v2) * dir block size
 *    the directory bmap btree could join or split: (max depth + v2) * blocksize
 * And the bmap_finish transaction can free the dir and bmap blocks giving (t2):
 *    the agf for the ag in which the blocks live: 2 * sector size
 *    the agfl for the ag in which the blocks live: 2 * sector size
 *    the superblock for the free block count: sector size
 *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
 * If parent pointers are enabled (t3), the transaction must also be able to
 * remove the parent pointer attribute, and the unlink pptr intent overhead is
 * added to every transaction in the chain.
 *
 * Returns the fixed overhead plus the largest of t1, t2 and t3.
 */
STATIC uint
xfs_calc_remove_reservation(
	struct xfs_mount	*mp)
{
	unsigned int		overhead = XFS_DQUOT_LOGRES;
	struct xfs_trans_resv	*resp = M_RES(mp);
	unsigned int		t1, t2, t3 = 0;

	/*
	 * NOTE(review): presumably the removed inode may have to go onto the
	 * AGI unlinked list -- confirm against the callers.
	 */
	overhead += xfs_calc_iunlink_add_reservation(mp);

	/* t1 and t2 must be assigned before max3() reads them. */
	t1 = xfs_calc_inode_res(mp, 2) +
	     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
	t2 = xfs_calc_finish_efi_reservation(mp, 2);

	if (xfs_has_parent(mp)) {
		t3 = resp->tr_attrrm.tr_logres;
		overhead += xfs_calc_pptr_unlink_overhead();
	}

	return overhead + max3(t1, t2, t3);
}
/* * For create, break it in to the two cases that the transaction * covers. We start with the modify case - allocation done by modification * of the state of existing inodes - and the allocation case.
*/
/*
 * The "modify" half of a create, where the new inode comes from an existing
 * chunk.  We can modify:
 *    the parent directory inode: inode size
 *    the new inode: inode size
 *    the inode btree entry: block size
 *    the superblock for the nlink flag: sector size
 *    the directory btree: (max depth + v2) * dir block size
 *    the directory inode's bmap btree: (max depth + v2) * block size
 *    the finobt (record modification and allocation btrees)
 */
STATIC uint
xfs_calc_create_resv_modify(
	struct xfs_mount	*mp)
{
	uint			res;

	res = xfs_calc_inode_res(mp, 2);
	res += xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
	/* the inobt entry is charged as one raw block, without buffer overhead */
	res += (uint)XFS_FSB_TO_B(mp, 1);
	res += xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
	res += xfs_calc_finobt_res(mp);

	return res;
}
/*
 * The "allocate" half of an icreate, where a new inode chunk is allocated:
 *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
 *    the superblock for the nlink flag: sector size
 *    the inode chunk (allocation, optional init)
 *    the inobt (record insertion)
 *    the finobt (optional, record insertion)
 */
STATIC uint
xfs_calc_icreate_resv_alloc(
	struct xfs_mount	*mp)
{
	uint			res;

	res = xfs_calc_buf_res(2, mp->m_sb.sb_sectsize);
	/* the superblock is charged as a raw sector, without buffer overhead */
	res += mp->m_sb.sb_sectsize;
	res += xfs_calc_inode_chunk_res(mp, _ALLOC);
	res += xfs_calc_inobt_res(mp);
	res += xfs_calc_finobt_res(mp);

	return res;
}
/* * Pre-reserve enough log reservation to handle the transaction * rolling needed to add one parent pointer.
*/ if (xfs_has_parent(mp))
ret += resp->tr_attrsetm.tr_logcount;
/* * Pre-reserve enough log reservation to handle the transaction * rolling needed to add one parent pointer.
*/ if (xfs_has_parent(mp))
ret += resp->tr_attrsetm.tr_logcount;
return ret;
}
/*
 * A new directory costs exactly what a new file costs, so reuse the icreate
 * reservation unchanged.
 */
STATIC uint
xfs_calc_mkdir_reservation(
	struct xfs_mount	*mp)
{
	uint			res = xfs_calc_icreate_reservation(mp);

	return res;
}
/* * Pre-reserve enough log reservation to handle the transaction * rolling needed to add one parent pointer.
*/ if (xfs_has_parent(mp))
ret += resp->tr_attrsetm.tr_logcount;
return ret;
}
/*
 * Making a new symlink is the same as creating a new file, plus one logged
 * buffer of XFS_SYMLINK_MAXLEN bytes for the remote symlink data.
 */
STATIC uint
xfs_calc_symlink_reservation(
	struct xfs_mount	*mp)
{
	uint			target_bytes;

	target_bytes = xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN);

	return xfs_calc_icreate_reservation(mp) + target_bytes;
}
/*
 * In freeing an inode we can modify:
 *    the inode being freed: inode size
 *    the super block free inode counter, AGF and AGFL: sector size
 *    the on disk inode (agi unlinked list removal)
 *    the inode chunk (invalidated, headers only)
 *    the inode btree
 *    the finobt (record insertion, removal or modification)
 *
 * The inode chunk term includes an allocfree reservation for freeing the
 * chunk.  That is technically extraneous because the chunk free is deferred
 * (it happens after a transaction roll), but it is kept on purpose: there
 * have been reports of ifree transaction overruns caused by too many agfl
 * fixups during inode chunk frees.
 */
STATIC uint
xfs_calc_ifree_reservation(
	struct xfs_mount	*mp)
{
	uint			res = XFS_DQUOT_LOGRES;

	res += xfs_calc_inode_res(mp, 1);
	res += xfs_calc_buf_res(3, mp->m_sb.sb_sectsize);
	res += xfs_calc_iunlink_remove_reservation(mp);
	res += xfs_calc_inode_chunk_res(mp, _FREE);
	res += xfs_calc_inobt_res(mp);
	res += xfs_calc_finobt_res(mp);

	return res;
}
/*
 * When only the inode changes we log the inode and possibly the superblock
 * (one sector), plus the dquot reservation.
 */
STATIC uint
xfs_calc_ichange_reservation(
	struct xfs_mount	*mp)
{
	uint			res = XFS_DQUOT_LOGRES;

	res += xfs_calc_inode_res(mp, 1);
	res += xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);

	return res;
}
/*
 * Growing the data section of the filesystem touches:
 *	the superblock, agi and agf: 3 * sector size
 *	the allocation btrees: block count for one extent, one fs block each
 */
STATIC uint
xfs_calc_growdata_reservation(
	struct xfs_mount	*mp)
{
	uint			hdr_bytes;
	uint			allocbt_bytes;

	hdr_bytes = xfs_calc_buf_res(3, mp->m_sb.sb_sectsize);
	allocbt_bytes = xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
					 XFS_FSB_TO_B(mp, 1));

	return hdr_bytes + allocbt_bytes;
}
/*
 * Growing the rt section of the filesystem, first set of transactions
 * (ALLOC): space is allocated to the bitmap or summary files.
 *	superblock: sector size
 *	agf of the ag from which the extent is allocated: sector size
 *	bmap btree for bitmap/summary inode: max depth * blocksize
 *	bitmap/summary inode: inode size
 *	allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
 */
STATIC uint
xfs_calc_growrtalloc_reservation(
	struct xfs_mount	*mp)
{
	uint			res;

	res = xfs_calc_buf_res(2, mp->m_sb.sb_sectsize);
	res += xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
				XFS_FSB_TO_B(mp, 1));
	res += xfs_calc_inode_res(mp, 1);
	res += xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
				XFS_FSB_TO_B(mp, 1));

	return res;
}
/*
 * Growing the rt section of the filesystem, second set of transactions
 * (ZERO): the new metadata blocks are zeroed, one bitmap/summary block
 * (blocksize) per transaction.
 */
STATIC uint
xfs_calc_growrtzero_reservation(
	struct xfs_mount	*mp)
{
	uint			blksz = mp->m_sb.sb_blocksize;

	return xfs_calc_buf_res(1, blksz);
}
/*
 * Growing the rt section of the filesystem, third set of transactions
 * (FREE): metadata is updated without allocating any new blocks.
 *	superblock: sector size
 *	bitmap inode: inode size
 *	summary inode: inode size
 *	one bitmap block: blocksize
 *	summary blocks: new summary size (m_rsumblocks fs blocks)
 */
STATIC uint
xfs_calc_growrtfree_reservation(
	struct xfs_mount	*mp)
{
	uint			res;

	res = xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
	res += xfs_calc_inode_res(mp, 2);
	res += xfs_calc_buf_res(1, mp->m_sb.sb_blocksize);
	res += xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, mp->m_rsumblocks));

	return res;
}
/*
 * Logging the inode modification timestamp on a synchronous write needs room
 * for a single inode.
 */
STATIC uint
xfs_calc_swrite_reservation(
	struct xfs_mount	*mp)
{
	uint			res = xfs_calc_inode_res(mp, 1);

	return res;
}
/*
 * Logging the inode mode bits when writing a setuid/setgid file needs room
 * for a single inode.
 */
STATIC uint
xfs_calc_writeid_reservation(
	struct xfs_mount	*mp)
{
	uint			res = xfs_calc_inode_res(mp, 1);

	return res;
}
/*
 * Converting the inode from non-attributed to attributed touches:
 *	the inode being converted: inode size
 *	agf block and superblock (for block allocation)
 *	the new block (directory sized)
 *	bmap blocks for the new directory block
 *	allocation btrees
 * plus the dquot reservation.
 */
STATIC uint
xfs_calc_addafork_reservation(
	struct xfs_mount	*mp)
{
	uint			res = XFS_DQUOT_LOGRES;

	res += xfs_calc_inode_res(mp, 1);
	res += xfs_calc_buf_res(2, mp->m_sb.sb_sectsize);
	res += xfs_calc_buf_res(1, mp->m_dir_geo->blksize);
	res += xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
				XFS_FSB_TO_B(mp, 1));
	res += xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
				XFS_FSB_TO_B(mp, 1));

	return res;
}
/*
 * Removing the attribute fork of a file.  The reservation is the larger of
 * two transactions.  The truncate itself modifies:
 *    the inode being truncated: inode size
 *    the inode's bmap btree: max depth * block size
 * And the bmap_finish transaction can free the blocks and bmap blocks:
 *    the agf for each of the ags: 4 * sector size
 *    the agfl for each of the ags: 4 * sector size
 *    the super block to reflect the freed blocks: sector size
 *    worst case split in allocation btrees per extent assuming 4 extents:
 *		4 exts * 2 trees * (2 * max depth - 1) * block size
 */
STATIC uint
xfs_calc_attrinval_reservation(
	struct xfs_mount	*mp)
{
	uint			trunc_bytes;
	uint			free_bytes;

	trunc_bytes = xfs_calc_inode_res(mp, 1) +
		      xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
				       XFS_FSB_TO_B(mp, 1));
	free_bytes = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
		     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 4),
				      XFS_FSB_TO_B(mp, 1));

	return max(trunc_bytes, free_bytes);
}
/*
 * Setting an attribute, mount-time part:
 *	the inode getting the attribute
 *	the superblock for allocations
 *	the agfs extents are allocated from
 *	the attribute btree * max depth
 *	the inode allocation btree
 * Attribute transaction space depends on the size of the attribute, so the
 * calculation is split: this part is fixed at mount time, the rest is
 * computed at runtime (see xfs_calc_attrsetrt_reservation()).
 */
STATIC uint
xfs_calc_attrsetm_reservation(
	struct xfs_mount	*mp)
{
	uint			res = XFS_DQUOT_LOGRES;

	res += xfs_calc_inode_res(mp, 1);
	res += xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
	res += xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1));

	return res;
}
/*
 * Setting an attribute, runtime part: transaction space unit per block.
 *	the superblock for allocations: sector size
 *	the inode bmap btree could join or split: max depth * block size
 * The runtime attribute transaction space depends on the total blocks needed
 * for the 1st bmap, so this returns the space unit for one block.  The caller
 * scales it by the attribute extent length in blocks:
 *	ext * M_RES(mp)->tr_attrsetrt.tr_logres
 */
STATIC uint
xfs_calc_attrsetrt_reservation(
	struct xfs_mount	*mp)
{
	uint			sb_bytes;
	uint			bmbt_bytes;

	sb_bytes = xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
	bmbt_bytes = xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
				      XFS_FSB_TO_B(mp, 1));

	return sb_bytes + bmbt_bytes;
}
/*
 * Removing an attribute.  The reservation is the dquot reservation plus the
 * larger of two transactions.  The removal itself modifies:
 *    the inode: inode size
 *    the attribute btree could join: max depth * block size
 *    the inode bmap btree could join or split: max depth * block size
 * And the bmap_finish transaction can free the attr blocks freed giving:
 *    the agf for the ag in which the blocks live: 2 * sector size
 *    the agfl for the ag in which the blocks live: 2 * sector size
 *    the superblock for the free block count: sector size
 *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
 */
STATIC uint
xfs_calc_attrrm_reservation(
	struct xfs_mount	*mp)
{
	uint			rm_bytes;
	uint			free_bytes;

	rm_bytes = xfs_calc_inode_res(mp, 1);
	rm_bytes += xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH,
				     XFS_FSB_TO_B(mp, 1));
	/* attr fork bmbt data as raw bytes ... */
	rm_bytes += (uint)XFS_FSB_TO_B(mp,
				       XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK));
	/* ... and header-only (size 0) buffers for the data fork bmbt levels */
	rm_bytes += xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0);

	free_bytes = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
		     xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2),
				      XFS_FSB_TO_B(mp, 1));

	return XFS_DQUOT_LOGRES + max(rm_bytes, free_bytes);
}
/*
 * Clearing a bad agino number in an agi hash bucket logs one sector-sized
 * buffer.
 */
STATIC uint
xfs_calc_clear_agi_bucket_reservation(
	struct xfs_mount	*mp)
{
	uint			sectsz = mp->m_sb.sb_sectsize;

	return xfs_calc_buf_res(1, sectsz);
}
/*
 * Allocating quota on disk if needed:
 *	the write transaction log space for quota file extent allocation
 *	the unit of quota allocation: one dquot cluster
 *	    (XFS_DQUOT_CLUSTER_SIZE_FSB fs blocks, less one byte)
 *
 * @for_minlogsize: passed straight through to xfs_calc_write_reservation()
 */
STATIC uint
xfs_calc_qm_dqalloc_reservation(
	struct xfs_mount	*mp,
	bool			for_minlogsize)
{
	uint			dq_bytes;

	dq_bytes = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1;

	return xfs_calc_write_reservation(mp, for_minlogsize) +
	       xfs_calc_buf_res(1, dq_bytes);
}
/*
 * Syncing the incore super block changes to disk logs the super block
 * itself: one sector-sized buffer.
 */
STATIC uint
xfs_calc_sb_reservation(
	struct xfs_mount	*mp)
{
	uint			sectsz = mp->m_sb.sb_sectsize;

	return xfs_calc_buf_res(1, sectsz);
}
/*
 * Namespace reservations.
 *
 * These get tricky when parent pointers are enabled as we have attribute
 * modifications occurring from within these transactions. Rather than confuse
 * each of these reservation calculations with the conditional attribute
 * reservations, add them here in a clear and concise manner. This requires that
 * the attribute reservations have already been calculated.
 *
 * Note that we only include the static attribute reservation here; the runtime
 * reservation will have to be modified by the size of the attributes being
 * added/removed/modified. See the comments on the attribute reservation
 * calculations for more details.
 *
 * @mp:   mount handed to each reservation helper
 * @resp: reservation table that is filled in
 *
 * NOTE(review): the statements visible in this body fill in the write, quota
 * limit, growdata and assorted inode/attr/rt reservations rather than the
 * namespace ones described above, and logcount_adj is used without a visible
 * declaration. Lines appear to be missing from this function; confirm against
 * the complete file before relying on it.
 */ STATICvoid
xfs_calc_namespace_reservations( struct xfs_mount *mp, struct xfs_trans_resv *resp)
{
/* The attr set reservation must already be computed (see header comment). */
ASSERT(resp->tr_attrsetm.tr_logres > 0);
/*
 * The following transactions are logged in physical format and
 * require a permanent reservation on space.
 */
resp->tr_write.tr_logres = xfs_calc_write_reservation(mp, false);
resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
/*
 * The following transactions are logged in logical format with
 * a default log count.
 */
resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation();
resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT;
/* growdata requires permanent res; it can free space to the last AG */
resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp);
resp->tr_growdata.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT;
resp->tr_growdata.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
/* The following transactions are logged in logical format */
resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp);
resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp);
resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp);
resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp);
resp->tr_clearagi.tr_logres = xfs_calc_clear_agi_bucket_reservation(mp);
resp->tr_growrtzero.tr_logres = xfs_calc_growrtzero_reservation(mp);
resp->tr_growrtfree.tr_logres = xfs_calc_growrtfree_reservation(mp);
/*
 * Add one logcount for BUI items that appear with rmap or reflink,
 * one logcount for refcount intent items, and one logcount for rmap
 * intent items.
 *
 * NOTE(review): logcount_adj is only incremented here; where it is
 * declared and consumed is not visible in this body.
 */ if (xfs_has_reflink(mp) || xfs_has_rmapbt(mp))
logcount_adj++; if (xfs_has_reflink(mp))
logcount_adj++; if (xfs_has_rmapbt(mp))
logcount_adj++;
/*
 * Now that we've finished computing the static reservations, we can
 * compute the dynamic reservation for atomic writes.
 */
xfs_calc_default_atomic_ioend_reservation(mp, resp);
}
/* * Return the per-extent and fixed transaction reservation sizes needed to * complete an atomic write.
*/ STATICunsignedint
xfs_calc_atomic_write_ioend_geometry( struct xfs_mount *mp, unsignedint *step_size)
{ constunsignedint efi = xfs_efi_log_space(1); constunsignedint efd = xfs_efd_log_space(1); constunsignedint rui = xfs_rui_log_space(1); constunsignedint rud = xfs_rud_log_space(); constunsignedint cui = xfs_cui_log_space(1); constunsignedint cud = xfs_cud_log_space(); constunsignedint bui = xfs_bui_log_space(1); constunsignedint bud = xfs_bud_log_space();
/* * Maximum overhead to complete an atomic write ioend in software: * remove data fork extent + remove cow fork extent + map extent into * data fork. * * tx0: Creates a BUI and a CUI and that's all it needs. * * tx1: Roll to finish the BUI. Need space for the BUD, an RUI, and * enough space to relog the CUI (== CUI + CUD). * * tx2: Roll again to finish the RUI. Need space for the RUD and space * to relog the CUI. * * tx3: Roll again, need space for the CUD and possibly a new EFI. * * tx4: Roll again, need space for an EFD. * * If the extent referenced by the pair of BUI/CUI items is not the one * being currently processed, then we need to reserve space to relog * both items.
*/ constunsignedint tx0 = bui + cui; constunsignedint tx1 = bud + rui + cui + cud; constunsignedint tx2 = rud + cui + cud; constunsignedint tx3 = cud + efi; constunsignedint tx4 = efd; constunsignedint relog = bui + bud + cui + cud;
/* Overhead to finish one step of each intent item type */ constunsignedint f1 = xfs_calc_finish_efi_reservation(mp, 1); constunsignedint f2 = xfs_calc_finish_rui_reservation(mp, 1); constunsignedint f3 = xfs_calc_finish_cui_reservation(mp, 1); constunsignedint f4 = xfs_calc_finish_bui_reservation(mp, 1);
/* We only finish one item per transaction in a chain */
*step_size = max(f4, max3(f1, f2, f3));
return per_intent;
}
/* * Compute the maximum size (in fsblocks) of atomic writes that we can complete * given the existing log reservations.
*/
xfs_extlen_t
xfs_calc_max_atomic_write_fsblocks( struct xfs_mount *mp)
{ conststruct xfs_trans_res *resv = &M_RES(mp)->tr_atomic_ioend; unsignedint per_intent = 0; unsignedint step_size = 0; unsignedint ret = 0;
if (resv->tr_logres > 0) {
per_intent = xfs_calc_atomic_write_ioend_geometry(mp,
&step_size);
if (resv->tr_logres >= step_size)
ret = (resv->tr_logres - step_size) / per_intent;
}
/*
 * Compute the log blocks and transaction reservation needed to complete an
 * atomic write of a given number of blocks.  Worst case, each block requires
 * separate handling.  A return value of 0 means something went wrong.
 *
 * @mp:         mount whose tr_atomic_ioend reservation is referenced
 * @blockcount: size of the atomic write, in filesystem blocks
 * @new_logres: out parameter -- presumably receives the computed log
 *              reservation; the assignment is not visible here, confirm
 *
 * NOTE(review): only the local declarations are present below.  The
 * statements that compute the result, the return statement and the closing
 * brace are missing from this copy and must be restored from the upstream
 * file.
 */
xfs_extlen_t
xfs_calc_atomic_write_log_geometry( struct xfs_mount *mp,
xfs_extlen_t blockcount, unsignedint *new_logres)
{ struct xfs_trans_res *curr_res = &M_RES(mp)->tr_atomic_ioend;
/* Snapshot of the tr_atomic_ioend log reservation on entry */
uint old_logres = curr_res->tr_logres; unsignedint per_intent, step_size; unsignedint logres;
xfs_extlen_t min_logblocks;
/*
 * Compute the transaction reservation needed to complete an out of place
 * atomic write of a given number of blocks.
 *
 * @mp:         mount whose reservation table (M_RES(mp)) is updated
 * @blockcount: requested atomic write size in filesystem blocks; 0 selects
 *              the default atomic ioend reservation
 *
 * Returns 0 when the defaults are applied for blockcount == 0.
 *
 * NOTE(review): the function is cut off after the blockcount == 0 branch;
 * the handling of a non-zero blockcount, the final return and the closing
 * brace are missing from this copy and must be restored from the upstream
 * file.
 */ int
xfs_calc_atomic_write_reservation( struct xfs_mount *mp,
xfs_extlen_t blockcount)
{ unsignedint new_logres;
xfs_extlen_t min_logblocks;
/*
 * If the caller doesn't ask for a specific atomic write size, then
 * use the defaults.
 */ if (blockcount == 0) {
xfs_calc_default_atomic_ioend_reservation(mp, M_RES(mp)); return 0;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.