// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2014 Red Hat, Inc. * All Rights Reserved.
*/ #include"xfs.h" #include"xfs_fs.h" #include"xfs_shared.h" #include"xfs_format.h" #include"xfs_log_format.h" #include"xfs_trans_resv.h" #include"xfs_mount.h" #include"xfs_trans.h" #include"xfs_alloc.h" #include"xfs_btree.h" #include"xfs_btree_staging.h" #include"xfs_rmap.h" #include"xfs_rmap_btree.h" #include"xfs_health.h" #include"xfs_trace.h" #include"xfs_error.h" #include"xfs_extent_busy.h" #include"xfs_ag.h" #include"xfs_ag_resv.h" #include"xfs_buf_mem.h" #include"xfs_btree_mem.h"
/*
 * File-scope kmem cache handle, private to this translation unit.
 * NOTE(review): the name suggests it backs rmap btree cursor allocations;
 * the code that creates and uses it is outside this excerpt -- confirm.
 */
static struct kmem_cache *xfs_rmapbt_cur_cache;
/* * Reverse map btree. * * This is a per-ag tree used to track the owner(s) of a given extent. With * reflink it is possible for there to be multiple owners, which is a departure * from classic XFS. Owner records for data extents are inserted when the * extent is mapped and removed when an extent is unmapped. Owner records for * all other block types (i.e. metadata) are inserted when an extent is * allocated and removed when an extent is freed. There can only be one owner * of a metadata extent, usually an inode or some other metadata structure like * an AG btree. * * The rmap btree is part of the free space management, so blocks for the tree * are sourced from the agfl. Hence we need transaction reservation support for * this tree so that the freelist is always large enough. This also impacts on * the minimum space we need to leave free in the AG. * * The tree is ordered by [ag block, owner, offset]. This is a large key size, * but it is the only way to enforce unique keys when a block can be owned by * multiple files at any offset. There's no need to order/search by extent * size for online updating/management of the tree. It is intended that most * reverse lookups will be to find the owner(s) of a particular block, or to * try to recover tree and file data from corrupt primary metadata.
*/
/* Allocate the new block from the freelist. If we can't, give up. */
error = xfs_alloc_get_freelist(pag, cur->bc_tp, cur->bc_ag.agbp,
&bno, 1); if (error) return error; if (bno == NULLAGBLOCK) {
*stat = 0; return 0;
}
/* * Since rmapbt blocks are sourced from the AGFL, they are allocated one * at a time and the reservation updates don't require a transaction.
*/
xfs_ag_resv_alloc_extent(pag, XFS_AG_RESV_RMAPBT, &args);
/* * Convert the ondisk record's offset field into the ondisk key's offset field. * Fork and bmbt are significant parts of the rmap record key, but written * status is merely a record attribute.
*/ staticinline __be64 ondisk_rec_offset_to_key(constunion xfs_btree_rec *rec)
{ return rec->rmap.rm_offset & ~cpu_to_be64(XFS_RMAP_OFF_UNWRITTEN);
}
/* * The high key for a reverse mapping record can be computed by shifting * the startblock and offset to the highest value that would still map * to that record. In practice this means that we add blockcount-1 to * the startblock for all records, and if the record is for a data/attr * fork mapping, we add blockcount-1 to the offset too.
*/ STATICvoid
xfs_rmapbt_init_high_key_from_rec( union xfs_btree_key *key, constunion xfs_btree_rec *rec)
{
uint64_t off; int adj;
/* * Mask the appropriate parts of the ondisk key field for a key comparison. * Fork and bmbt are significant parts of the rmap record key, but written * status is merely a record attribute.
*/ staticinline uint64_t offset_keymask(uint64_t offset)
{ return offset & ~XFS_RMAP_OFF_UNWRITTEN;
}
/* * magic number and level verification * * During growfs operations, we can't verify the exact level or owner as * the perag is not fully initialised and hence not attached to the * buffer. In this case, check against the maximum tree depth. * * Similarly, during log recovery we will have a perag structure * attached, but the agf information will not yet have been initialised * from the on disk AGF. Again, we can only check against maximum limits * in this case.
*/ if (!xfs_verify_magic(bp, block->bb_magic)) return __this_address;
if (!xfs_has_rmapbt(mp)) return __this_address;
fa = xfs_btree_agblock_v5hdr_verify(bp); if (fa) return fa;
#ifdef CONFIG_XFS_ONLINE_REPAIR /* * Online repair could be rewriting the free space btrees, so * we'll validate against the larger of either tree while this * is going on.
*/
maxlevel = max_t(unsignedint, maxlevel,
pag->pagf_repair_rmap_level); #endif if (level >= maxlevel) return __this_address;
} elseif (level >= mp->m_rmap_maxlevels) return __this_address;
x = be32_to_cpu(k1->rmap.rm_startblock);
y = be32_to_cpu(k2->rmap.rm_startblock); if (x < y) return 1; elseif (x > y) return 0;
a = be64_to_cpu(k1->rmap.rm_owner);
b = be64_to_cpu(k2->rmap.rm_owner); if (a < b) return 1; elseif (a > b) return 0;
a = offset_keymask(be64_to_cpu(k1->rmap.rm_offset));
b = offset_keymask(be64_to_cpu(k2->rmap.rm_offset)); if (a <= b) return 1; return 0;
}
x = be32_to_cpu(r1->rmap.rm_startblock);
y = be32_to_cpu(r2->rmap.rm_startblock); if (x < y) return 1; elseif (x > y) return 0;
a = be64_to_cpu(r1->rmap.rm_owner);
b = be64_to_cpu(r2->rmap.rm_owner); if (a < b) return 1; elseif (a > b) return 0;
a = offset_keymask(be64_to_cpu(r1->rmap.rm_offset));
b = offset_keymask(be64_to_cpu(r2->rmap.rm_offset)); if (a <= b) return 1; return 0;
}
/* * We only support checking contiguity of the physical space component. * If any callers ever need more specificity than that, they'll have to * implement it here.
*/
ASSERT(!mask || (!mask->rmap.rm_owner && !mask->rmap.rm_offset));
/* * Validate an in-memory rmap btree block. Callers are allowed to generate an * in-memory btree even if the ondisk feature is not enabled.
*/ static xfs_failaddr_t
xfs_rmapbt_mem_verify( struct xfs_buf *bp)
{ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
xfs_failaddr_t fa; unsignedint level; unsignedint maxrecs;
if (!xfs_verify_magic(bp, block->bb_magic)) return __this_address;
fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN); if (fa) return fa;
level = be16_to_cpu(block->bb_level); if (level >= xfs_rmapbt_maxlevels_ondisk()) return __this_address;
/* Compute the max possible height for reverse mapping btrees in memory. */ staticunsignedint
xfs_rmapbt_mem_maxlevels(void)
{ unsignedint minrecs[2]; unsignedint blocklen;
/*
 * NOTE(review): nothing in the visible text assigns minrecs[] or blocklen
 * before minrecs is handed to xfs_btree_compute_maxlevels() below; the
 * statements that populate them appear to be missing from this copy.
 * Confirm against the authoritative file before relying on this function.
 */
/* * How tall can an in-memory rmap btree become if we filled the entire * AG with rmap records?
*/ return xfs_btree_compute_maxlevels(minrecs,
XFS_MAX_AG_BYTES / sizeof(struct xfs_rmap_rec));
/*
 * The #else branch below replaces the function with a macro that evaluates
 * to 0.  The matching conditional opens outside this excerpt; per the
 * #endif comment it is presumably CONFIG_XFS_BTREE_IN_MEM.
 */
} #else # define xfs_rmapbt_mem_maxlevels() (0) #endif/* CONFIG_XFS_BTREE_IN_MEM */
/* * Install a new reverse mapping btree root. Caller is responsible for * invalidating and freeing the old btree blocks.
*/ void
xfs_rmapbt_commit_staged_btree( struct xfs_btree_cur *cur, struct xfs_trans *tp, struct xfs_buf *agbp)
{ struct xfs_agf *agf = agbp->b_addr; struct xbtree_afakeroot *afake = cur->bc_ag.afake;
/* Calculate number of records in a reverse mapping btree block. */ staticinlineunsignedint
xfs_rmapbt_block_maxrecs( unsignedint blocklen, bool leaf)
{ if (leaf) return blocklen / sizeof(struct xfs_rmap_rec); return blocklen /
(2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t));
}
/* * Calculate number of records in an rmap btree block.
*/ unsignedint
xfs_rmapbt_maxrecs( struct xfs_mount *mp, unsignedint blocklen, bool leaf)
{
blocklen -= XFS_RMAP_BLOCK_LEN; return xfs_rmapbt_block_maxrecs(blocklen, leaf);
}
/* Compute the max possible height for reverse mapping btrees. */ unsignedint
xfs_rmapbt_maxlevels_ondisk(void)
{ unsignedint minrecs[2]; unsignedint blocklen;
/*
 * NOTE(review): nothing in the visible text assigns minrecs[] or blocklen
 * before minrecs is passed to xfs_btree_space_to_height() below; the
 * statements that populate them appear to be missing from this copy.
 * Confirm against the authoritative file before relying on this function.
 */
/* * Compute the asymptotic maxlevels for an rmapbt on any reflink fs. * * On a reflink filesystem, each AG block can have up to 2^32 (per the * refcount record format) owners, which means that theoretically we * could face up to 2^64 rmap records. However, we're likely to run * out of blocks in the AG long before that happens, which means that * we must compute the max height based on what the btree will look * like if it consumes almost all the blocks in the AG due to maximal * sharing factor.
*/ return max(xfs_btree_space_to_height(minrecs, XFS_MAX_CRC_AG_BLOCKS),
/* The larger of the space-based height and the in-memory height is returned. */
xfs_rmapbt_mem_maxlevels());
}
/*
 * Compute the maximum height of an rmap btree and store it in
 * mp->m_rmap_maxlevels.
 *
 * Without the rmapbt feature the height is recorded as 0 and nothing else
 * is done.  Otherwise the height is derived from mp->m_rmap_mnr and
 * mp->m_sb.sb_agblocks, using a different helper depending on whether the
 * filesystem has reflink.
 */
void
xfs_rmapbt_compute_maxlevels(
	struct xfs_mount	*mp)
{
	if (!xfs_has_rmapbt(mp)) {
		mp->m_rmap_maxlevels = 0;
		return;
	}

	if (!xfs_has_reflink(mp)) {
		/*
		 * If there's no block sharing, compute the maximum rmapbt
		 * height assuming one rmap record per AG block.
		 */
		mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(
				mp->m_rmap_mnr, mp->m_sb.sb_agblocks);
	} else {
		/*
		 * Compute the asymptotic maxlevels for an rmap btree on a
		 * filesystem that supports reflink.
		 *
		 * On a reflink filesystem, each AG block can have up to 2^32
		 * (per the refcount record format) owners, which means that
		 * theoretically we could face up to 2^64 rmap records.
		 * However, we're likely to run out of blocks in the AG long
		 * before that happens, which means that we must compute the
		 * max height based on what the btree will look like if it
		 * consumes almost all the blocks in the AG due to maximal
		 * sharing factor.
		 */
		mp->m_rmap_maxlevels = xfs_btree_space_to_height(
				mp->m_rmap_mnr, mp->m_sb.sb_agblocks);
	}

	/* The per-mount height must never exceed the ondisk format limit. */
	ASSERT(mp->m_rmap_maxlevels <= xfs_rmapbt_maxlevels_ondisk());
}
/* Calculate the refcount btree size for some records. */
xfs_extlen_t
xfs_rmapbt_calc_size( struct xfs_mount *mp, unsignedlonglong len)
{ return xfs_btree_calc_size(mp->m_rmap_mnr, len);
}
/*
 * Calculate the maximum rmap btree size.
 *
 * @mp:       mount structure.
 * @agblocks: block count passed to xfs_rmapbt_calc_size() as the number of
 *            records.
 *
 * Returns 0 when mp->m_rmap_mxr[0] is still zero, otherwise the size from
 * xfs_rmapbt_calc_size().
 */
xfs_extlen_t
xfs_rmapbt_max_size(
	struct xfs_mount	*mp,
	xfs_agblock_t		agblocks)
{
	/* Bail out if we're uninitialized, which can happen in mkfs. */
	return mp->m_rmap_mxr[0] ? xfs_rmapbt_calc_size(mp, agblocks) : 0;
}
/* * Figure out how many blocks to reserve and how many are used by this btree.
*/ int
xfs_rmapbt_calc_reserves( struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_perag *pag,
xfs_extlen_t *ask,
xfs_extlen_t *used)
{ struct xfs_buf *agbp; struct xfs_agf *agf;
xfs_agblock_t agblocks;
xfs_extlen_t tree_len; int error;
if (!xfs_has_rmapbt(mp)) return 0;
error = xfs_alloc_read_agf(pag, tp, 0, &agbp); if (error) return error;
/* * The log is permanently allocated, so the space it occupies will * never be available for the kinds of things that would require btree * expansion. We therefore can pretend the space isn't there.
*/ if (xfs_ag_contains_log(mp, pag_agno(pag)))
agblocks -= mp->m_sb.sb_logblocks;
/* Reserve 1% of the AG or enough for 1 block per record. */
*ask += max(agblocks / 100, xfs_rmapbt_max_size(mp, agblocks));
*used += tree_len;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.