/* * Reverse Mapping Btree Repair * ============================ * * This is the most involved of all the AG space btree rebuilds. Everywhere * else in XFS we lock inodes and then AG data structures, but generating the * list of rmap records requires that we be able to scan both block mapping * btrees of every inode in the filesystem to see if it owns any extents in * this AG. We can't tolerate any inode updates while we do this, so we * freeze the filesystem to lock everyone else out, and grant ourselves * special privileges to run transactions with regular background reclamation * turned off. * * We also have to be very careful not to allow inode reclaim to start a * transaction because all transactions (other than our own) will block. * Deferred inode inactivation helps us out there. * * I) Reverse mappings for all non-space metadata and file data are collected * according to the following algorithm: * * 1. For each fork of each inode: * 1.1. Create a bitmap BMBIT to track bmbt blocks if necessary. * 1.2. If the incore extent map isn't loaded, walk the bmbt to accumulate * bmaps into rmap records (see 1.1.4). Set bits in BMBIT for each btree * block. * 1.3. If the incore extent map is loaded but the fork is in btree format, * just visit the bmbt blocks to set the corresponding BMBIT areas. * 1.4. From the incore extent map, accumulate each bmap that falls into our * target AG. Remember, multiple bmap records can map to a single rmap * record, so we cannot simply emit rmap records 1:1. * 1.5. Emit rmap records for each extent in BMBIT and free it. * 2. Create bitmaps INOBIT and ICHUNKBIT. * 3. For each record in the inobt, set the corresponding areas in ICHUNKBIT, * and set bits in INOBIT for each btree block. If the inobt has no records * at all, we must be careful to record its root in INOBIT. * 4. For each block in the finobt, set the corresponding INOBIT area. * 5. Emit rmap records for each extent in INOBIT and ICHUNKBIT and free them. * 6. 
Create bitmaps REFCBIT and COWBIT. * 7. For each CoW staging extent in the refcountbt, set the corresponding * areas in COWBIT. * 8. For each block in the refcountbt, set the corresponding REFCBIT area. * 9. Emit rmap records for each extent in REFCBIT and COWBIT and free them. * A. Emit rmap for the AG headers. * B. Emit rmap for the log, if there is one. * * II) The rmapbt shape and space metadata rmaps are computed as follows: * * 1. Count the rmaps collected in the previous step. (= NR) * 2. Estimate the number of rmapbt blocks needed to store NR records. (= RMB) * 3. Reserve RMB blocks through the newbt using the allocator in normap mode. * 4. Create bitmap AGBIT. * 5. For each reservation in the newbt, set the corresponding areas in AGBIT. * 6. For each block in the AGFL, bnobt, and cntbt, set the bits in AGBIT. * 7. Count the extents in AGBIT. (= AGNR) * 8. Estimate the number of rmapbt blocks needed for NR + AGNR rmaps. (= RMB') * 9. If RMB' >= RMB, reserve RMB' - RMB more newbt blocks, set RMB = RMB', * and clear AGBIT. Go to step 5. * A. Emit rmaps for each extent in AGBIT. * * III) The rmapbt is constructed and set in place as follows: * * 1. Sort the rmap records. * 2. Bulk load the rmaps. * * IV) Reap the old btree blocks. * * 1. Create a bitmap OLDRMBIT. * 2. For each gap in the new rmapbt, set the corresponding areas of OLDRMBIT. * 3. For each extent in the bnobt, clear the corresponding parts of OLDRMBIT. * 4. Reap the extents corresponding to the set areas in OLDRMBIT. These are * the parts of the AG that the rmap didn't find during its scan of the * primary metadata and aren't known to be in the free space, which implies * that they were the old rmapbt blocks. * 5. Commit. * * We use the 'xrep_rmap' prefix for all the rmap functions.
*/
/* Context for collecting rmaps */
struct xrep_rmap {
	/* new rmapbt information */
	struct xrep_newbt	new_btree;

	/* lock for the xfbtree and xfile */
	struct mutex		lock;

	/* rmap records generated from primary metadata */
	struct xfbtree		rmap_btree;

	struct xfs_scrub	*sc;

	/* in-memory btree cursor for the xfs_btree_bload iteration */
	struct xfs_btree_cur	*mcur;

	/* Hooks into rmap update code. */
	struct xfs_rmap_hook	rhook;

	/* inode scan cursor */
	struct xchk_iscan	iscan;

	/* Number of non-freespace records found. */
	unsigned long long	nr_records;

	/* bnobt/cntbt contribution to btreeblks */
	xfs_agblock_t		freesp_btblocks;

	/* old agf_rmap_blocks counter */
	unsigned int		old_rmapbt_fsbcount;
};
/* Set us up to repair reverse mapping btrees. */ int
xrep_setup_ag_rmapbt( struct xfs_scrub *sc)
/*
 * NOTE(review): this function's body appears to have been truncated during
 * extraction -- only the local declarations survive below; the allocation of
 * *rr and whatever setup follows it is missing.  Recover the remainder from
 * the original source before use.
 */
{ struct xrep_rmap *rr; char *descr; int error;
/*
 * Emit rmaps for every extent of bits set in the bitmap.  Caller must ensure
 * that the ranges are in units of FS blocks.
 */ STATICint
xrep_rmap_stash_bitmap( struct xrep_rmap *rr, struct xagb_bitmap *bitmap, conststruct xfs_owner_info *oinfo)
/* Translate @oinfo into the owner and flags for a run of stashed records. */
{ struct xrep_rmap_stash_run rsr = {
.rr = rr,
.owner = oinfo->oi_owner,
.rmap_flags = 0,
};
if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
rsr.rmap_flags |= XFS_RMAP_ATTR_FORK; if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
rsr.rmap_flags |= XFS_RMAP_BMBT_BLOCK;
/*
 * NOTE(review): truncated here -- the walk of @bitmap that actually emits
 * the rmap records, and this function's return, are missing from this
 * extract.
 */
/* Section (I): Finding all file and bmbt extents. */

/* Context for accumulating rmaps for an inode fork. */
struct xrep_rmap_ifork {
	/*
	 * Accumulate rmap data here to turn multiple adjacent bmaps into a
	 * single rmap.
	 */
	struct xfs_rmap_irec	accum;

	/* Bitmap of bmbt blocks in this AG. */
	struct xagb_bitmap	bmbt_blocks;

	struct xrep_rmap	*rr;

	/* Which inode fork? */
	int			whichfork;
};
/* Stash an rmap that we accumulated while walking an inode fork. */ STATICint
xrep_rmap_stash_accumulated( struct xrep_rmap_ifork *rf)
/* Nothing accumulated yet?  Then there is nothing to stash. */
{ if (rf->accum.rm_blockcount == 0) return 0;
/*
 * NOTE(review): truncated -- the call that actually stashes rf->accum is
 * missing from this extract.
 */
/*
 * Iterate a metadata btree rooted in an inode to collect rmap records for
 * anything in this fork that matches the AG.
 */ STATICint
xrep_rmap_scan_iroot_btree( struct xrep_rmap_ifork *rf, struct xfs_btree_cur *cur)
{ struct xfs_owner_info oinfo; struct xrep_rmap *rr = rf->rr; int error;
xagb_bitmap_init(&rf->bmbt_blocks);
/* Record all the blocks in the btree itself. */
error = xfs_btree_visit_blocks(cur, xrep_rmap_visit_iroot_btree_block,
XFS_BTREE_VISIT_ALL, rf); if (error) goto out;
/* Emit rmaps for the btree blocks. */
xfs_rmap_ino_bmbt_owner(&oinfo, rf->accum.rm_owner, rf->whichfork);
error = xrep_rmap_stash_bitmap(rr, &rf->bmbt_blocks, &oinfo); if (error) goto out;
/*
 * NOTE(review): truncated -- the "out:" label targeted above and the
 * xagb_bitmap_destroy() cleanup are missing from this extract.
 */
/*
 * Iterate the block mapping btree to collect rmap records for anything in this
 * fork that matches the AG.  Sets @mappings_done to true if we've scanned the
 * block mappings in this fork.
 */ STATICint
xrep_rmap_scan_bmbt( struct xrep_rmap_ifork *rf, struct xfs_inode *ip, bool *mappings_done)
{ struct xrep_rmap *rr = rf->rr; struct xfs_btree_cur *cur; struct xfs_ifork *ifp; int error;
/*
 * NOTE(review): @ifp and @cur are used below without any visible
 * initialization, and *mappings_done is never cleared -- the assignments
 * were evidently lost in extraction.  Restore them from the original source
 * before use.
 */
if (!xfs_ifork_is_realtime(ip, rf->whichfork) &&
xfs_need_iread_extents(ifp)) {
/*
 * If the incore extent cache isn't loaded, scan the bmbt for
 * mapping records.  This avoids loading the incore extent
 * tree, which will increase memory pressure at a time when
 * we're trying to run as quickly as we possibly can.  Ignore
 * realtime extents.
 */
error = xfs_bmap_query_all(cur, xrep_rmap_visit_bmbt, rf); if (error) goto out_cur;
*mappings_done = true;
}
/* Scan for the bmbt blocks, which always live on the data device. */
error = xrep_rmap_scan_iroot_btree(rf, cur);
out_cur:
xfs_btree_del_cursor(cur, error); return error;
}
/*
 * Iterate the in-core extent cache to collect rmap records for anything in
 * this fork that matches the AG.
 */ STATICint
xrep_rmap_scan_iext( struct xrep_rmap_ifork *rf, struct xfs_ifork *ifp)
{ struct xfs_bmbt_irec rec; struct xfs_iext_cursor icur; int error;
/* Skip delalloc reservations -- they own no disk blocks yet. */
for_each_xfs_iext(ifp, &icur, &rec) { if (isnullstartblock(rec.br_startblock)) continue;
error = xrep_rmap_visit_bmbt(NULL, &rec, rf); if (error) return error;
}
/*
 * NOTE(review): xrep_rmap_scan_iext's tail is missing here.  Everything from
 * this point down is an interior fragment of a different function (it reads
 * ip, type, rtg, sc and cur, none of which are declared above); it appears
 * to belong to a realtime-metadata btree scanning helper.  Re-split before
 * use.
 */
if (rf->whichfork != XFS_DATA_FORK) return -EFSCORRUPTED;
/* Map the metafile type to the rtgroup inode slot it should occupy. */
switch (ip->i_metatype) { case XFS_METAFILE_RTRMAP:
type = XFS_RTGI_RMAP; break; case XFS_METAFILE_RTREFCOUNT:
type = XFS_RTGI_REFCOUNT; break; default:
ASSERT(0); return -EFSCORRUPTED;
}
/* Look for the rtgroup that owns this metadata inode. */
while ((rtg = xfs_rtgroup_next(sc->mp, rtg))) { if (ip == rtg->rtg_inodes[type]) goto found;
}
/*
 * We should never find an rt metadata btree inode that isn't
 * associated with an rtgroup yet has ondisk blocks allocated to it.
 */ if (ip->i_nblocks) {
ASSERT(0); return -EFSCORRUPTED;
}
return 0;
found: switch (ip->i_metatype) { case XFS_METAFILE_RTRMAP:
cur = xfs_rtrmapbt_init_cursor(sc->tp, rtg); break; case XFS_METAFILE_RTREFCOUNT:
cur = xfs_rtrefcountbt_init_cursor(sc->tp, rtg); break; default:
ASSERT(0);
error = -EFSCORRUPTED; goto out_rtg;
}
/* Find all the extents from a given AG in an inode fork. */ STATICint
xrep_rmap_scan_ifork( struct xrep_rmap *rr, struct xfs_inode *ip, int whichfork)
{ struct xrep_rmap_ifork rf = {
.accum = { .rm_owner = ip->i_ino, },
.rr = rr,
.whichfork = whichfork,
}; struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); bool mappings_done; int error = 0;
if (!ifp) return 0;
switch (ifp->if_format) { case XFS_DINODE_FMT_BTREE: /* * Scan the bmap btree for data device mappings. This includes * the btree blocks themselves, even if this is a realtime * file.
*/
error = xrep_rmap_scan_bmbt(&rf, ip, &mappings_done); if (error || mappings_done) return error;
fallthrough; case XFS_DINODE_FMT_EXTENTS: /* Scan incore extent cache if this isn't a realtime file. */ if (xfs_ifork_is_realtime(ip, whichfork)) return 0;
return xrep_rmap_scan_iext(&rf, ifp); case XFS_DINODE_FMT_META_BTREE: return xrep_rmap_scan_meta_btree(&rf, ip);
}
return 0;
}
/*
 * Take ILOCK on a file that we want to scan.
 *
 * Select ILOCK_EXCL if the file has an unloaded data bmbt or has an unloaded
 * attr bmbt.  Otherwise, take ILOCK_SHARED.
 */ staticinlineunsignedint
xrep_rmap_scan_ilock( struct xfs_inode *ip)
{
uint lock_mode = XFS_ILOCK_SHARED;
/* Unloaded data fork?  We must lock exclusively to load it. */
if (xfs_need_iread_extents(&ip->i_df)) {
lock_mode = XFS_ILOCK_EXCL; goto lock;
}
/* Same for an unloaded attr fork. */
if (xfs_inode_has_attr_fork(ip) && xfs_need_iread_extents(&ip->i_af))
lock_mode = XFS_ILOCK_EXCL;
/*
 * NOTE(review): xrep_rmap_scan_ilock is truncated here -- the "lock:" label
 * targeted by the goto above and the return of lock_mode are missing.  The
 * code below is an interior fragment of a separate inobt-scanning function
 * (it references sc, ri and rr, which are not declared in this scope).
 */
/*
 * Iterate every record in the inobt so we can capture all the inode
 * chunks and the blocks in the inobt itself.
 */
error = xfs_btree_query_all(sc->sa.ino_cur, xrep_rmap_walk_inobt, &ri); if (error) goto out_bitmap;
/*
 * Note that if there are zero records in the inobt then query_all does
 * nothing and we have to account the empty inobt root manually.
 */ if (xagb_bitmap_empty(&ri.ichunk_blocks)) { struct xfs_agi *agi = sc->sa.agi_bp->b_addr;
error = xagb_bitmap_set(&ri.inobt_blocks,
be32_to_cpu(agi->agi_root), 1); if (error) goto out_bitmap;
}
/* Scan the finobt too. */ if (xfs_has_finobt(sc->mp)) {
error = xagb_bitmap_set_btblocks(&ri.inobt_blocks,
sc->sa.fino_cur); if (error) goto out_bitmap;
}
/* Generate rmaps for everything. */
error = xrep_rmap_stash_bitmap(rr, &ri.inobt_blocks,
&XFS_RMAP_OINFO_INOBT); if (error) goto out_bitmap;
error = xrep_rmap_stash_bitmap(rr, &ri.ichunk_blocks,
&XFS_RMAP_OINFO_INODES);
/* Generate rmaps for the AG headers (AGI/AGF/AGFL) */ STATICint
xrep_rmap_find_agheader_rmaps( struct xrep_rmap *rr)
{ struct xfs_scrub *sc = rr->sc;
/* Create a record for the AG sb->agfl. */ return xrep_rmap_stash(rr, XFS_SB_BLOCK(sc->mp),
XFS_AGFL_BLOCK(sc->mp) - XFS_SB_BLOCK(sc->mp) + 1,
XFS_RMAP_OWN_FS, 0, 0);
}
/* Generate rmaps for the log, if it's in this AG. */ STATICint
xrep_rmap_find_log_rmaps( struct xrep_rmap *rr)
{ struct xfs_scrub *sc = rr->sc;
/* External (or absent) logs occupy no space in this AG. */
if (!xfs_ag_contains_log(sc->mp, pag_agno(sc->sa.pag))) return 0;
/*
 * NOTE(review): truncated -- stashing the OWN_LOG record for the internal
 * log extent is missing from this extract.
 */
/* Check and count all the records that we gathered. */ STATICint
xrep_rmap_check_record( struct xfs_btree_cur *cur, conststruct xfs_rmap_irec *rec, void *priv)
{ struct xrep_rmap *rr = priv; int error;
error = xrep_rmap_check_mapping(rr->sc, rec); if (error) return error;
rr->nr_records++; return 0;
}
/*
 * Generate all the reverse-mappings for this AG, a list of the old rmapbt
 * blocks, and the new btreeblks count.  Figure out if we have enough free
 * space to reconstruct the inode btrees.  The caller must clean up the lists
 * if anything goes wrong.  This implements section (I) above.
 */ STATICint
xrep_rmap_find_rmaps( struct xrep_rmap *rr)
{ struct xfs_scrub *sc = rr->sc; struct xchk_ag *sa = &sc->sa; struct xfs_inode *ip; struct xfs_btree_cur *mcur; int error;
/* Find all the per-AG metadata. */
xrep_ag_btcur_init(sc, &sc->sa);
error = xrep_rmap_find_inode_rmaps(rr); if (error) goto end_agscan;
error = xrep_rmap_find_refcount_rmaps(rr); if (error) goto end_agscan;
error = xrep_rmap_find_agheader_rmaps(rr); if (error) goto end_agscan;
error = xrep_rmap_find_log_rmaps(rr);
end_agscan:
xchk_ag_btcur_free(&sc->sa); if (error) return error;
/*
 * Set up for a potentially lengthy filesystem scan by reducing our
 * transaction resource usage for the duration.  Specifically:
 *
 * Unlock the AG header buffers and cancel the transaction to release
 * the log grant space while we scan the filesystem.
 *
 * Create a new empty transaction to eliminate the possibility of the
 * inode scan deadlocking on cyclical metadata.
 *
 * We pass the empty transaction to the file scanning function to avoid
 * repeatedly cycling empty transactions.  This can be done even though
 * we take the IOLOCK to quiesce the file because empty transactions
 * do not take sb_internal.
 */
sa->agf_bp = NULL;
sa->agi_bp = NULL;
xchk_trans_cancel(sc);
xchk_trans_alloc_empty(sc);
/* Iterate all AGs for inodes rmaps. */ while ((error = xchk_iscan_iter(&rr->iscan, &ip)) == 1) {
error = xrep_rmap_scan_inode(rr, ip);
xchk_irele(sc, ip); if (error) break;
if (xchk_should_terminate(sc, &error)) break;
}
xchk_iscan_iter_finish(&rr->iscan); if (error) return error;
/*
 * Switch out for a real transaction and lock the AG headers in
 * preparation for building a new tree.
 */
xchk_trans_cancel(sc);
error = xchk_setup_fs(sc); if (error) return error;
error = xchk_perag_drain_and_lock(sc); if (error) return error;
/*
 * If a hook failed to update the in-memory btree, we lack the data to
 * continue the repair.
 */ if (xchk_iscan_aborted(&rr->iscan)) return -EFSCORRUPTED;
/*
 * Now that we have everything locked again, we need to count the
 * number of rmap records stashed in the btree.  This should reflect
 * all actively-owned space in the filesystem.  At the same time, check
 * all our records before we start building a new btree, which requires
 * a bnobt cursor.
 */
mcur = xfs_rmapbt_mem_cursor(rr->sc->sa.pag, NULL, &rr->rmap_btree);
sc->sa.bno_cur = xfs_bnobt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
sc->sa.pag);
/*
 * NOTE(review): truncated -- the query pass that runs
 * xrep_rmap_check_record over @mcur, the teardown of both cursors, and this
 * function's return are missing from this extract.
 */
/* Add an AGFL block to the rmap list. */ STATICint
xrep_rmap_walk_agfl( struct xfs_mount *mp,
xfs_agblock_t agbno, void *priv)
{ struct xrep_rmap_agfl *ra = priv;
return xagb_bitmap_set(ra->bitmap, agbno, 1);
}
/* * Run one round of reserving space for the new rmapbt and recomputing the * number of blocks needed to store the previously observed rmapbt records and * the ones we'll create for the free space metadata. When we don't need more * blocks, return a bitmap of OWN_AG extents in @freesp_blocks and set @done to * true.
*/ STATICint
xrep_rmap_try_reserve( struct xrep_rmap *rr, struct xfs_btree_cur *rmap_cur, struct xagb_bitmap *freesp_blocks,
uint64_t *blocks_reserved, bool *done)
{ struct xrep_rmap_agfl ra = {
.bitmap = freesp_blocks,
.agno = pag_agno(rr->sc->sa.pag),
}; struct xfs_scrub *sc = rr->sc; struct xrep_newbt_resv *resv, *n; struct xfs_agf *agf = sc->sa.agf_bp->b_addr; struct xfs_buf *agfl_bp;
uint64_t nr_blocks; /* RMB */
uint64_t freesp_records; int error;
/* * We're going to recompute new_btree.bload.nr_blocks at the end of * this function to reflect however many btree blocks we need to store * all the rmap records (including the ones that reflect the changes we * made to support the new rmapbt blocks), so we save the old value * here so we can decide if we've reserved enough blocks.
*/
nr_blocks = rr->new_btree.bload.nr_blocks;
/* * Make sure we've reserved enough space for the new btree. This can * change the shape of the free space btrees, which can cause secondary * interactions with the rmap records because all three space btrees * have the same rmap owner. We'll account for all that below.
*/
error = xrep_newbt_alloc_blocks(&rr->new_btree,
nr_blocks - *blocks_reserved); if (error) return error;
*blocks_reserved = rr->new_btree.bload.nr_blocks;
/* Clear everything in the bitmap. */
xagb_bitmap_destroy(freesp_blocks);
/* Set all the bnobt blocks in the bitmap. */
sc->sa.bno_cur = xfs_bnobt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
sc->sa.pag);
error = xagb_bitmap_set_btblocks(freesp_blocks, sc->sa.bno_cur);
xfs_btree_del_cursor(sc->sa.bno_cur, error);
sc->sa.bno_cur = NULL; if (error) return error;
/* Set all the cntbt blocks in the bitmap. */
sc->sa.cnt_cur = xfs_cntbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
sc->sa.pag);
error = xagb_bitmap_set_btblocks(freesp_blocks, sc->sa.cnt_cur);
xfs_btree_del_cursor(sc->sa.cnt_cur, error);
sc->sa.cnt_cur = NULL; if (error) return error;
/* Record our new btreeblks value. */
rr->freesp_btblocks = xagb_bitmap_hweight(freesp_blocks) - 2;
/* Set all the new rmapbt blocks in the bitmap. */
list_for_each_entry_safe(resv, n, &rr->new_btree.resv_list, list) {
error = xagb_bitmap_set(freesp_blocks, resv->agbno, resv->len); if (error) return error;
}
/* Set all the AGFL blocks in the bitmap. */
error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp); if (error) return error;
/* Count the extents in the bitmap. */
freesp_records = xagb_bitmap_count_set_regions(freesp_blocks);
/* Compute how many blocks we'll need for all the rmaps. */
error = xfs_btree_bload_compute_geometry(rmap_cur,
&rr->new_btree.bload, rr->nr_records + freesp_records); if (error) return error;
/* We're done when we don't need more blocks. */
*done = nr_blocks >= rr->new_btree.bload.nr_blocks; return 0;
}
/*
 * Iteratively reserve space for rmap btree while recording OWN_AG rmaps for
 * the free space metadata.  This implements section (II) above.
 */ STATICint
xrep_rmap_reserve_space( struct xrep_rmap *rr, struct xfs_btree_cur *rmap_cur)
{ struct xagb_bitmap freesp_blocks; /* AGBIT */
uint64_t blocks_reserved = 0; bool done = false; int error;
/* Compute how many blocks we'll need for the rmaps collected so far. */
error = xfs_btree_bload_compute_geometry(rmap_cur,
&rr->new_btree.bload, rr->nr_records); if (error) return error;
/* Last chance to abort before we start committing fixes. */ if (xchk_should_terminate(rr->sc, &error)) return error;
xagb_bitmap_init(&freesp_blocks);
/*
 * Iteratively reserve space for the new rmapbt and recompute the
 * number of blocks needed to store the previously observed rmapbt
 * records and the ones we'll create for the free space metadata.
 * Finish when we don't need more blocks.
 */ do {
error = xrep_rmap_try_reserve(rr, rmap_cur, &freesp_blocks,
&blocks_reserved, &done); if (error) goto out_bitmap;
} while (!done);
/* Emit rmaps for everything in the free space bitmap. */
xrep_ag_btcur_init(rr->sc, &rr->sc->sa);
error = xrep_rmap_stash_bitmap(rr, &freesp_blocks, &XFS_RMAP_OINFO_AG);
xchk_ag_btcur_free(&rr->sc->sa);
/*
 * NOTE(review): xrep_rmap_reserve_space is truncated here (the out_bitmap
 * label and bitmap teardown are missing).  The code below is an interior
 * fragment of a separate AGF-updating function -- rmap_btblocks, agf, sc and
 * pag are not declared in this scope.
 */
/*
 * The AGF header contains extra information related to the reverse
 * mapping btree, so we must update those fields here.
 */
rmap_btblocks = rr->new_btree.afake.af_blocks - 1;
agf->agf_btreeblks = cpu_to_be32(rr->freesp_btblocks + rmap_btblocks);
xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_BTREEBLKS);
/*
 * After we commit the new btree to disk, it is possible that the
 * process to reap the old btree blocks will race with the AIL trying
 * to checkpoint the old btree blocks into the filesystem.  If the new
 * tree is shorter than the old one, the rmapbt write verifier will
 * fail and the AIL will shut down the filesystem.
 *
 * To avoid this, save the old incore btree height values as the alt
 * height values before re-initializing the perag info from the updated
 * AGF to capture all the new values.
 */
pag->pagf_repair_rmap_level = pag->pagf_rmap_level;
/* Reinitialize with the values we just logged. */ return xrep_reinit_pagf(sc);
}
/* Retrieve rmapbt data for bulk load. */ STATICint
xrep_rmap_get_records( struct xfs_btree_cur *cur, unsignedint idx, struct xfs_btree_block *block, unsignedint nr_wanted, void *priv)
{ struct xrep_rmap *rr = priv; union xfs_btree_rec *block_rec; unsignedint loaded; int error;
for (loaded = 0; loaded < nr_wanted; loaded++, idx++) { int stat = 0;
/* Step the in-memory btree cursor to the next stashed record. */
error = xfs_btree_increment(rr->mcur, 0, &stat); if (error) return error; if (!stat) return -EFSCORRUPTED;
error = xfs_rmap_get_rec(rr->mcur, &cur->bc_rec.r, &stat); if (error) return error; if (!stat) return -EFSCORRUPTED;
/*
 * NOTE(review): xrep_rmap_get_records is truncated -- copying the record
 * into @block and the loop/function tail are missing from this extract.
 */
/* Feed one of the new btree blocks to the bulk loader. */ STATICint
xrep_rmap_claim_block( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr, void *priv)
{ struct xrep_rmap *rr = priv;
/* NOTE(review): truncated -- the claim of a reserved block is missing. */
/* Custom allocation function for new rmap btrees. */ STATICint
xrep_rmap_alloc_vextent( struct xfs_scrub *sc, struct xfs_alloc_arg *args,
xfs_fsblock_t alloc_hint)
{ int error;
/* * We don't want an rmap update on the allocation, since we iteratively * compute the OWN_AG records /after/ allocating blocks for the records * that we already know we need to store. Therefore, fix the freelist * with the NORMAP flag set so that we don't also try to create an rmap * for new AGFL blocks.
*/
error = xrep_fix_freelist(sc, XFS_ALLOC_FLAG_NORMAP); if (error) return error;
/* * If xrep_fix_freelist fixed the freelist by moving blocks from the * free space btrees or by removing blocks from the AGFL and queueing * an EFI to free the block, the transaction will be dirty. This * second case is of interest to us. * * Later on, we will need to compare gaps in the new recordset against * the block usage of all OWN_AG owners in order to free the old * btree's blocks, which means that we can't have EFIs for former AGFL * blocks attached to the repair transaction when we commit the new * btree. * * xrep_newbt_alloc_blocks guarantees this for us by calling * xrep_defer_finish to commit anything that fix_freelist may have * added to the transaction.
*/ return xfs_alloc_vextent_near_bno(args, alloc_hint);
}
/* Count the records in this btree. */ STATICint
xrep_rmap_count_records( struct xfs_btree_cur *cur, unsignedlonglong *nr)
{ int running = 1; int error;
*nr = 0;
error = xfs_btree_goto_left_edge(cur); if (error) return error;
while (running && !(error = xfs_btree_increment(cur, 0, &running))) { if (running)
(*nr)++;
}
return error;
} /* * Use the collected rmap information to stage a new rmap btree. If this is * successful we'll return with the new btree root information logged to the * repair transaction but not yet committed. This implements section (III) * above.
*/ STATICint
xrep_rmap_build_new_tree( struct xrep_rmap *rr)
{ struct xfs_scrub *sc = rr->sc; struct xfs_perag *pag = sc->sa.pag; struct xfs_agf *agf = sc->sa.agf_bp->b_addr; struct xfs_btree_cur *rmap_cur; int error;
/*
 * Preserve the old rmapbt block count so that we can adjust the
 * per-AG rmapbt reservation after we commit the new btree root and
 * want to dispose of the old btree blocks.
 */
rr->old_rmapbt_fsbcount = be32_to_cpu(agf->agf_rmap_blocks);
/*
 * Prepare to construct the new btree by reserving disk space for the
 * new btree and setting up all the accounting information we'll need
 * to root the new btree while it's under construction and before we
 * attach it to the AG header.  The new blocks are accounted to the
 * rmapbt per-AG reservation, which we will adjust further after
 * committing the new btree.
 */
xrep_newbt_init_ag(&rr->new_btree, sc, &XFS_RMAP_OINFO_SKIP_UPDATE,
xfs_agbno_to_fsb(pag, XFS_RMAP_BLOCK(sc->mp)),
XFS_AG_RESV_RMAPBT);
rr->new_btree.bload.get_records = xrep_rmap_get_records;
rr->new_btree.bload.claim_block = xrep_rmap_claim_block;
rr->new_btree.alloc_vextent = xrep_rmap_alloc_vextent;
rmap_cur = xfs_rmapbt_init_cursor(sc->mp, NULL, NULL, pag);
xfs_btree_stage_afakeroot(rmap_cur, &rr->new_btree.afake);
/*
 * Initialize @rr->new_btree, reserve space for the new rmapbt,
 * and compute OWN_AG rmaps.
 */
error = xrep_rmap_reserve_space(rr, rmap_cur); if (error) goto err_cur;
/*
 * Count the rmapbt records again, because the space reservation
 * for the rmapbt itself probably added more records to the btree.
 */
rr->mcur = xfs_rmapbt_mem_cursor(rr->sc->sa.pag, NULL,
&rr->rmap_btree);
error = xrep_rmap_count_records(rr->mcur, &rr->nr_records); if (error) goto err_mcur;
/*
 * Due to btree slack factors, it's possible for a new btree to be one
 * level taller than the old btree.  Update the incore btree height so
 * that we don't trip the verifiers when writing the new btree blocks
 * to disk.
 */
pag->pagf_repair_rmap_level = rr->new_btree.bload.btree_height;
/*
 * Move the cursor to the left edge of the tree so that the first
 * increment in ->get_records positions us at the first record.
 */
error = xfs_btree_goto_left_edge(rr->mcur); if (error) goto err_level;
/* Add all observed rmap records. */
error = xfs_btree_bload(rmap_cur, &rr->new_btree.bload, rr); if (error) goto err_level;
/*
 * Install the new btree in the AG header.  After this point the old
 * btree is no longer accessible and the new tree is live.
 */
xfs_rmapbt_commit_staged_btree(rmap_cur, sc->tp, sc->sa.agf_bp);
xfs_btree_del_cursor(rmap_cur, 0);
xfs_btree_del_cursor(rr->mcur, 0);
rr->mcur = NULL;
/*
 * Now that we've written the new btree to disk, we don't need to keep
 * updating the in-memory btree.  Abort the scan to stop live updates.
 */
xchk_iscan_abort(&rr->iscan);
/*
 * The newly committed rmap recordset includes mappings for the blocks
 * that we reserved to build the new btree.  If there is excess space
 * reservation to be freed, the corresponding rmap records must also be
 * removed.
 */
rr->new_btree.oinfo = XFS_RMAP_OINFO_AG;
/* Reset the AGF counters now that we've changed the btree shape. */
error = xrep_rmap_reset_counters(rr); if (error) goto err_newbt;
/* Dispose of any unused blocks and the accounting information. */
error = xrep_newbt_commit(&rr->new_btree); if (error) return error;
/*
 * NOTE(review): truncated -- the success return and the err_newbt /
 * err_level / err_mcur / err_cur unwind labels targeted by the gotos above
 * are missing from this extract.
 */
/* Record the free space we find, as part of cleaning out the btree. */ STATICint
xrep_rmap_find_gaps( struct xfs_btree_cur *cur, conststruct xfs_rmap_irec *rec, void *priv)
{ struct xrep_rmap_find_gaps *rfg = priv; int error;
/* Mark the gap between the previous record and this one, if any. */
if (rec->rm_startblock > rfg->next_agbno) {
error = xagb_bitmap_set(&rfg->rmap_gaps, rfg->next_agbno,
rec->rm_startblock - rfg->next_agbno); if (error) return error;
}
/*
 * NOTE(review): truncated -- advancing rfg->next_agbno past this record and
 * the function's return are missing from this extract.
 */
/* * Reap the old rmapbt blocks. Now that the rmapbt is fully rebuilt, we make * a list of gaps in the rmap records and a list of the extents mentioned in * the bnobt. Any block that's in the new rmapbt gap list but not mentioned * in the bnobt is a block from the old rmapbt and can be removed.
*/ STATICint
xrep_rmap_remove_old_tree( struct xrep_rmap *rr)
{ struct xrep_rmap_find_gaps rfg = {
.next_agbno = 0,
}; struct xfs_scrub *sc = rr->sc; struct xfs_agf *agf = sc->sa.agf_bp->b_addr; struct xfs_perag *pag = sc->sa.pag; struct xfs_btree_cur *mcur;
xfs_agblock_t agend; int error;
xagb_bitmap_init(&rfg.rmap_gaps);
/* Compute free space from the new rmapbt. */
mcur = xfs_rmapbt_mem_cursor(rr->sc->sa.pag, NULL, &rr->rmap_btree);
/* Insert a record for space between the last rmap and EOAG. */
agend = be32_to_cpu(agf->agf_length); if (rfg.next_agbno < agend) {
error = xagb_bitmap_set(&rfg.rmap_gaps, rfg.next_agbno,
agend - rfg.next_agbno); if (error) goto out_bitmap;
}
/* Compute free space from the existing bnobt. */
sc->sa.bno_cur = xfs_bnobt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
sc->sa.pag);
error = xfs_alloc_query_all(sc->sa.bno_cur, xrep_rmap_find_freesp,
&rfg);
xfs_btree_del_cursor(sc->sa.bno_cur, error);
sc->sa.bno_cur = NULL; if (error) goto out_bitmap;
/* * Free the "free" blocks that the new rmapbt knows about but the bnobt * doesn't--these are the old rmapbt blocks. Credit the old rmapbt * block usage count back to the per-AG rmapbt reservation (and not * fdblocks, since the rmap btree lives in free space) to keep the * reservation and free space accounting correct.
*/
error = xrep_reap_agblocks(sc, &rfg.rmap_gaps,
&XFS_RMAP_OINFO_ANY_OWNER, XFS_AG_RESV_RMAPBT); if (error) goto out_bitmap;
/* * Now that we've zapped all the old rmapbt blocks we can turn off * the alternate height mechanism and reset the per-AG space * reservation.
*/
pag->pagf_repair_rmap_level = 0;
sc->flags |= XREP_RESET_PERAG_RESV;
out_bitmap:
xagb_bitmap_destroy(&rfg.rmap_gaps); return error;
}
/*
 * NOTE(review): the lines below are the tail of a live-update filter
 * predicate whose declaration (and the definitions of @oi and @iscan) were
 * lost in extraction.
 *
 * Before unlocking the AG header to perform the inode scan, we
 * recorded reverse mappings for all AG metadata except for the OWN_AG
 * metadata.  IOWs, the in-memory btree knows about the AG headers, the
 * two inode btrees, the CoW staging extents, and the refcount btrees.
 * For these types of metadata, we need to record the live updates in
 * the in-memory rmap btree.
 *
 * However, we do not scan the free space btrees or the AGFL until we
 * have re-locked the AGF and are ready to reserve space for the new
 * rmap btree, so we do not want live updates for OWN_AG metadata.
 */ if (XFS_RMAP_NON_INODE_OWNER(oi->oi_owner)) return oi->oi_owner != XFS_RMAP_OWN_AG;
/* Ignore updates to files that the scanner hasn't visited yet. */ return xchk_iscan_want_live_update(iscan, oi->oi_owner);
}
/*
 * Apply a rmapbt update from the regular filesystem into our shadow btree.
 * We're running from the thread that owns the AGF buffer and is generating
 * the update, so we must be careful about which parts of the struct xrep_rmap
 * that we change.
 */ staticint
xrep_rmapbt_live_update( struct notifier_block *nb, unsignedlong action, void *data)
{ struct xfs_rmap_update_params *p = data; struct xrep_rmap *rr; struct xfs_mount *mp; struct xfs_btree_cur *mcur; struct xfs_trans *tp; int error;
/*
 * NOTE(review): truncated -- the body of the live-update notifier (which
 * presumably applies @p to the shadow btree) is missing from this extract.
 */
/* Set up the filesystem scan components. */ STATICint
xrep_rmap_setup_scan( struct xrep_rmap *rr)
{ struct xfs_scrub *sc = rr->sc; int error;
mutex_init(&rr->lock);
/* Set up in-memory rmap btree */
error = xfs_rmapbt_mem_init(sc->mp, &rr->rmap_btree, sc->xmbtp,
pag_agno(sc->sa.pag)); if (error) goto out_mutex;
/* Retry iget every tenth of a second for up to 30 seconds. */
xchk_iscan_start(sc, 30000, 100, &rr->iscan);
/*
 * Hook into live rmap operations so that we can update our in-memory
 * btree to reflect live changes on the filesystem.  Since we drop the
 * AGF buffer to scan all the inodes, we need this piece to avoid
 * installing a stale btree.
 */
ASSERT(sc->flags & XCHK_FSGATES_RMAP);
xfs_rmap_hook_setup(&rr->rhook, xrep_rmapbt_live_update);
error = xfs_rmap_hook_add(pag_group(sc->sa.pag), &rr->rhook); if (error) goto out_iscan; return 0;
/*
 * NOTE(review): truncated -- the out_iscan / out_mutex error-unwind labels
 * targeted by the gotos above are missing from this extract.
 */
/* Repair the rmap btree for some AG. */ int
xrep_rmapbt( struct xfs_scrub *sc)
{ struct xrep_rmap *rr = sc->buf; int error;
error = xrep_rmap_setup_scan(rr); if (error) return error;
/*
 * Collect rmaps for everything in this AG that isn't space metadata.
 * These rmaps won't change even as we try to allocate blocks.
 */
error = xrep_rmap_find_rmaps(rr); if (error) goto out_records;
/* Rebuild the rmap information. */
error = xrep_rmap_build_new_tree(rr); if (error) goto out_records;
/* Kill the old tree. */
error = xrep_rmap_remove_old_tree(rr); if (error) goto out_records;
/*
 * NOTE(review): truncated -- the out_records cleanup label and the final
 * return are missing from this extract; what follows in the file is
 * unrelated website boilerplate.
 */
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.