/*
 * Realtime Reverse Mapping Btree Repair
 * =====================================
 *
 * This isn't quite as difficult as repairing the rmap btree on the data
 * device, since we only store the data fork extents of realtime files on the
 * realtime device.  We still have to freeze the filesystem and stop the
 * background threads like we do for the rmap repair, but we only have to scan
 * realtime inodes.
 *
 * Collecting entries for the new realtime rmap btree is easy -- all we have
 * to do is generate rtrmap entries from the data fork mappings of all realtime
 * files in the filesystem.  We then scan the rmap btrees of the data device
 * looking for extents belonging to the old btree and note them in a bitmap.
 *
 * To rebuild the realtime rmap btree, we bulk-load the collected mappings into
 * a new btree cursor and atomically swap that into the realtime inode.  Then
 * we can free the blocks from the old btree.
 *
 * We use the 'xrep_rtrmap' prefix for all the rmap functions.
 */
/*
 * Context for collecting rmaps.  The record collection, bulk loading, and
 * live update code in this file all operate on one of these.
 */
struct xrep_rtrmap {
	/* Staging information for the new rtrmapbt. */
	struct xrep_newbt	new_btree;

	/* Lock for the xfbtree and xfile. */
	struct mutex		lock;

	/* In-memory btree of rmap records generated from primary metadata. */
	struct xfbtree		rtrmap_btree;

	/* Scrub context for this repair. */
	struct xfs_scrub	*sc;

	/* Bitmap of old rtrmapbt blocks; reaped once the new tree is built. */
	struct xfsb_bitmap	old_rtrmapbt_blocks;

	/* Hook into the rtrmap update code. */
	struct xfs_rmap_hook	rhook;

	/* Inode scan cursor. */
	struct xchk_iscan	iscan;

	/*
	 * Cursor into the in-memory btree.  It is walked record by record
	 * while the collected rmaps are fed to the bulk loader.
	 */
	struct xfs_btree_cur	*mcur;

	/* Number of records we're staging in the new btree. */
	uint64_t		nr_records;
};
/* Set us up to repair rt reverse mapping btrees. */ int
xrep_setup_rtrmapbt( struct xfs_scrub *sc)
{ struct xrep_rtrmap *rr; char *descr; int error;
/* Context for accumulating rmaps for an inode fork. */
struct xrep_rtrmap_ifork {
	/*
	 * Pending rmap record.  Multiple adjacent bmaps are merged into this
	 * record so that they can be turned into a single rmap.
	 */
	struct xfs_rmap_irec	accum;

	/* Repair context that this fork scan belongs to. */
	struct xrep_rtrmap	*rr;
};
/* Stash an rmap that we accumulated while walking an inode fork. */ STATICint
xrep_rtrmap_stash_accumulated( struct xrep_rtrmap_ifork *rf)
{ if (rf->accum.rm_blockcount == 0) return 0;
/* * Iterate the block mapping btree to collect rmap records for anything in this * fork that maps to the rt volume. Sets @mappings_done to true if we've * scanned the block mappings in this fork.
*/ STATICint
xrep_rtrmap_scan_bmbt( struct xrep_rtrmap_ifork *rf, struct xfs_inode *ip, bool *mappings_done)
{ struct xrep_rtrmap *rr = rf->rr; struct xfs_btree_cur *cur; struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK); int error = 0;
*mappings_done = false;
/* * If the incore extent cache is already loaded, we'll just use the * incore extent scanner to record mappings. Don't bother walking the * ondisk extent tree.
*/ if (!xfs_need_iread_extents(ifp)) return 0;
/* Accumulate all the mappings in the bmap btree. */
cur = xfs_bmbt_init_cursor(rr->sc->mp, rr->sc->tp, ip, XFS_DATA_FORK);
error = xfs_bmap_query_all(cur, xrep_rtrmap_visit_bmbt, rf);
xfs_btree_del_cursor(cur, error); if (error) return error;
/* Stash any remaining accumulated rmaps and exit. */
*mappings_done = true; return xrep_rtrmap_stash_accumulated(rf);
}
/* * Iterate the in-core extent cache to collect rmap records for anything in * this fork that matches the AG.
*/ STATICint
xrep_rtrmap_scan_iext( struct xrep_rtrmap_ifork *rf, struct xfs_ifork *ifp)
{ struct xfs_bmbt_irec rec; struct xfs_iext_cursor icur; int error;
for_each_xfs_iext(ifp, &icur, &rec) { if (isnullstartblock(rec.br_startblock)) continue;
error = xrep_rtrmap_visit_bmbt(NULL, &rec, rf); if (error) return error;
}
return xrep_rtrmap_stash_accumulated(rf);
}
/* Find all the extents on the realtime device mapped by an inode fork. */ STATICint
xrep_rtrmap_scan_dfork( struct xrep_rtrmap *rr, struct xfs_inode *ip)
{ struct xrep_rtrmap_ifork rf = {
.accum = { .rm_owner = ip->i_ino, },
.rr = rr,
}; struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK); int error = 0;
if (ifp->if_format == XFS_DINODE_FMT_BTREE) { bool mappings_done;
/* * Scan the bmbt for mappings. If the incore extent tree is * loaded, we want to scan the cached mappings since that's * faster when the extent counts are very high.
*/
error = xrep_rtrmap_scan_bmbt(&rf, ip, &mappings_done); if (error || mappings_done) return error;
} elseif (ifp->if_format != XFS_DINODE_FMT_EXTENTS) { /* realtime data forks should only be extents or btree */ return -EFSCORRUPTED;
}
/* Record reverse mappings for a file. */ STATICint
xrep_rtrmap_scan_inode( struct xrep_rtrmap *rr, struct xfs_inode *ip)
{ unsignedint lock_mode; int error = 0;
/* Skip the rt rmap btree inode. */ if (rr->sc->ip == ip) return 0;
lock_mode = xfs_ilock_data_map_shared(ip);
/* Check the data fork if it's on the realtime device. */ if (XFS_IS_REALTIME_INODE(ip)) {
error = xrep_rtrmap_scan_dfork(rr, ip); if (error) goto out_unlock;
}
/* Scan one AG for reverse mappings for the realtime rmap btree. */ STATICint
xrep_rtrmap_scan_ag( struct xrep_rtrmap *rr, struct xfs_perag *pag)
{ struct xfs_scrub *sc = rr->sc; int error;
error = xrep_ag_init(sc, pag, &sc->sa); if (error) return error;
/* * Emit rmaps for every extent of bits set in the bitmap. Caller must ensure * that the ranges are in units of FS blocks.
*/ STATICint
xrep_rtrmap_stash_bitmap( struct xrep_rtrmap *rr, struct xrgb_bitmap *bitmap, conststruct xfs_owner_info *oinfo)
{ struct xrep_rtrmap_stash_run rsr = {
.rr = rr,
.owner = oinfo->oi_owner,
};
/* * Set up for a potentially lengthy filesystem scan by reducing our * transaction resource usage for the duration. Specifically: * * Unlock the realtime metadata inodes and cancel the transaction to * release the log grant space while we scan the filesystem. * * Create a new empty transaction to eliminate the possibility of the * inode scan deadlocking on cyclical metadata. * * We pass the empty transaction to the file scanning function to avoid * repeatedly cycling empty transactions. This can be done even though * we take the IOLOCK to quiesce the file because empty transactions * do not take sb_internal.
*/
xchk_trans_cancel(sc);
xchk_rtgroup_unlock(&sc->sr);
xchk_trans_alloc_empty(sc);
while ((error = xchk_iscan_iter(&rr->iscan, &ip)) == 1) {
error = xrep_rtrmap_scan_inode(rr, ip);
xchk_irele(sc, ip); if (error) break;
if (xchk_should_terminate(sc, &error)) break;
}
xchk_iscan_iter_finish(&rr->iscan); if (error) return error;
/* * Switch out for a real transaction and lock the RT metadata in * preparation for building a new tree.
*/
xchk_trans_cancel(sc);
error = xchk_setup_rt(sc); if (error) return error;
error = xchk_rtgroup_lock(sc, &sc->sr, XCHK_RTGLOCK_ALL); if (error) return error;
/* * If a hook failed to update the in-memory btree, we lack the data to * continue the repair.
*/ if (xchk_iscan_aborted(&rr->iscan)) return -EFSCORRUPTED;
/* Scan for old rtrmap blocks. */ while ((pag = xfs_perag_next(sc->mp, pag))) {
error = xrep_rtrmap_scan_ag(rr, pag); if (error) {
xfs_perag_rele(pag); return error;
}
}
/* * Now that we have everything locked again, we need to count the * number of rmap records stashed in the btree. This should reflect * all actively-owned rt files in the filesystem. At the same time, * check all our records before we start building a new btree, which * requires the rtbitmap lock.
*/
mcur = xfs_rtrmapbt_mem_cursor(rr->sc->sr.rtg, NULL, &rr->rtrmap_btree);
rr->nr_records = 0;
error = xfs_rmap_query_all(mcur, xrep_rtrmap_check_record, rr);
xfs_btree_del_cursor(mcur, error);
return error;
}
/* Building the new rtrmap btree. */
/* Retrieve rtrmapbt data for bulk load. */ STATICint
xrep_rtrmap_get_records( struct xfs_btree_cur *cur, unsignedint idx, struct xfs_btree_block *block, unsignedint nr_wanted, void *priv)
{ struct xrep_rtrmap *rr = priv; union xfs_btree_rec *block_rec; unsignedint loaded; int error;
for (loaded = 0; loaded < nr_wanted; loaded++, idx++) { int stat = 0;
error = xfs_btree_increment(rr->mcur, 0, &stat); if (error) return error; if (!stat) return -EFSCORRUPTED;
error = xfs_rmap_get_rec(rr->mcur, &cur->bc_rec.r, &stat); if (error) return error; if (!stat) return -EFSCORRUPTED;
/* Feed one of the new btree blocks to the bulk loader. */ STATICint
xrep_rtrmap_claim_block( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr, void *priv)
{ struct xrep_rtrmap *rr = priv;
/*
 * Figure out how much space we need to create the incore btree root block.
 *
 * @cur:           btree cursor; only its mount pointer is used
 * @level:         btree level of the root block
 * @nr_this_level: number of entries at that level
 * @priv:          unused
 *
 * Returns whatever xfs_rtrmap_broot_space_calc() computes for these inputs.
 */
STATIC size_t
xrep_rtrmap_iroot_size(
	struct xfs_btree_cur	*cur,
	unsigned int		level,
	unsigned int		nr_this_level,
	void			*priv)
{
	return xfs_rtrmap_broot_space_calc(cur->bc_mp, level, nr_this_level);
}
/* * Use the collected rmap information to stage a new rmap btree. If this is * successful we'll return with the new btree root information logged to the * repair transaction but not yet committed. This implements section (III) * above.
*/ STATICint
xrep_rtrmap_build_new_tree( struct xrep_rtrmap *rr)
{ struct xfs_scrub *sc = rr->sc; struct xfs_rtgroup *rtg = sc->sr.rtg; struct xfs_btree_cur *rmap_cur; int error;
/* * Prepare to construct the new btree by reserving disk space for the * new btree and setting up all the accounting information we'll need * to root the new btree while it's under construction and before we * attach it to the realtime rmapbt inode.
*/
error = xrep_newbt_init_metadir_inode(&rr->new_btree, sc); if (error) return error;
/* Compute how many blocks we'll need for the rmaps collected. */
error = xfs_btree_bload_compute_geometry(rmap_cur,
&rr->new_btree.bload, rr->nr_records); if (error) goto err_cur;
/* Last chance to abort before we start committing fixes. */ if (xchk_should_terminate(sc, &error)) goto err_cur;
/* * Guess how many blocks we're going to need to rebuild an entire * rtrmapbt from the number of extents we found, and pump up our * transaction to have sufficient block reservation. We're allowed * to exceed quota to repair inconsistent metadata, though this is * unlikely.
*/
error = xfs_trans_reserve_more_inode(sc->tp, rtg_rmap(rtg),
rr->new_btree.bload.nr_blocks, 0, true); if (error) goto err_cur;
/* Reserve the space we'll need for the new btree. */
error = xrep_newbt_alloc_blocks(&rr->new_btree,
rr->new_btree.bload.nr_blocks); if (error) goto err_cur;
/* * Create a cursor to the in-memory btree so that we can bulk load the * new btree.
*/
rr->mcur = xfs_rtrmapbt_mem_cursor(sc->sr.rtg, NULL, &rr->rtrmap_btree);
error = xfs_btree_goto_left_edge(rr->mcur); if (error) goto err_mcur;
/* Add all observed rmap records. */
rr->new_btree.ifake.if_fork->if_format = XFS_DINODE_FMT_META_BTREE;
error = xfs_btree_bload(rmap_cur, &rr->new_btree.bload, rr); if (error) goto err_mcur;
/* * Install the new rtrmap btree in the inode. After this point the old * btree is no longer accessible, the new tree is live, and we can * delete the cursor.
*/
xfs_rtrmapbt_commit_staged_btree(rmap_cur, sc->tp);
xrep_inode_set_nblocks(rr->sc, rr->new_btree.ifake.if_blocks);
xfs_btree_del_cursor(rmap_cur, 0);
xfs_btree_del_cursor(rr->mcur, 0);
rr->mcur = NULL;
/* * Now that we've written the new btree to disk, we don't need to keep * updating the in-memory btree. Abort the scan to stop live updates.
*/
xchk_iscan_abort(&rr->iscan);
/* Dispose of any unused blocks and the accounting information. */
error = xrep_newbt_commit(&rr->new_btree); if (error) return error;
/* * We scanned the CoW staging extents before we started the iscan, so * we need all the updates.
*/ if (XFS_RMAP_NON_INODE_OWNER(oi->oi_owner)) returntrue;
/* Ignore updates to files that the scanner hasn't visited yet. */ return xchk_iscan_want_live_update(iscan, oi->oi_owner);
}
/* * Apply a rtrmapbt update from the regular filesystem into our shadow btree. * We're running from the thread that owns the rtrmap ILOCK and is generating * the update, so we must be careful about which parts of the struct * xrep_rtrmap that we change.
*/ staticint
xrep_rtrmapbt_live_update( struct notifier_block *nb, unsignedlong action, void *data)
{ struct xfs_rmap_update_params *p = data; struct xrep_rtrmap *rr; struct xfs_mount *mp; struct xfs_btree_cur *mcur; struct xfs_trans *tp; int error;
/* Set up some storage */
error = xfs_rtrmapbt_mem_init(sc->mp, &rr->rtrmap_btree, sc->xmbtp,
rtg_rgno(sc->sr.rtg)); if (error) goto out_bitmap;
/* Retry iget every tenth of a second for up to 30 seconds. */
xchk_iscan_start(sc, 30000, 100, &rr->iscan);
/* * Hook into live rtrmap operations so that we can update our in-memory * btree to reflect live changes on the filesystem. Since we drop the * rtrmap ILOCK to scan all the inodes, we need this piece to avoid * installing a stale btree.
*/
ASSERT(sc->flags & XCHK_FSGATES_RMAP);
xfs_rmap_hook_setup(&rr->rhook, xrep_rtrmapbt_live_update);
error = xfs_rmap_hook_add(rtg_group(sc->sr.rtg), &rr->rhook); if (error) goto out_iscan; return 0;
/* Repair the realtime rmap btree. */ int
xrep_rtrmapbt( struct xfs_scrub *sc)
{ struct xrep_rtrmap *rr = sc->buf; int error;
/* Make sure any problems with the fork are fixed. */
error = xrep_metadata_inode_forks(sc); if (error) return error;
error = xrep_rtrmap_setup_scan(rr); if (error) return error;
/* Collect rmaps for realtime files. */
error = xrep_rtrmap_find_rmaps(rr); if (error) goto out_records;
xfs_trans_ijoin(sc->tp, sc->ip, 0);
/* Rebuild the rtrmap information. */
error = xrep_rtrmap_build_new_tree(rr); if (error) goto out_records;
/* * Free all the extents that were allocated to the former rtrmapbt and * aren't cross-linked with something else.
*/
error = xrep_reap_metadir_fsblocks(rr->sc, &rr->old_rtrmapbt_blocks); if (error) goto out_records;
/*
 * Website notice (translated from German):
 * The information on this web page was compiled carefully and to the best of
 * our knowledge.  However, neither the completeness, nor the correctness,
 * nor the quality of the information provided is guaranteed.
 * Remark:
 * The colored syntax highlighting and the measurement are still experimental.
 */