// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2005 Silicon Graphics, Inc. * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved.
*/ #include"xfs.h" #include"xfs_fs.h" #include"xfs_shared.h" #include"xfs_format.h" #include"xfs_log_format.h" #include"xfs_trans_resv.h" #include"xfs_bit.h" #include"xfs_mount.h" #include"xfs_inode.h" #include"xfs_dir2.h" #include"xfs_dir2_priv.h" #include"xfs_trans.h" #include"xfs_bmap.h" #include"xfs_attr_leaf.h" #include"xfs_error.h" #include"xfs_trace.h" #include"xfs_buf_item.h" #include"xfs_log.h" #include"xfs_errortag.h" #include"xfs_health.h"
/* * xfs_da_btree.c * * Routines to implement directories as Btrees of hashed names.
*/
/*======================================================================== * Function prototypes for the kernel.
*========================================================================*/
/* * Routines used for growing the Btree.
*/ STATICint xfs_da3_root_split(xfs_da_state_t *state,
xfs_da_state_blk_t *existing_root,
xfs_da_state_blk_t *new_child); STATICint xfs_da3_node_split(xfs_da_state_t *state,
xfs_da_state_blk_t *existing_blk,
xfs_da_state_blk_t *split_blk,
xfs_da_state_blk_t *blk_to_add, int treelevel, int *result); STATICvoid xfs_da3_node_rebalance(xfs_da_state_t *state,
xfs_da_state_blk_t *node_blk_1,
xfs_da_state_blk_t *node_blk_2); STATICvoid xfs_da3_node_add(xfs_da_state_t *state,
xfs_da_state_blk_t *old_node_blk,
xfs_da_state_blk_t *new_node_blk);
/* * Routines used for shrinking the Btree.
*/ STATICint xfs_da3_root_join(xfs_da_state_t *state,
xfs_da_state_blk_t *root_blk); STATICint xfs_da3_node_toosmall(xfs_da_state_t *state, int *retval); STATICvoid xfs_da3_node_remove(xfs_da_state_t *state,
xfs_da_state_blk_t *drop_blk); STATICvoid xfs_da3_node_unbalance(xfs_da_state_t *state,
xfs_da_state_blk_t *src_node_blk,
xfs_da_state_blk_t *dst_node_blk);
struct kmem_cache *xfs_da_state_cache; /* anchor for dir/attr state */
/* * Allocate a dir-state structure. * We don't put them on the stack since they're large.
*/ struct xfs_da_state *
xfs_da_state_alloc( struct xfs_da_args *args)
{ struct xfs_da_state *state;
/* * Verify an xfs_da3_blkinfo structure. Note that the da3 fields are only * accessible on v5 filesystems. This header format is common across da node, * attr leaf and dir leaf blocks.
*/
xfs_failaddr_t
xfs_da3_blkinfo_verify( struct xfs_buf *bp, struct xfs_da3_blkinfo *hdr3)
{ struct xfs_mount *mp = bp->b_mount; struct xfs_da_blkinfo *hdr = &hdr3->hdr;
if (!xfs_verify_magic16(bp, hdr->magic)) return __this_address;
if (xfs_has_crc(mp)) { if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) return __this_address; if (be64_to_cpu(hdr3->blkno) != xfs_buf_daddr(bp)) return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) return __this_address;
}
fa = xfs_da3_blkinfo_verify(bp, bp->b_addr); if (fa) return fa;
if (ichdr.level == 0) return __this_address; if (ichdr.level > XFS_DA_NODE_MAXDEPTH) return __this_address; if (ichdr.count == 0) return __this_address;
/* * we don't know if the node is for and attribute or directory tree, * so only fail if the count is outside both bounds
*/ if (ichdr.count > mp->m_dir_geo->node_ents &&
ichdr.count > mp->m_attr_geo->node_ents) return __this_address;
fa = xfs_da3_node_verify(bp); if (fa) {
xfs_verifier_error(bp, -EFSCORRUPTED, fa); return;
}
if (!xfs_has_crc(mp)) return;
if (bip)
hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF);
}
/* * leaf/node format detection on trees is sketchy, so a node read can be done on * leaf level blocks when detection identifies the tree as a node format tree * incorrectly. In this case, we need to swap the verifier to match the correct * format of the block being read.
*/ staticvoid
xfs_da3_node_read_verify( struct xfs_buf *bp)
{ struct xfs_da_blkinfo *info = bp->b_addr;
xfs_failaddr_t fa;
switch (be16_to_cpu(info->magic)) { case XFS_DA3_NODE_MAGIC: if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) {
xfs_verifier_error(bp, -EFSBADCRC,
__this_address); break;
}
fallthrough; case XFS_DA_NODE_MAGIC:
fa = xfs_da3_node_verify(bp); if (fa)
xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; case XFS_ATTR_LEAF_MAGIC: case XFS_ATTR3_LEAF_MAGIC:
bp->b_ops = &xfs_attr3_leaf_buf_ops;
bp->b_ops->verify_read(bp); return; case XFS_DIR2_LEAFN_MAGIC: case XFS_DIR3_LEAFN_MAGIC:
bp->b_ops = &xfs_dir3_leafn_buf_ops;
bp->b_ops->verify_read(bp); return; default:
xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); break;
}
}
/* Verify the structure of a da3 block. */ static xfs_failaddr_t
xfs_da3_node_verify_struct( struct xfs_buf *bp)
{ struct xfs_da_blkinfo *info = bp->b_addr;
switch (be16_to_cpu(info->magic)) { case XFS_DA3_NODE_MAGIC: case XFS_DA_NODE_MAGIC: return xfs_da3_node_verify(bp); case XFS_ATTR_LEAF_MAGIC: case XFS_ATTR3_LEAF_MAGIC:
bp->b_ops = &xfs_attr3_leaf_buf_ops; return bp->b_ops->verify_struct(bp); case XFS_DIR2_LEAFN_MAGIC: case XFS_DIR3_LEAFN_MAGIC:
bp->b_ops = &xfs_dir3_leafn_buf_ops; return bp->b_ops->verify_struct(bp); default: return __this_address;
}
}
if (whichfork == XFS_ATTR_FORK)
xfs_buf_set_ref(*bpp, XFS_ATTR_BTREE_REF); else
xfs_buf_set_ref(*bpp, XFS_DIR_BTREE_REF);
if (!tp) return 0; return xfs_da3_node_set_type(tp, dp, whichfork, *bpp);
}
/* * Copy src directory/attr leaf/node buffer to the dst. * For v5 file systems make sure the right blkno is stamped in.
*/ void
xfs_da_buf_copy( struct xfs_buf *dst, struct xfs_buf *src,
size_t size)
{ struct xfs_da3_blkinfo *da3 = dst->b_addr;
/*======================================================================== * Routines used for growing the Btree.
*========================================================================*/
/* * Create the initial contents of an intermediate node.
*/ int
xfs_da3_node_create( struct xfs_da_args *args,
xfs_dablk_t blkno, int level, struct xfs_buf **bpp, int whichfork)
{ struct xfs_da_intnode *node; struct xfs_trans *tp = args->trans; struct xfs_mount *mp = tp->t_mountp; struct xfs_da3_icnode_hdr ichdr = {0}; struct xfs_buf *bp; int error; struct xfs_inode *dp = args->dp;
/* * Split a leaf node, rebalance, then possibly split * intermediate nodes, rebalance, etc.
*/ int/* error */
xfs_da3_split( struct xfs_da_state *state)
{ struct xfs_da_state_blk *oldblk; struct xfs_da_state_blk *newblk; struct xfs_da_state_blk *addblk; struct xfs_da_intnode *node; int max; int action = 0; int error; int i;
trace_xfs_da_split(state->args);
if (XFS_TEST_ERROR(false, state->mp, XFS_ERRTAG_DA_LEAF_SPLIT)) return -EIO;
/* * Walk back up the tree splitting/inserting/adjusting as necessary. * If we need to insert and there isn't room, split the node, then * decide which fragment to insert the new block from below into. * Note that we may split the root this way, but we need more fixup.
*/
max = state->path.active - 1;
ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);
addblk = &state->path.blk[max]; /* initial dummy value */ for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
oldblk = &state->path.blk[i];
newblk = &state->altpath.blk[i];
/* * If a leaf node then * Allocate a new leaf node, then rebalance across them. * else if an intermediate node then * We split on the last layer, must we split the node?
*/ switch (oldblk->magic) { case XFS_ATTR_LEAF_MAGIC:
error = xfs_attr3_leaf_split(state, oldblk, newblk); if (error < 0) return error; /* GROT: attr is inconsistent */ if (!error) {
addblk = newblk; break;
} /* * Entry wouldn't fit, split the leaf again. The new * extrablk will be consumed by xfs_da3_node_split if * the node is split.
*/
state->extravalid = 1; if (state->inleaf) {
state->extraafter = 0; /* before newblk */
trace_xfs_attr_leaf_split_before(state->args);
error = xfs_attr3_leaf_split(state, oldblk,
&state->extrablk);
} else {
state->extraafter = 1; /* after newblk */
trace_xfs_attr_leaf_split_after(state->args);
error = xfs_attr3_leaf_split(state, newblk,
&state->extrablk);
} if (error == 1) return -ENOSPC; if (error) return error; /* GROT: attr inconsistent */
addblk = newblk; break; case XFS_DIR2_LEAFN_MAGIC:
error = xfs_dir2_leafn_split(state, oldblk, newblk); if (error) return error;
addblk = newblk; break; case XFS_DA_NODE_MAGIC:
error = xfs_da3_node_split(state, oldblk, newblk, addblk,
max - i, &action);
addblk->bp = NULL; if (error) return error; /* GROT: dir is inconsistent */ /* * Record the newly split block for the next time thru?
*/ if (action)
addblk = newblk; else
addblk = NULL; break;
}
/* * Update the btree to show the new hashval for this child.
*/
xfs_da3_fixhashpath(state, &state->path);
} if (!addblk) return 0;
/* * xfs_da3_node_split() should have consumed any extra blocks we added * during a double leaf split in the attr fork. This is guaranteed as * we can't be here if the attr fork only has a single leaf block.
*/
ASSERT(state->extravalid == 0 ||
state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);
/* * Update pointers to the node which used to be block 0 and just got * bumped because of the addition of a new root node. Note that the * original block 0 could be at any position in the list of blocks in * the tree. * * Note: the magic numbers and sibling pointers are in the same physical * place for both v2 and v3 headers (by design). Hence it doesn't matter * which version of the xfs_da_intnode structure we use here as the * result will be the same using either structure.
*/
node = oldblk->bp->b_addr; if (node->hdr.info.forw) { if (be32_to_cpu(node->hdr.info.forw) != addblk->blkno) {
xfs_buf_mark_corrupt(oldblk->bp);
xfs_da_mark_sick(state->args);
error = -EFSCORRUPTED; goto out;
}
node = addblk->bp->b_addr;
node->hdr.info.back = cpu_to_be32(oldblk->blkno);
xfs_trans_log_buf(state->args->trans, addblk->bp,
XFS_DA_LOGRANGE(node, &node->hdr.info, sizeof(node->hdr.info)));
}
node = oldblk->bp->b_addr; if (node->hdr.info.back) { if (be32_to_cpu(node->hdr.info.back) != addblk->blkno) {
xfs_buf_mark_corrupt(oldblk->bp);
xfs_da_mark_sick(state->args);
error = -EFSCORRUPTED; goto out;
}
node = addblk->bp->b_addr;
node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
xfs_trans_log_buf(state->args->trans, addblk->bp,
XFS_DA_LOGRANGE(node, &node->hdr.info, sizeof(node->hdr.info)));
}
out:
addblk->bp = NULL; return error;
}
/* * Split the root. We have to create a new root and point to the two * parts (the split old root) that we just created. Copy block zero to * the EOF, extending the inode in process.
*/ STATICint/* error */
xfs_da3_root_split( struct xfs_da_state *state, struct xfs_da_state_blk *blk1, struct xfs_da_state_blk *blk2)
{ struct xfs_da_intnode *node; struct xfs_da_intnode *oldroot; struct xfs_da_node_entry *btree; struct xfs_da3_icnode_hdr nodehdr; struct xfs_da_args *args; struct xfs_buf *bp; struct xfs_inode *dp; struct xfs_trans *tp; struct xfs_dir2_leaf *leaf;
xfs_dablk_t blkno; int level; int error; int size;
trace_xfs_da_root_split(state->args);
/* * Copy the existing (incorrect) block from the root node position * to a free space somewhere.
*/
args = state->args;
error = xfs_da_grow_inode(args, &blkno); if (error) return error;
/* * With V2 dirs the extra block is data or freespace.
*/
useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK;
newcount = 1 + useextra; /* * Do we have to split the node?
*/ if (nodehdr.count + newcount > state->args->geo->node_ents) { /* * Allocate a new node, add to the doubly linked chain of * nodes, then move some of our excess entries into it.
*/
error = xfs_da_grow_inode(state->args, &blkno); if (error) return error; /* GROT: dir is inconsistent */
/* * Insert the new entry(s) into the correct block * (updating last hashval in the process). * * xfs_da3_node_add() inserts BEFORE the given index, * and as a result of using node_lookup_int() we always * point to a valid entry (not after one), but a split * operation always results in a new block whose hashvals * FOLLOW the current block. * * If we had double-split op below us, then add the extra block too.
*/
node = oldblk->bp->b_addr;
xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node); if (oldblk->index <= nodehdr.count) {
oldblk->index++;
xfs_da3_node_add(state, oldblk, addblk); if (useextra) { if (state->extraafter)
oldblk->index++;
xfs_da3_node_add(state, oldblk, &state->extrablk);
state->extravalid = 0;
}
} else {
newblk->index++;
xfs_da3_node_add(state, newblk, addblk); if (useextra) { if (state->extraafter)
newblk->index++;
xfs_da3_node_add(state, newblk, &state->extrablk);
state->extravalid = 0;
}
}
return 0;
}
/* * Balance the btree elements between two intermediate nodes, * usually one full and one empty. * * NOTE: if blk2 is empty, then it will get the upper half of blk1.
*/ STATICvoid
xfs_da3_node_rebalance( struct xfs_da_state *state, struct xfs_da_state_blk *blk1, struct xfs_da_state_blk *blk2)
{ struct xfs_da_intnode *node1; struct xfs_da_intnode *node2; struct xfs_da_node_entry *btree1; struct xfs_da_node_entry *btree2; struct xfs_da_node_entry *btree_s; struct xfs_da_node_entry *btree_d; struct xfs_da3_icnode_hdr nodehdr1; struct xfs_da3_icnode_hdr nodehdr2; struct xfs_trans *tp; int count; int tmp; int swap = 0; struct xfs_inode *dp = state->args->dp;
/* * Figure out how many entries need to move, and in which direction. * Swap the nodes around if that makes it simpler.
*/ if (nodehdr1.count > 0 && nodehdr2.count > 0 &&
((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
(be32_to_cpu(btree2[nodehdr2.count - 1].hashval) <
be32_to_cpu(btree1[nodehdr1.count - 1].hashval)))) {
swap(node1, node2);
xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1);
xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2);
btree1 = nodehdr1.btree;
btree2 = nodehdr2.btree;
swap = 1;
}
count = (nodehdr1.count - nodehdr2.count) / 2; if (count == 0) return;
tp = state->args->trans; /* * Two cases: high-to-low and low-to-high.
*/ if (count > 0) { /* * Move elements in node2 up to make a hole.
*/
tmp = nodehdr2.count; if (tmp > 0) {
tmp *= (uint)sizeof(xfs_da_node_entry_t);
btree_s = &btree2[0];
btree_d = &btree2[count];
memmove(btree_d, btree_s, tmp);
}
/* * Move the req'd B-tree elements from high in node1 to * low in node2.
*/
nodehdr2.count += count;
tmp = count * (uint)sizeof(xfs_da_node_entry_t);
btree_s = &btree1[nodehdr1.count - count];
btree_d = &btree2[0];
memcpy(btree_d, btree_s, tmp);
nodehdr1.count -= count;
} else { /* * Move the req'd B-tree elements from low in node2 to * high in node1.
*/
count = -count;
tmp = count * (uint)sizeof(xfs_da_node_entry_t);
btree_s = &btree2[0];
btree_d = &btree1[nodehdr1.count];
memcpy(btree_d, btree_s, tmp);
nodehdr1.count += count;
/* * Move elements in node2 down to fill the hole.
*/
tmp = nodehdr2.count - count;
tmp *= (uint)sizeof(xfs_da_node_entry_t);
btree_s = &btree2[count];
btree_d = &btree2[0];
memmove(btree_d, btree_s, tmp);
nodehdr2.count -= count;
}
/* * Log header of node 1 and all current bits of node 2.
*/
xfs_da3_node_hdr_to_disk(dp->i_mount, node1, &nodehdr1);
xfs_trans_log_buf(tp, blk1->bp,
XFS_DA_LOGRANGE(node1, &node1->hdr,
state->args->geo->node_hdr_size));
/* * We may need to make some room before we insert the new node.
*/
tmp = 0; if (oldblk->index < nodehdr.count) {
tmp = (nodehdr.count - oldblk->index) * (uint)sizeof(*btree);
memmove(&btree[oldblk->index + 1], &btree[oldblk->index], tmp);
}
btree[oldblk->index].hashval = cpu_to_be32(newblk->hashval);
btree[oldblk->index].before = cpu_to_be32(newblk->blkno);
xfs_trans_log_buf(state->args->trans, oldblk->bp,
XFS_DA_LOGRANGE(node, &btree[oldblk->index],
tmp + sizeof(*btree)));
/* * Copy the last hash value from the oldblk to propagate upwards.
*/
oldblk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
}
/*======================================================================== * Routines used for shrinking the Btree.
*========================================================================*/
/* * Deallocate an empty leaf node, remove it from its parent, * possibly deallocating that block, etc...
*/ int
xfs_da3_join( struct xfs_da_state *state)
{ struct xfs_da_state_blk *drop_blk; struct xfs_da_state_blk *save_blk; int action = 0; int error;
/* * Walk back up the tree joining/deallocating as necessary. * When we stop dropping blocks, break out.
*/ for ( ; state->path.active >= 2; drop_blk--, save_blk--,
state->path.active--) { /* * See if we can combine the block with a neighbor. * (action == 0) => no options, just leave * (action == 1) => coalesce, then unlink * (action == 2) => block empty, unlink it
*/ switch (drop_blk->magic) { case XFS_ATTR_LEAF_MAGIC:
error = xfs_attr3_leaf_toosmall(state, &action); if (error) return error; if (action == 0) return 0;
xfs_attr3_leaf_unbalance(state, drop_blk, save_blk); break; case XFS_DIR2_LEAFN_MAGIC:
error = xfs_dir2_leafn_toosmall(state, &action); if (error) return error; if (action == 0) return 0;
xfs_dir2_leafn_unbalance(state, drop_blk, save_blk); break; case XFS_DA_NODE_MAGIC: /* * Remove the offending node, fixup hashvals, * check for a toosmall neighbor.
*/
xfs_da3_node_remove(state, drop_blk);
xfs_da3_fixhashpath(state, &state->path);
error = xfs_da3_node_toosmall(state, &action); if (error) return error; if (action == 0) return 0;
xfs_da3_node_unbalance(state, drop_blk, save_blk); break;
}
xfs_da3_fixhashpath(state, &state->altpath);
error = xfs_da3_blk_unlink(state, drop_blk, save_blk);
xfs_da_state_kill_altpath(state); if (error) return error;
error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
drop_blk->bp);
drop_blk->bp = NULL; if (error) return error;
} /* * We joined all the way to the top. If it turns out that * we only have one entry in the root, make the child block * the new root.
*/
xfs_da3_node_remove(state, drop_blk);
xfs_da3_fixhashpath(state, &state->path);
error = xfs_da3_root_join(state, &state->path.blk[0]); return error;
}
/* * We have only one entry in the root. Copy the only remaining child of * the old root to block 0 as the new root node.
*/ STATICint
xfs_da3_root_join( struct xfs_da_state *state, struct xfs_da_state_blk *root_blk)
{ struct xfs_da_intnode *oldroot; struct xfs_da_args *args;
xfs_dablk_t child; struct xfs_buf *bp; struct xfs_da3_icnode_hdr oldroothdr; int error; struct xfs_inode *dp = state->args->dp;
xfs_failaddr_t fa;
/* * If the root has more than one child, then don't do anything.
*/ if (oldroothdr.count > 1) return 0;
/* * Read in the (only) child block, then copy those bytes into * the root block's buffer and free the original child block.
*/
child = be32_to_cpu(oldroothdr.btree[0].before);
ASSERT(child != 0);
error = xfs_da3_node_read(args->trans, dp, child, &bp, args->whichfork); if (error) return error;
fa = xfs_da3_header_check(bp, args->owner); if (fa) {
__xfs_buf_mark_corrupt(bp, fa);
xfs_trans_brelse(args->trans, bp);
xfs_da_mark_sick(args); return -EFSCORRUPTED;
}
xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level);
/* * Copy child to root buffer and log it.
*/
xfs_da_buf_copy(root_blk->bp, bp, args->geo->blksize);
xfs_trans_log_buf(args->trans, root_blk->bp, 0,
args->geo->blksize - 1); /* * Now we can drop the child buffer.
*/
error = xfs_da_shrink_inode(args, child, bp); return error;
}
/* * Check a node block and its neighbors to see if the block should be * collapsed into one or the other neighbor. Always keep the block * with the smaller block number. * If the current block is over 50% full, don't try to join it, return 0. * If the block is empty, fill in the state structure and return 2. * If it can be collapsed, fill in the state structure and return 1. * If nothing can be done, return 0.
*/ STATICint
xfs_da3_node_toosmall( struct xfs_da_state *state, int *action)
{ struct xfs_da_intnode *node; struct xfs_da_state_blk *blk; struct xfs_da_blkinfo *info;
xfs_dablk_t blkno; struct xfs_buf *bp;
xfs_failaddr_t fa; struct xfs_da3_icnode_hdr nodehdr; int count; int forward; int error; int retval; int i; struct xfs_inode *dp = state->args->dp;
trace_xfs_da_node_toosmall(state->args);
/* * Check for the degenerate case of the block being over 50% full. * If so, it's not worth even looking to see if we might be able * to coalesce with a sibling.
*/
blk = &state->path.blk[ state->path.active-1 ];
info = blk->bp->b_addr;
node = (xfs_da_intnode_t *)info;
xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node); if (nodehdr.count > (state->args->geo->node_ents >> 1)) {
*action = 0; /* blk over 50%, don't try to join */ return 0; /* blk over 50%, don't try to join */
}
/* * Check for the degenerate case of the block being empty. * If the block is empty, we'll simply delete it, no need to * coalesce it with a sibling block. We choose (arbitrarily) * to merge with the forward block unless it is NULL.
*/ if (nodehdr.count == 0) { /* * Make altpath point to the block we want to keep and * path point to the block we want to drop (this one).
*/
forward = (info->forw != 0);
memcpy(&state->altpath, &state->path, sizeof(state->path));
error = xfs_da3_path_shift(state, &state->altpath, forward,
0, &retval); if (error) return error; if (retval) {
*action = 0;
} else {
*action = 2;
} return 0;
}
/* * Examine each sibling block to see if we can coalesce with * at least 25% free space to spare. We need to figure out * whether to merge with the forward or the backward block. * We prefer coalescing with the lower numbered sibling so as * to shrink a directory over time.
*/
count = state->args->geo->node_ents;
count -= state->args->geo->node_ents >> 2;
count -= nodehdr.count;
/* start with smaller blk num */
forward = nodehdr.forw < nodehdr.back; for (i = 0; i < 2; forward = !forward, i++) { struct xfs_da3_icnode_hdr thdr; if (forward)
blkno = nodehdr.forw; else
blkno = nodehdr.back; if (blkno == 0) continue;
error = xfs_da3_node_read(state->args->trans, dp, blkno, &bp,
state->args->whichfork); if (error) return error;
fa = xfs_da3_node_header_check(bp, state->args->owner); if (fa) {
__xfs_buf_mark_corrupt(bp, fa);
xfs_trans_brelse(state->args->trans, bp);
xfs_da_mark_sick(state->args); return -EFSCORRUPTED;
}
if (count - thdr.count >= 0) break; /* fits with at least 25% to spare */
} if (i >= 2) {
*action = 0; return 0;
}
/* * Make altpath point to the block we want to keep (the lower * numbered block) and path point to the block we want to drop.
*/
memcpy(&state->altpath, &state->path, sizeof(state->path)); if (blkno < blk->blkno) {
error = xfs_da3_path_shift(state, &state->altpath, forward,
0, &retval);
} else {
error = xfs_da3_path_shift(state, &state->path, forward,
0, &retval);
} if (error) return error; if (retval) {
*action = 0; return 0;
}
*action = 1; return 0;
}
/* * Pick up the last hashvalue from an intermediate node.
*/ STATIC uint
xfs_da3_node_lasthash( struct xfs_inode *dp, struct xfs_buf *bp, int *count)
{ struct xfs_da3_icnode_hdr nodehdr;
xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, bp->b_addr); if (count)
*count = nodehdr.count; if (!nodehdr.count) return 0; return be32_to_cpu(nodehdr.btree[nodehdr.count - 1].hashval);
}
/* * Walk back up the tree adjusting hash values as necessary, * when we stop making changes, return.
*/ void
xfs_da3_fixhashpath( struct xfs_da_state *state, struct xfs_da_state_path *path)
{ struct xfs_da_state_blk *blk; struct xfs_da_intnode *node; struct xfs_da_node_entry *btree;
xfs_dahash_t lasthash=0; int level; int count; struct xfs_inode *dp = state->args->dp;
trace_xfs_da_fixhashpath(state->args);
level = path->active-1;
blk = &path->blk[ level ]; switch (blk->magic) { case XFS_ATTR_LEAF_MAGIC:
lasthash = xfs_attr_leaf_lasthash(blk->bp, &count); if (count == 0) return; break; case XFS_DIR2_LEAFN_MAGIC:
lasthash = xfs_dir2_leaf_lasthash(dp, blk->bp, &count); if (count == 0) return; break; case XFS_DA_NODE_MAGIC:
lasthash = xfs_da3_node_lasthash(dp, blk->bp, &count); if (count == 0) return; break;
} for (blk--, level--; level >= 0; blk--, level--) { struct xfs_da3_icnode_hdr nodehdr;
/* * If the dying block has lower hashvals, then move all the * elements in the remaining block up to make a hole.
*/ if ((be32_to_cpu(drop_btree[0].hashval) <
be32_to_cpu(save_btree[0].hashval)) ||
(be32_to_cpu(drop_btree[drop_hdr.count - 1].hashval) <
be32_to_cpu(save_btree[save_hdr.count - 1].hashval))) { /* XXX: check this - is memmove dst correct? */
tmp = save_hdr.count * sizeof(xfs_da_node_entry_t);
memmove(&save_btree[drop_hdr.count], &save_btree[0], tmp);
/* * Move all the B-tree elements from drop_blk to save_blk.
*/
tmp = drop_hdr.count * (uint)sizeof(xfs_da_node_entry_t);
memcpy(&save_btree[sindex], &drop_btree[0], tmp);
save_hdr.count += drop_hdr.count;
/* * Save the last hashval in the remaining block for upward propagation.
*/
save_blk->hashval = be32_to_cpu(save_btree[save_hdr.count - 1].hashval);
}
/*======================================================================== * Routines used for finding things in the Btree.
*========================================================================*/
/* * Walk down the Btree looking for a particular filename, filling * in the state structure as we go. * * We will set the state structure to point to each of the elements * in each of the nodes where either the hashval is or should be. * * We support duplicate hashval's so for each entry in the current * node that could contain the desired hashval, descend. This is a * pruned depth-first tree search.
*/ int/* error */
xfs_da3_node_lookup_int( struct xfs_da_state *state, int *result)
{ struct xfs_da_state_blk *blk; struct xfs_da_blkinfo *curr; struct xfs_da_intnode *node; struct xfs_da_node_entry *btree; struct xfs_da3_icnode_hdr nodehdr; struct xfs_da_args *args;
xfs_failaddr_t fa;
xfs_dablk_t blkno;
xfs_dahash_t hashval;
xfs_dahash_t btreehashval; int probe; int span; int max; int error; int retval; unsignedint expected_level = 0;
uint16_t magic; struct xfs_inode *dp = state->args->dp;
args = state->args;
/* * Descend thru the B-tree searching each level for the right * node to use, until the right hashval is found.
*/
blkno = args->geo->leafblk; for (blk = &state->path.blk[0], state->path.active = 1;
state->path.active <= XFS_DA_NODE_MAXDEPTH;
blk++, state->path.active++) { /* * Read the next node down in the tree.
*/
blk->blkno = blkno;
error = xfs_da3_node_read(args->trans, args->dp, blkno,
&blk->bp, args->whichfork); if (error) {
blk->blkno = 0;
state->path.active--; return error;
}
curr = blk->bp->b_addr;
magic = be16_to_cpu(curr->magic);
fa = xfs_da3_node_header_check(blk->bp, args->owner); if (fa) {
__xfs_buf_mark_corrupt(blk->bp, fa);
xfs_da_mark_sick(args); return -EFSCORRUPTED;
}
blk->magic = XFS_DA_NODE_MAGIC;
/* * Search an intermediate node for a match.
*/
node = blk->bp->b_addr;
xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
btree = nodehdr.btree;
/* Tree taller than we can handle; bail out! */ if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
xfs_buf_mark_corrupt(blk->bp);
xfs_da_mark_sick(args); return -EFSCORRUPTED;
}
/* Check the level from the root. */ if (blkno == args->geo->leafblk)
expected_level = nodehdr.level - 1; elseif (expected_level != nodehdr.level) {
xfs_buf_mark_corrupt(blk->bp);
xfs_da_mark_sick(args); return -EFSCORRUPTED;
} else
expected_level--;
max = nodehdr.count;
blk->hashval = be32_to_cpu(btree[max - 1].hashval);
/* * Since we may have duplicate hashval's, find the first * matching hashval in the node.
*/ while (probe > 0 &&
be32_to_cpu(btree[probe].hashval) >= hashval) {
probe--;
} while (probe < max &&
be32_to_cpu(btree[probe].hashval) < hashval) {
probe++;
}
/* * Pick the right block to descend on.
*/ if (probe == max) {
blk->index = max - 1;
blkno = be32_to_cpu(btree[max - 1].before);
} else {
blk->index = probe;
blkno = be32_to_cpu(btree[probe].before);
}
/* We can't point back to the root. */ if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk)) {
xfs_da_mark_sick(args); return -EFSCORRUPTED;
}
}
if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0)) {
xfs_da_mark_sick(args); return -EFSCORRUPTED;
}
/* * A leaf block that ends in the hashval that we are interested in * (final hashval == search hashval) means that the next block may * contain more entries with the same hashval, shift upward to the * next leaf and keep searching.
*/ for (;;) { if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
&blk->index, state);
} elseif (blk->magic == XFS_ATTR_LEAF_MAGIC) {
retval = xfs_attr3_leaf_lookup_int(blk->bp, args);
blk->index = args->index;
args->blkno = blk->blkno;
} else {
ASSERT(0);
xfs_da_mark_sick(args); return -EFSCORRUPTED;
} if (((retval == -ENOENT) || (retval == -ENOATTR)) &&
(blk->hashval == args->hashval)) {
error = xfs_da3_path_shift(state, &state->path, 1, 1,
&retval); if (error) return error; if (retval == 0) { continue;
} elseif (blk->magic == XFS_ATTR_LEAF_MAGIC) { /* path_shift() gives ENOENT */
retval = -ENOATTR;
}
} break;
}
*result = retval; return 0;
}
switch (old_blk->magic) { case XFS_ATTR_LEAF_MAGIC:
before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp); break; case XFS_DIR2_LEAFN_MAGIC:
before = xfs_dir2_leafn_order(dp, old_blk->bp, new_blk->bp); break; case XFS_DA_NODE_MAGIC:
before = xfs_da3_node_order(dp, old_blk->bp, new_blk->bp); break;
}
/* * Link blocks in appropriate order.
*/ if (before) { /* * Link new block in before existing block.
*/
trace_xfs_da_link_before(args);
new_info->forw = cpu_to_be32(old_blk->blkno);
new_info->back = old_info->back; if (old_info->back) {
error = xfs_da3_node_read(args->trans, dp,
be32_to_cpu(old_info->back),
&bp, args->whichfork); if (error) return error;
fa = xfs_da3_header_check(bp, args->owner); if (fa) {
__xfs_buf_mark_corrupt(bp, fa);
xfs_trans_brelse(args->trans, bp);
xfs_da_mark_sick(args); return -EFSCORRUPTED;
}
ASSERT(bp != NULL);
tmp_info = bp->b_addr;
ASSERT(tmp_info->magic == old_info->magic);
ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno);
tmp_info->forw = cpu_to_be32(new_blk->blkno);
xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
}
old_info->back = cpu_to_be32(new_blk->blkno);
} else { /* * Link new block in after existing block.
*/
trace_xfs_da_link_after(args);
new_info->forw = old_info->forw;
new_info->back = cpu_to_be32(old_blk->blkno); if (old_info->forw) {
error = xfs_da3_node_read(args->trans, dp,
be32_to_cpu(old_info->forw),
&bp, args->whichfork); if (error) return error;
fa = xfs_da3_header_check(bp, args->owner); if (fa) {
__xfs_buf_mark_corrupt(bp, fa);
xfs_trans_brelse(args->trans, bp);
xfs_da_mark_sick(args); return -EFSCORRUPTED;
}
ASSERT(bp != NULL);
tmp_info = bp->b_addr;
ASSERT(tmp_info->magic == old_info->magic);
ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno);
tmp_info->back = cpu_to_be32(new_blk->blkno);
xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
}
old_info->forw = cpu_to_be32(new_blk->blkno);
}
/* * Move a path "forward" or "!forward" one block at the current level. * * This routine will adjust a "path" to point to the next block * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the * Btree, including updating pointers to the intermediate nodes between * the new bottom and the root.
*/ int/* error */
xfs_da3_path_shift( struct xfs_da_state *state, struct xfs_da_state_path *path, int forward, int release, int *result)
{ struct xfs_da_state_blk *blk; struct xfs_da_blkinfo *info; struct xfs_da_args *args; struct xfs_da_node_entry *btree; struct xfs_da3_icnode_hdr nodehdr; struct xfs_buf *bp;
xfs_failaddr_t fa;
xfs_dablk_t blkno = 0; int level; int error; struct xfs_inode *dp = state->args->dp;
trace_xfs_da_path_shift(state->args);
/* * Roll up the Btree looking for the first block where our * current index is not at the edge of the block. Note that * we skip the bottom layer because we want the sibling block.
*/
args = state->args;
ASSERT(args != NULL);
ASSERT(path != NULL);
ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
level = (path->active-1) - 1; /* skip bottom layer in path */ for (; level >= 0; level--) {
blk = &path->blk[level];
xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr,
blk->bp->b_addr);
if (forward && (blk->index < nodehdr.count - 1)) {
blk->index++;
blkno = be32_to_cpu(nodehdr.btree[blk->index].before); break;
} elseif (!forward && (blk->index > 0)) {
blk->index--;
blkno = be32_to_cpu(nodehdr.btree[blk->index].before); break;
}
} if (level < 0) {
*result = -ENOENT; /* we're out of our tree */
ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); return 0;
}
/* * Roll down the edge of the subtree until we reach the * same depth we were at originally.
*/ for (blk++, level++; level < path->active; blk++, level++) { /* * Read the next child block into a local buffer.
*/
error = xfs_da3_node_read(args->trans, dp, blkno, &bp,
args->whichfork); if (error) return error;
/* * Release the old block (if it's dirty, the trans doesn't * actually let go) and swap the local buffer into the path * structure. This ensures failure of the above read doesn't set * a NULL buffer in an active slot in the path.
*/ if (release)
xfs_trans_brelse(args->trans, blk->bp);
blk->blkno = blkno;
blk->bp = bp;
/* * Implement a simple hash on a character string. * Rotate the hash value by 7 bits, then XOR each character in. * This is implemented with some source-level loop unrolling.
*/
xfs_dahash_t
xfs_da_hashname(const uint8_t *name, int namelen)
{
xfs_dahash_t hash;
/* * Do four characters at a time as long as we can.
*/ for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
(name[3] << 0) ^ rol32(hash, 7 * 4);
/* * Now do the rest of the characters.
*/ switch (namelen) { case 3: return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
rol32(hash, 7 * 3); case 2: return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2); case 1: return (name[0] << 0) ^ rol32(hash, 7 * 1); default: /* case 0: */ return hash;
}
}
int
xfs_da_grow_inode_int( struct xfs_da_args *args,
xfs_fileoff_t *bno, int count)
{ struct xfs_trans *tp = args->trans; struct xfs_inode *dp = args->dp; int w = args->whichfork;
xfs_rfsblock_t nblks = dp->i_nblocks; struct xfs_bmbt_irec map, *mapp = ↦ int nmap, error, got, i, mapi = 1;
/* * Find a spot in the file space to put the new block.
*/
error = xfs_bmap_first_unused(tp, dp, count, bno, w); if (error) return error;
/* * Try mapping it in one filesystem block.
*/
nmap = 1;
error = xfs_bmapi_write(tp, dp, *bno, count,
xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
args->total, &map, &nmap); if (error == -ENOSPC && count > 1) {
xfs_fileoff_t b; int c;
/* * If we didn't get it and the block might work if fragmented, * try without the CONTIG flag. Loop until we get it all.
*/
mapp = kmalloc(sizeof(*mapp) * count,
GFP_KERNEL | __GFP_NOFAIL); for (b = *bno, mapi = 0; b < *bno + count; ) {
c = (int)(*bno + count - b);
nmap = min(XFS_BMAP_MAX_NMAP, c);
error = xfs_bmapi_write(tp, dp, b, c,
xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
args->total, &mapp[mapi], &nmap); if (error) goto out_free_map;
mapi += nmap;
b = mapp[mapi - 1].br_startoff +
mapp[mapi - 1].br_blockcount;
}
} if (error) goto out_free_map;
/* * Count the blocks we got, make sure it matches the total.
*/ for (i = 0, got = 0; i < mapi; i++)
got += mapp[i].br_blockcount; if (got != count || mapp[0].br_startoff != *bno ||
mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
*bno + count) {
error = -ENOSPC; goto out_free_map;
}
/* account for newly allocated blocks in reserved blocks total */
args->total -= dp->i_nblocks - nblks;
out_free_map: if (mapp != &map)
kfree(mapp); return error;
}
/* * Add a block to the btree ahead of the file. * Return the new block number to the caller.
*/ int
xfs_da_grow_inode( struct xfs_da_args *args,
xfs_dablk_t *new_blkno)
{
xfs_fileoff_t bno; int error;
/* * Ick. We need to always be able to remove a btree block, even * if there's no space reservation because the filesystem is full. * This is called if xfs_bunmapi on a btree block fails due to ENOSPC. * It swaps the target block with the last block in the file. The * last block in the file can always be removed since it can't cause * a bmap btree split to do that.
*/ STATICint
xfs_da3_swap_lastblock( struct xfs_da_args *args,
xfs_dablk_t *dead_blknop, struct xfs_buf **dead_bufp)
{ struct xfs_da_blkinfo *dead_info; struct xfs_da_blkinfo *sib_info; struct xfs_da_intnode *par_node; struct xfs_da_intnode *dead_node; struct xfs_dir2_leaf *dead_leaf2; struct xfs_da_node_entry *btree; struct xfs_da3_icnode_hdr par_hdr; struct xfs_inode *dp; struct xfs_trans *tp; struct xfs_mount *mp; struct xfs_buf *dead_buf; struct xfs_buf *last_buf; struct xfs_buf *sib_buf; struct xfs_buf *par_buf;
--> --------------------
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.