// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2005 Silicon Graphics, Inc. * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved.
*/ #include"xfs.h" #include"xfs_fs.h" #include"xfs_shared.h" #include"xfs_format.h" #include"xfs_log_format.h" #include"xfs_trans_resv.h" #include"xfs_sb.h" #include"xfs_mount.h" #include"xfs_da_format.h" #include"xfs_da_btree.h" #include"xfs_inode.h" #include"xfs_trans.h" #include"xfs_bmap_btree.h" #include"xfs_bmap.h" #include"xfs_attr_sf.h" #include"xfs_attr.h" #include"xfs_attr_remote.h" #include"xfs_attr_leaf.h" #include"xfs_error.h" #include"xfs_trace.h" #include"xfs_buf_item.h" #include"xfs_dir2.h" #include"xfs_log.h" #include"xfs_ag.h" #include"xfs_errortag.h" #include"xfs_health.h"
/* * xfs_attr_leaf.c * * Routines to implement leaf blocks of attributes as Btrees of hashed names.
*/
/*======================================================================== * Function prototypes for the kernel.
*========================================================================*/
/* * Utility routines.
*/ STATICvoid xfs_attr3_leaf_moveents(struct xfs_da_args *args, struct xfs_attr_leafblock *src_leaf, struct xfs_attr3_icleaf_hdr *src_ichdr, int src_start, struct xfs_attr_leafblock *dst_leaf, struct xfs_attr3_icleaf_hdr *dst_ichdr, int dst_start, int move_count); STATICint xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
/* * attr3 block 'firstused' conversion helpers. * * firstused refers to the offset of the first used byte of the nameval region * of an attr leaf block. The region starts at the tail of the block and expands * backwards towards the middle. As such, firstused is initialized to the block * size for an empty leaf block and is reduced from there. * * The attr3 block size is pegged to the fsb size and the maximum fsb is 64k. * The in-core firstused field is 32-bit and thus supports the maximum fsb size. * The on-disk field is only 16-bit, however, and overflows at 64k. Since this * only occurs at exactly 64k, we use zero as a magic on-disk value to represent * the attr block size. The following helpers manage the conversion between the * in-core and on-disk formats.
*/
/* * Convert from the magic fsb size value to actual blocksize. This * should only occur for empty blocks when the block size overflows * 16-bits.
*/ if (to->firstused == XFS_ATTR3_LEAF_NULLOFF) {
ASSERT(!to->count && !to->usedbytes);
ASSERT(geo->blksize > USHRT_MAX);
to->firstused = geo->blksize;
}
}
/* magic value should only be seen on disk */
ASSERT(from->firstused != XFS_ATTR3_LEAF_NULLOFF);
/* * Scale down the 32-bit in-core firstused value to the 16-bit on-disk * value. This only overflows at the max supported value of 64k. Use the * magic on-disk value to represent block size in this case.
*/
firstused = from->firstused; if (firstused > USHRT_MAX) {
ASSERT(from->firstused == geo->blksize);
firstused = XFS_ATTR3_LEAF_NULLOFF;
}
/* * Check the name information. The namelen fields are u8 so we can't * possibly exceed the maximum name length of 255 bytes.
*/ if (ent->flags & XFS_ATTR_LOCAL) {
lentry = xfs_attr3_leaf_name_local(leaf, idx);
namesize = xfs_attr_leaf_entsize_local(lentry->namelen,
be16_to_cpu(lentry->valuelen));
name_end = (char *)lentry + namesize; if (lentry->namelen == 0) return __this_address;
} else {
rentry = xfs_attr3_leaf_name_remote(leaf, idx);
namesize = xfs_attr_leaf_entsize_remote(rentry->namelen);
name_end = (char *)rentry + namesize; if (rentry->namelen == 0) return __this_address; if (!(ent->flags & XFS_ATTR_INCOMPLETE) &&
rentry->valueblk == 0) return __this_address;
}
if (name_end > buf_end) return __this_address;
return NULL;
}
/* * Validate an attribute leaf block. * * Empty leaf blocks can occur under the following circumstances: * * 1. setxattr adds a new extended attribute to a file; * 2. The file has zero existing attributes; * 3. The attribute is too large to fit in the attribute fork; * 4. The attribute is small enough to fit in a leaf block; * 5. A log flush occurs after committing the transaction that creates * the (empty) leaf block; and * 6. The filesystem goes down after the log flush but before the new * attribute can be committed to the leaf block. * * Hence we need to ensure that we don't fail the validation purely * because the leaf is empty.
*/ static xfs_failaddr_t
xfs_attr3_leaf_verify( struct xfs_buf *bp)
{ struct xfs_attr3_icleaf_hdr ichdr; struct xfs_mount *mp = bp->b_mount; struct xfs_attr_leafblock *leaf = bp->b_addr; struct xfs_attr_leaf_entry *entries; struct xfs_attr_leaf_entry *ent; char *buf_end;
uint32_t end; /* must be 32bit - see below */
__u32 last_hashval = 0; int i;
xfs_failaddr_t fa;
fa = xfs_da3_blkinfo_verify(bp, bp->b_addr); if (fa) return fa;
/* * firstused is the block offset of the first name info structure. * Make sure it doesn't go off the block or crash into the header.
*/ if (ichdr.firstused > mp->m_attr_geo->blksize) return __this_address; if (ichdr.firstused < xfs_attr3_leaf_hdr_size(leaf)) return __this_address;
/* Make sure the entries array doesn't crash into the name info. */
entries = xfs_attr3_leaf_entryp(bp->b_addr); if ((char *)&entries[ichdr.count] >
(char *)bp->b_addr + ichdr.firstused) return __this_address;
/* * NOTE: This verifier historically failed empty leaf buffers because * we expect the fork to be in another format. Empty attr fork format * conversions are possible during xattr set, however, and format * conversion is not atomic with the xattr set that triggers it. We * cannot assume leaf blocks are non-empty until that is addressed.
*/
buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize; for (i = 0, ent = entries; i < ichdr.count; ent++, i++) {
fa = xfs_attr3_leaf_verify_entry(mp, buf_end, leaf, &ichdr,
ent, i, &last_hashval); if (fa) return fa;
}
/* * Quickly check the freemap information. Attribute data has to be * aligned to 4-byte boundaries, and likewise for the free space. * * Note that for 64k block size filesystems, the freemap entries cannot * overflow as they are only be16 fields. However, when checking end * pointer of the freemap, we have to be careful to detect overflows and * so use uint32_t for those checks.
*/ for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { if (ichdr.freemap[i].base > mp->m_attr_geo->blksize) return __this_address; if (ichdr.freemap[i].base & 0x3) return __this_address; if (ichdr.freemap[i].size > mp->m_attr_geo->blksize) return __this_address; if (ichdr.freemap[i].size & 0x3) return __this_address;
/* beware of 16 bit overflows here */
end = (uint32_t)ichdr.freemap[i].base + ichdr.freemap[i].size; if (end < ichdr.freemap[i].base) return __this_address; if (end > mp->m_attr_geo->blksize) return __this_address;
}
/*
 * leaf/node format detection on trees is sketchy, so a node read can be done on
 * leaf level blocks when detection identifies the tree as a node format tree
 * incorrectly. In this case, we need to swap the verifier to match the correct
 * format of the block being read.
 */
static void
xfs_attr3_leaf_read_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;

	/* CRC is only present on v5 (crc-enabled) filesystems. */
	if (xfs_has_crc(mp) &&
	    !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF)) {
		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
	} else {
		xfs_failaddr_t	fa = xfs_attr3_leaf_verify(bp);

		if (fa)
			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
	}
}
/* * If we are in log recovery, then we want the lookup to ignore the INCOMPLETE * flag on disk - if there's an incomplete attr then recovery needs to tear it * down. If there's no incomplete attr, then recovery needs to tear that attr * down to replace it with the attr that has been logged. In this case, the * INCOMPLETE flag will not be set in attr->attr_filter, but rather * XFS_DA_OP_RECOVERY will be set in args->op_flags.
*/ staticinlineunsignedint xfs_attr_match_mask(conststruct xfs_da_args *args)
{ if (args->op_flags & XFS_DA_OP_RECOVERY) return XFS_ATTR_NSP_ONDISK_MASK; return XFS_ATTR_NSP_ONDISK_MASK | XFS_ATTR_INCOMPLETE;
}
/* Parent pointers do not use remote values */ if (!value) returnfalse;
/* * The only value we support is a parent rec. However, we'll accept * any valuelen so that offline repair can delete ATTR_PARENT values * that are not parent pointers.
*/ if (valuelen != args->valuelen) returnfalse;
if (args->namelen != namelen) returnfalse; if ((args->attr_filter & mask) != (attr_flags & mask)) returnfalse; if (memcmp(args->name, name, namelen) != 0) returnfalse;
if (attr_flags & XFS_ATTR_PARENT) return xfs_attr_parent_match(args, value, valuelen);
returntrue;
}
staticint
xfs_attr_copy_value( struct xfs_da_args *args, unsignedchar *value, int valuelen)
{ /* * Parent pointer lookups require the caller to specify the name and * value, so don't copy anything.
*/ if (args->attr_filter & XFS_ATTR_PARENT) return 0;
/* * No copy if all we have to do is get the length
*/ if (!args->valuelen) {
args->valuelen = valuelen; return 0;
}
/* * No copy if the length of the existing buffer is too small
*/ if (args->valuelen < valuelen) {
args->valuelen = valuelen; return -ERANGE;
}
if (!args->value) {
args->value = kvmalloc(valuelen, GFP_KERNEL | __GFP_NOLOCKDEP); if (!args->value) return -ENOMEM;
}
args->valuelen = valuelen;
/* remote block xattr requires IO for copy-in */ if (args->rmtblkno) return xfs_attr_rmtval_get(args);
/* * This is to prevent a GCC warning because the remote xattr case * doesn't have a value to pass in. In that case, we never reach here, * but GCC can't work that out and so throws a "passing NULL to * memcpy" warning.
*/ if (!value) return -EINVAL;
memcpy(args->value, value, valuelen); return 0;
}
/* * Query whether the total requested number of attr fork bytes of extended * attribute space will be able to fit inline. * * Returns zero if not, else the i_forkoff fork offset to be used in the * literal area for attribute data once the new bytes have been added. * * i_forkoff must be 8 byte aligned, hence is stored as a >>3 value; * special case for dev/uuid inodes, they have fixed size data forks.
*/ int
xfs_attr_shortform_bytesfit( struct xfs_inode *dp, int bytes)
{ struct xfs_mount *mp = dp->i_mount;
int64_t dsize; int minforkoff; int maxforkoff; int offset;
/* * Check if the new size could fit at all first:
*/ if (bytes > XFS_LITINO(mp)) return 0;
/* * If the requested numbers of bytes is smaller or equal to the * current attribute fork size we can always proceed. * * Note that if_bytes in the data fork might actually be larger than * the current data fork size is due to delalloc extents. In that * case either the extent count will go down when they are converted * to real extents, or the delalloc conversion will take care of the * literal area rebalancing.
*/ if (bytes <= xfs_inode_attr_fork_size(dp)) return dp->i_forkoff;
/* * For attr2 we can try to move the forkoff if there is space in the * literal area, but for the old format we are done if there is no * space in the fixed attribute fork.
*/ if (!xfs_has_attr2(mp)) return 0;
dsize = dp->i_df.if_bytes;
switch (dp->i_df.if_format) { case XFS_DINODE_FMT_EXTENTS: /* * If there is no attr fork and the data fork is extents, * determine if creating the default attr fork will result * in the extents form migrating to btree. If so, the * minimum offset only needs to be the space required for * the btree root.
*/ if (!dp->i_forkoff && dp->i_df.if_bytes >
xfs_default_attroffset(dp))
dsize = xfs_bmdr_space_calc(MINDBTPTRS); break; case XFS_DINODE_FMT_BTREE: /* * If we have a data btree then keep forkoff if we have one, * otherwise we are adding a new attr, so then we set * minforkoff to where the btree root can finish so we have * plenty of room for attrs
*/ if (dp->i_forkoff) { if (offset < dp->i_forkoff) return 0; return dp->i_forkoff;
}
dsize = xfs_bmap_bmdr_space(dp->i_df.if_broot); break;
}
/* * A data fork btree root must have space for at least * MINDBTPTRS key/ptr pairs if the data fork is small or empty.
*/
minforkoff = max_t(int64_t, dsize, xfs_bmdr_space_calc(MINDBTPTRS));
minforkoff = roundup(minforkoff, 8) >> 3;
/* attr fork btree root can have at least this many key/ptr pairs */
maxforkoff = XFS_LITINO(mp) - xfs_bmdr_space_calc(MINABTPTRS);
maxforkoff = maxforkoff >> 3; /* rounded down */
if (offset >= maxforkoff) return maxforkoff; if (offset >= minforkoff) return offset; return 0;
}
/* * Switch on the ATTR2 superblock bit (implies also FEATURES2) unless: * - noattr2 mount option is set, * - on-disk version bit says it is already set, or * - the attr2 mount option is not set to enable automatic upgrade from attr1.
*/ STATICvoid
xfs_sbversion_add_attr2( struct xfs_mount *mp, struct xfs_trans *tp)
{ if (xfs_has_noattr2(mp)) return; if (mp->m_sb.sb_features2 & XFS_SB_VERSION2_ATTR2BIT) return; if (!xfs_has_attr2(mp)) return;
/* * Return the entry if the attr in args is found, or NULL if not.
*/ struct xfs_attr_sf_entry *
xfs_attr_sf_findname( struct xfs_da_args *args)
{ struct xfs_attr_sf_hdr *sf = args->dp->i_af.if_data; struct xfs_attr_sf_entry *sfe;
/* * After the last attribute is removed revert to original inode format, * making all literal area available to the data fork once more.
*/ void
xfs_attr_fork_remove( struct xfs_inode *ip, struct xfs_trans *tp)
{
ASSERT(ip->i_af.if_nextents == 0);
/* * Remove an attribute from the shortform attribute list structure.
*/ int
xfs_attr_sf_removename( struct xfs_da_args *args)
{ struct xfs_inode *dp = args->dp; struct xfs_mount *mp = dp->i_mount; struct xfs_attr_sf_hdr *sf = dp->i_af.if_data; struct xfs_attr_sf_entry *sfe;
uint16_t totsize = be16_to_cpu(sf->totsize); void *next, *end; int size = 0;
trace_xfs_attr_sf_remove(args);
sfe = xfs_attr_sf_findname(args); if (!sfe) { /* * If we are recovering an operation, finding nothing to remove * is not an error, it just means there was nothing to clean up.
*/ if (args->op_flags & XFS_DA_OP_RECOVERY) return 0; return -ENOATTR;
}
/* * Fix up the attribute fork data, covering the hole
*/
size = xfs_attr_sf_entsize(sfe);
next = xfs_attr_sf_nextentry(sfe);
end = xfs_attr_sf_endptr(sf); if (next < end)
memmove(sfe, next, end - next);
sf->count--;
totsize -= size;
sf->totsize = cpu_to_be16(totsize);
/* * Retrieve the attribute value and length. * * If args->valuelen is zero, only the length needs to be returned. Unlike a * lookup, we only return an error if the attribute does not exist or we can't * retrieve the value.
*/ int
xfs_attr_shortform_getvalue( struct xfs_da_args *args)
{ struct xfs_attr_sf_entry *sfe;
/* * Check a leaf attribute block to see if all the entries would fit into * a shortform attribute list.
*/ int
xfs_attr_shortform_allfit( struct xfs_buf *bp, struct xfs_inode *dp)
{ struct xfs_attr_leafblock *leaf; struct xfs_attr_leaf_entry *entry;
xfs_attr_leaf_name_local_t *name_loc; struct xfs_attr3_icleaf_hdr leafhdr; int bytes; int i; struct xfs_mount *mp = bp->b_mount;
bytes = sizeof(struct xfs_attr_sf_hdr); for (i = 0; i < leafhdr.count; entry++, i++) { if (entry->flags & XFS_ATTR_INCOMPLETE) continue; /* don't copy partial entries */ if (!(entry->flags & XFS_ATTR_LOCAL)) return 0;
name_loc = xfs_attr3_leaf_name_local(leaf, i); if (name_loc->namelen >= XFS_ATTR_SF_ENTSIZE_MAX) return 0; if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX) return 0;
bytes += xfs_attr_sf_entsize_byname(name_loc->namelen,
be16_to_cpu(name_loc->valuelen));
} if (xfs_has_attr2(dp->i_mount) &&
(dp->i_df.if_format != XFS_DINODE_FMT_BTREE) &&
(bytes == sizeof(struct xfs_attr_sf_hdr))) return -1; return xfs_attr_shortform_bytesfit(dp, bytes);
}
/* Verify the consistency of a raw inline attribute fork. */
xfs_failaddr_t
xfs_attr_shortform_verify( struct xfs_attr_sf_hdr *sfp,
size_t size)
{ struct xfs_attr_sf_entry *sfep = xfs_attr_sf_firstentry(sfp); struct xfs_attr_sf_entry *next_sfep; char *endp; int i;
/* * Give up if the attribute is way too short.
*/ if (size < sizeof(struct xfs_attr_sf_hdr)) return __this_address;
endp = (char *)sfp + size;
/* Check all reported entries */ for (i = 0; i < sfp->count; i++) { /* * struct xfs_attr_sf_entry has a variable length. * Check the fixed-offset parts of the structure are * within the data buffer. * xfs_attr_sf_entry is defined with a 1-byte variable * array at the end, so we must subtract that off.
*/ if (((char *)sfep + sizeof(*sfep)) >= endp) return __this_address;
/* Don't allow names with known bad length. */ if (sfep->namelen == 0) return __this_address;
/* * Check that the variable-length part of the structure is * within the data buffer. The next entry starts after the * name component, so nextentry is an acceptable test.
*/
next_sfep = xfs_attr_sf_nextentry(sfep); if ((char *)next_sfep > endp) return __this_address;
/* * Check for unknown flags. Short form doesn't support * the incomplete or local bits, so we can use the namespace * mask here.
*/ if (sfep->flags & ~XFS_ATTR_NSP_ONDISK_MASK) return __this_address;
/* * Check for invalid namespace combinations. We only allow * one namespace flag per xattr, so we can just count the * bits (i.e. hweight) here.
*/ if (!xfs_attr_check_namespace(sfep->flags)) return __this_address;
/* XXX (dgc): buffer is about to be marked stale - why zero it? */
memset(bp->b_addr, 0, args->geo->blksize);
/* * Clean out the prior contents of the attribute list.
*/
error = xfs_da_shrink_inode(args, 0, bp); if (error) goto out;
if (forkoff == -1) { /* * Don't remove the attr fork if this operation is the first * part of a attr replace operations. We're going to add a new * attr immediately, so we need to keep the attr fork around in * this case.
*/ if (!(args->op_flags & XFS_DA_OP_REPLACE)) {
ASSERT(xfs_has_attr2(dp->i_mount));
ASSERT(dp->i_df.if_format != XFS_DINODE_FMT_BTREE);
xfs_attr_fork_remove(dp, args->trans);
} goto out;
}
/* * Copy leaf to new buffer and log it.
*/
xfs_da_buf_copy(bp2, bp1, args->geo->blksize);
xfs_trans_log_buf(args->trans, bp2, 0, args->geo->blksize - 1);
/* * Set up the new root node.
*/
error = xfs_da3_node_create(args, 0, 1, &bp1, XFS_ATTR_FORK); if (error) goto out;
node = bp1->b_addr;
xfs_da3_node_hdr_from_disk(mp, &icnodehdr, node);
/*======================================================================== * Routines used for growing the Btree.
*========================================================================*/
/* * Create the initial contents of a leaf attribute list * or a leaf in a node attribute list.
*/ STATICint
xfs_attr3_leaf_create( struct xfs_da_args *args,
xfs_dablk_t blkno, struct xfs_buf **bpp)
{ struct xfs_attr_leafblock *leaf; struct xfs_attr3_icleaf_hdr ichdr; struct xfs_inode *dp = args->dp; struct xfs_mount *mp = dp->i_mount; struct xfs_buf *bp; int error;
/* * Split the leaf node, rebalance, then add the new entry. * * Returns 0 if the entry was added, 1 if a further split is needed or a * negative error number otherwise.
*/ int
xfs_attr3_leaf_split( struct xfs_da_state *state, struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk)
{ bool added;
xfs_dablk_t blkno; int error;
trace_xfs_attr_leaf_split(state->args);
/* * Allocate space for a new leaf node.
*/
ASSERT(oldblk->magic == XFS_ATTR_LEAF_MAGIC);
error = xfs_da_grow_inode(state->args, &blkno); if (error) return error;
error = xfs_attr3_leaf_create(state->args, blkno, &newblk->bp); if (error) return error;
newblk->blkno = blkno;
newblk->magic = XFS_ATTR_LEAF_MAGIC;
/* * Rebalance the entries across the two leaves. * NOTE: rebalance() currently depends on the 2nd block being empty.
*/
xfs_attr3_leaf_rebalance(state, oldblk, newblk);
error = xfs_da3_blk_link(state, oldblk, newblk); if (error) return error;
/* * Save info on "old" attribute for "atomic rename" ops, leaf_add() * modifies the index/blkno/rmtblk/rmtblkcnt fields to show the * "new" attrs info. Will need the "old" info to remove it later. * * Insert the "new" entry in the correct block.
*/ if (state->inleaf) {
trace_xfs_attr_leaf_add_old(state->args);
added = xfs_attr3_leaf_add(oldblk->bp, state->args);
} else {
trace_xfs_attr_leaf_add_new(state->args);
added = xfs_attr3_leaf_add(newblk->bp, state->args);
}
/* * Update last hashval in each block since we added the name.
*/
oldblk->hashval = xfs_attr_leaf_lasthash(oldblk->bp, NULL);
newblk->hashval = xfs_attr_leaf_lasthash(newblk->bp, NULL); if (!added) return 1; return 0;
}
/* * Add a name to the leaf attribute list structure.
*/ bool
xfs_attr3_leaf_add( struct xfs_buf *bp, struct xfs_da_args *args)
{ struct xfs_attr_leafblock *leaf; struct xfs_attr3_icleaf_hdr ichdr; int tablesize; int entsize; bool added = true; int sum; int tmp; int i;
/* * Search through freemap for first-fit on new name length. * (may need to figure in size of entry struct too)
*/
tablesize = (ichdr.count + 1) * sizeof(xfs_attr_leaf_entry_t)
+ xfs_attr3_leaf_hdr_size(leaf); for (sum = 0, i = XFS_ATTR_LEAF_MAPSIZE - 1; i >= 0; i--) { if (tablesize > ichdr.firstused) {
sum += ichdr.freemap[i].size; continue;
} if (!ichdr.freemap[i].size) continue; /* no space in this map */
tmp = entsize; if (ichdr.freemap[i].base < ichdr.firstused)
tmp += sizeof(xfs_attr_leaf_entry_t); if (ichdr.freemap[i].size >= tmp) {
xfs_attr3_leaf_add_work(bp, &ichdr, args, i); goto out_log_hdr;
}
sum += ichdr.freemap[i].size;
}
/* * If there are no holes in the address space of the block, * and we don't have enough freespace, then compaction will do us * no good and we should just give up.
*/ if (!ichdr.holes && sum < entsize) returnfalse;
/* * Compact the entries to coalesce free space. * This may change the hdr->count via dropping INCOMPLETE entries.
*/
xfs_attr3_leaf_compact(args, &ichdr, bp);
/* * After compaction, the block is guaranteed to have only one * free region, in freemap[0]. If it is not big enough, give up.
*/ if (ichdr.freemap[0].size < (entsize + sizeof(xfs_attr_leaf_entry_t))) {
added = false; goto out_log_hdr;
}
/* * Force open some space in the entry array and fill it in.
*/
entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; if (args->index < ichdr->count) {
tmp = ichdr->count - args->index;
tmp *= sizeof(xfs_attr_leaf_entry_t);
memmove(entry + 1, entry, tmp);
xfs_trans_log_buf(args->trans, bp,
XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry)));
}
ichdr->count++;
/* * Allocate space for the new string (at the end of the run).
*/
mp = args->trans->t_mountp;
ASSERT(ichdr->freemap[mapindex].base < args->geo->blksize);
ASSERT((ichdr->freemap[mapindex].base & 0x3) == 0);
ASSERT(ichdr->freemap[mapindex].size >=
xfs_attr_leaf_newentsize(args, NULL));
ASSERT(ichdr->freemap[mapindex].size < args->geo->blksize);
ASSERT((ichdr->freemap[mapindex].size & 0x3) == 0);
/* * For "remote" attribute values, simply note that we need to * allocate space for the "remote" value. We can't actually * allocate the extents in this transaction, and we can't decide * which blocks they should be as we might allocate more blocks * as part of this transaction (a split operation for example).
*/ if (entry->flags & XFS_ATTR_LOCAL) {
name_loc = xfs_attr3_leaf_name_local(leaf, args->index);
name_loc->namelen = args->namelen;
name_loc->valuelen = cpu_to_be16(args->valuelen);
memcpy((char *)name_loc->nameval, args->name, args->namelen);
memcpy((char *)&name_loc->nameval[args->namelen], args->value,
be16_to_cpu(name_loc->valuelen));
} else {
name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
name_rmt->namelen = args->namelen;
memcpy((char *)name_rmt->name, args->name, args->namelen);
entry->flags |= XFS_ATTR_INCOMPLETE; /* just in case */
name_rmt->valuelen = 0;
name_rmt->valueblk = 0;
args->rmtblkno = 1;
args->rmtblkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
args->rmtvaluelen = args->valuelen;
}
xfs_trans_log_buf(args->trans, bp,
XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index),
xfs_attr_leaf_entsize(leaf, args->index)));
/* * Update the control info for this leaf node
*/ if (be16_to_cpu(entry->nameidx) < ichdr->firstused)
ichdr->firstused = be16_to_cpu(entry->nameidx);
/* * Copy the on-disk header back into the destination buffer to ensure * all the information in the header that is not part of the incore * header structure is preserved.
*/
memcpy(bp->b_addr, tmpbuffer, xfs_attr3_leaf_hdr_size(leaf_src));
/* write the header back to initialise the underlying buffer */
xfs_attr3_leaf_hdr_to_disk(args->geo, leaf_dst, ichdr_dst);
/* * Copy all entry's in the same (sorted) order, * but allocate name/value pairs packed and in sequence.
*/
xfs_attr3_leaf_moveents(args, leaf_src, &ichdr_src, 0,
leaf_dst, ichdr_dst, 0, ichdr_src.count); /* * this logs the entire buffer, but the caller must write the header * back to the buffer when it is finished modifying it.
*/
xfs_trans_log_buf(trans, bp, 0, args->geo->blksize - 1);
kvfree(tmpbuffer);
}
/* * Compare two leaf blocks "order". * Return 0 unless leaf2 should go before leaf1.
*/ staticint
xfs_attr3_leaf_order( struct xfs_buf *leaf1_bp, struct xfs_attr3_icleaf_hdr *leaf1hdr, struct xfs_buf *leaf2_bp, struct xfs_attr3_icleaf_hdr *leaf2hdr)
{ struct xfs_attr_leaf_entry *entries1; struct xfs_attr_leaf_entry *entries2;
/* * Redistribute the attribute list entries between two leaf nodes, * taking into account the size of the new entry. * * NOTE: if new block is empty, then it will get the upper half of the * old block. At present, all (one) callers pass in an empty second block. * * This code adjusts the args->index/blkno and args->index2/blkno2 fields * to match what it is doing in splitting the attribute leaf block. Those * values are used in "atomic rename" operations on attributes. Note that * the "new" and "old" values can end up in different blocks.
*/ STATICvoid
xfs_attr3_leaf_rebalance( struct xfs_da_state *state, struct xfs_da_state_blk *blk1, struct xfs_da_state_blk *blk2)
{ struct xfs_da_args *args; struct xfs_attr_leafblock *leaf1; struct xfs_attr_leafblock *leaf2; struct xfs_attr3_icleaf_hdr ichdr1; struct xfs_attr3_icleaf_hdr ichdr2; struct xfs_attr_leaf_entry *entries1; struct xfs_attr_leaf_entry *entries2; int count; int totallen; int max; int space; int swap;
/* * Check ordering of blocks, reverse if it makes things simpler. * * NOTE: Given that all (current) callers pass in an empty * second block, this code should never set "swap".
*/
swap = 0; if (xfs_attr3_leaf_order(blk1->bp, &ichdr1, blk2->bp, &ichdr2)) {
swap(blk1, blk2);
/* swap structures rather than reconverting them */
swap(ichdr1, ichdr2);
/* * Examine entries until we reduce the absolute difference in * byte usage between the two blocks to a minimum. Then get * the direction to copy and the number of elements to move. * * "inleaf" is true if the new entry should be inserted into blk1. * If "swap" is also true, then reverse the sense of "inleaf".
*/
state->inleaf = xfs_attr3_leaf_figure_balance(state, blk1, &ichdr1,
blk2, &ichdr2,
&count, &totallen); if (swap)
state->inleaf = !state->inleaf;
/* * Move any entries required from leaf to leaf:
*/ if (count < ichdr1.count) { /* * Figure the total bytes to be added to the destination leaf.
*/ /* number entries being moved */
count = ichdr1.count - count;
space = ichdr1.usedbytes - totallen;
space += count * sizeof(xfs_attr_leaf_entry_t);
/* * leaf2 is the destination, compact it if it looks tight.
*/
max = ichdr2.firstused - xfs_attr3_leaf_hdr_size(leaf1);
max -= ichdr2.count * sizeof(xfs_attr_leaf_entry_t); if (space > max)
xfs_attr3_leaf_compact(args, &ichdr2, blk2->bp);
/* * Move high entries from leaf1 to low end of leaf2.
*/
xfs_attr3_leaf_moveents(args, leaf1, &ichdr1,
ichdr1.count - count, leaf2, &ichdr2, 0, count);
} elseif (count > ichdr1.count) { /* * I assert that since all callers pass in an empty * second buffer, this code should never execute.
*/
ASSERT(0);
/* * Figure the total bytes to be added to the destination leaf.
*/ /* number entries being moved */
count -= ichdr1.count;
space = totallen - ichdr1.usedbytes;
space += count * sizeof(xfs_attr_leaf_entry_t);
/* * leaf1 is the destination, compact it if it looks tight.
*/
max = ichdr1.firstused - xfs_attr3_leaf_hdr_size(leaf1);
max -= ichdr1.count * sizeof(xfs_attr_leaf_entry_t); if (space > max)
xfs_attr3_leaf_compact(args, &ichdr1, blk1->bp);
/* * Move low entries from leaf2 to high end of leaf1.
*/
xfs_attr3_leaf_moveents(args, leaf2, &ichdr2, 0, leaf1, &ichdr1,
ichdr1.count, count);
}
/* * Copy out last hashval in each block for B-tree code.
*/
entries1 = xfs_attr3_leaf_entryp(leaf1);
entries2 = xfs_attr3_leaf_entryp(leaf2);
blk1->hashval = be32_to_cpu(entries1[ichdr1.count - 1].hashval);
blk2->hashval = be32_to_cpu(entries2[ichdr2.count - 1].hashval);
/* * Adjust the expected index for insertion. * NOTE: this code depends on the (current) situation that the * second block was originally empty. * * If the insertion point moved to the 2nd block, we must adjust * the index. We must also track the entry just following the * new entry for use in an "atomic rename" operation, that entry * is always the "old" entry and the "new" entry is what we are * inserting. The index/blkno fields refer to the "old" entry, * while the index2/blkno2 fields refer to the "new" entry.
*/ if (blk1->index > ichdr1.count) {
ASSERT(state->inleaf == 0);
blk2->index = blk1->index - ichdr1.count;
args->index = args->index2 = blk2->index;
args->blkno = args->blkno2 = blk2->blkno;
} elseif (blk1->index == ichdr1.count) { if (state->inleaf) {
args->index = blk1->index;
args->blkno = blk1->blkno;
args->index2 = 0;
args->blkno2 = blk2->blkno;
} else { /* * On a double leaf split, the original attr location * is already stored in blkno2/index2, so don't * overwrite it overwise we corrupt the tree.
*/
blk2->index = blk1->index - ichdr1.count;
args->index = blk2->index;
args->blkno = blk2->blkno; if (!state->extravalid) { /* * set the new attr location to match the old * one and let the higher level split code * decide where in the leaf to place it.
*/
args->index2 = blk2->index;
args->blkno2 = blk2->blkno;
}
}
} else {
ASSERT(state->inleaf == 1);
args->index = args->index2 = blk1->index;
args->blkno = args->blkno2 = blk1->blkno;
}
}
/* * Examine entries until we reduce the absolute difference in * byte usage between the two blocks to a minimum. * GROT: Is this really necessary? With other than a 512 byte blocksize, * GROT: there will always be enough room in either block for a new entry. * GROT: Do a double-split for this case?
*/ STATICint
xfs_attr3_leaf_figure_balance( struct xfs_da_state *state, struct xfs_da_state_blk *blk1, struct xfs_attr3_icleaf_hdr *ichdr1, struct xfs_da_state_blk *blk2, struct xfs_attr3_icleaf_hdr *ichdr2, int *countarg, int *usedbytesarg)
{ struct xfs_attr_leafblock *leaf1 = blk1->bp->b_addr; struct xfs_attr_leafblock *leaf2 = blk2->bp->b_addr; struct xfs_attr_leaf_entry *entry; int count; int max; int index; int totallen = 0; int half; int lastdelta; int foundit = 0; int tmp;
/* * Examine entries until we reduce the absolute difference in * byte usage between the two blocks to a minimum.
*/
max = ichdr1->count + ichdr2->count;
half = (max + 1) * sizeof(*entry);
half += ichdr1->usedbytes + ichdr2->usedbytes +
xfs_attr_leaf_newentsize(state->args, NULL);
half /= 2;
lastdelta = state->args->geo->blksize;
entry = xfs_attr3_leaf_entryp(leaf1); for (count = index = 0; count < max; entry++, index++, count++) {
#define XFS_ATTR_ABS(A) (((A) < 0) ? -(A) : (A)) /* * The new entry is in the first block, account for it.
*/ if (count == blk1->index) {
tmp = totallen + sizeof(*entry) +
xfs_attr_leaf_newentsize(state->args, NULL); if (XFS_ATTR_ABS(half - tmp) > lastdelta) break;
lastdelta = XFS_ATTR_ABS(half - tmp);
totallen = tmp;
foundit = 1;
}
/* * Wrap around into the second block if necessary.
*/ if (count == ichdr1->count) {
leaf1 = leaf2;
entry = xfs_attr3_leaf_entryp(leaf1);
index = 0;
}
/* * Figure out if next leaf entry would be too much.
*/
tmp = totallen + sizeof(*entry) + xfs_attr_leaf_entsize(leaf1,
index); if (XFS_ATTR_ABS(half - tmp) > lastdelta) break;
lastdelta = XFS_ATTR_ABS(half - tmp);
totallen = tmp; #undef XFS_ATTR_ABS
}
/* * Calculate the number of usedbytes that will end up in lower block. * If new entry not in lower block, fix up the count.
*/
totallen -= count * sizeof(*entry); if (foundit) {
totallen -= sizeof(*entry) +
xfs_attr_leaf_newentsize(state->args, NULL);
}
/*======================================================================== * Routines used for shrinking the Btree.
*========================================================================*/
/* * Check a leaf block and its neighbors to see if the block should be * collapsed into one or the other neighbor. Always keep the block * with the smaller block number. * If the current block is over 50% full, don't try to join it, return 0. * If the block is empty, fill in the state structure and return 2. * If it can be collapsed, fill in the state structure and return 1. * If nothing can be done, return 0. * * GROT: allow for INCOMPLETE entries in calculation.
*/ int
xfs_attr3_leaf_toosmall( struct xfs_da_state *state, int *action)
{ struct xfs_attr_leafblock *leaf; struct xfs_da_state_blk *blk; struct xfs_attr3_icleaf_hdr ichdr; struct xfs_buf *bp;
xfs_dablk_t blkno; int bytes; int forward; int error; int retval; int i;
trace_xfs_attr_leaf_toosmall(state->args);
/* * Check for the degenerate case of the block being over 50% full. * If so, it's not worth even looking to see if we might be able * to coalesce with a sibling.
*/
blk = &state->path.blk[ state->path.active-1 ];
leaf = blk->bp->b_addr;
xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr, leaf);
bytes = xfs_attr3_leaf_hdr_size(leaf) +
ichdr.count * sizeof(xfs_attr_leaf_entry_t) +
ichdr.usedbytes; if (bytes > (state->args->geo->blksize >> 1)) {
*action = 0; /* blk over 50%, don't try to join */ return 0;
}
/* * Check for the degenerate case of the block being empty. * If the block is empty, we'll simply delete it, no need to * coalesce it with a sibling block. We choose (arbitrarily) * to merge with the forward block unless it is NULL.
*/ if (ichdr.count == 0) { /* * Make altpath point to the block we want to keep and * path point to the block we want to drop (this one).
*/
forward = (ichdr.forw != 0);
memcpy(&state->altpath, &state->path, sizeof(state->path));
error = xfs_da3_path_shift(state, &state->altpath, forward,
0, &retval); if (error) return error; if (retval) {
*action = 0;
} else {
*action = 2;
} return 0;
}
/* * Examine each sibling block to see if we can coalesce with * at least 25% free space to spare. We need to figure out * whether to merge with the forward or the backward block. * We prefer coalescing with the lower numbered sibling so as * to shrink an attribute list over time.
*/ /* start with smaller blk num */
forward = ichdr.forw < ichdr.back; for (i = 0; i < 2; forward = !forward, i++) { struct xfs_attr3_icleaf_hdr ichdr2; if (forward)
blkno = ichdr.forw; else
blkno = ichdr.back; if (blkno == 0) continue;
error = xfs_attr3_leaf_read(state->args->trans, state->args->dp,
state->args->owner, blkno, &bp); if (error) return error;
xfs_trans_brelse(state->args->trans, bp); if (bytes >= 0) break; /* fits with at least 25% to spare */
} if (i >= 2) {
*action = 0; return 0;
}
/* * Make altpath point to the block we want to keep (the lower * numbered block) and path point to the block we want to drop.
*/
memcpy(&state->altpath, &state->path, sizeof(state->path)); if (blkno < blk->blkno) {
error = xfs_da3_path_shift(state, &state->altpath, forward,
0, &retval);
} else {
error = xfs_da3_path_shift(state, &state->path, forward,
0, &retval);
} if (error) return error; if (retval) {
*action = 0;
} else {
*action = 1;
} return 0;
}
/* * Remove a name from the leaf attribute list structure. * * Return 1 if leaf is less than 37% full, 0 if >= 37% full. * If two leaves are 37% full, when combined they will leave 25% free.
*/ int
xfs_attr3_leaf_remove( struct xfs_buf *bp, struct xfs_da_args *args)
{ struct xfs_attr_leafblock *leaf; struct xfs_attr3_icleaf_hdr ichdr; struct xfs_attr_leaf_entry *entry; int before; int after; int smallest; int entsize; int tablesize; int tmp; int i;
/*
 * NOTE(review): this function body appears truncated in this dump — "leaf"
 * and "ichdr" are used below without any visible initialization (presumably
 * from bp->b_addr and xfs_attr3_leaf_hdr_from_disk — TODO confirm against
 * upstream), and the two loops opened below are never visibly closed.  The
 * code is left byte-identical; only comments are added.
 */
/* * Scan through free region table: * check for adjacency of free'd entry with an existing one, * find smallest free region in case we need to replace it, * adjust any map that borders the entry table,
*/
tablesize = ichdr.count * sizeof(xfs_attr_leaf_entry_t)
+ xfs_attr3_leaf_hdr_size(leaf);
tmp = ichdr.freemap[0].size;
before = after = -1;
smallest = XFS_ATTR_LEAF_MAPSIZE - 1;
/* entsize: on-disk bytes consumed by the entry being removed */
entsize = xfs_attr_leaf_entsize(leaf, args->index); for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
ASSERT(ichdr.freemap[i].base < args->geo->blksize);
/* a freemap region starting right at the entry table's end grows
 * backwards by one entry slot as the table shrinks */
ASSERT(ichdr.freemap[i].size < args->geo->blksize); if (ichdr.freemap[i].base == tablesize) {
ichdr.freemap[i].base -= sizeof(xfs_attr_leaf_entry_t);
ichdr.freemap[i].size += sizeof(xfs_attr_leaf_entry_t);
}
/* * If we removed the first entry, re-find the first used byte * in the name area. Note that if the entry was the "firstused", * then we don't have a "hole" in our block resulting from * removing the name.
*/ if (smallest) {
/* start from blksize and walk entries down to the lowest nameidx */
tmp = args->geo->blksize;
entry = xfs_attr3_leaf_entryp(leaf); for (i = ichdr.count - 1; i >= 0; entry++, i--) {
ASSERT(be16_to_cpu(entry->nameidx) >= ichdr.firstused);
ASSERT(be16_to_cpu(entry->nameidx) < args->geo->blksize);
/* * Check if leaf is less than 50% full, caller may want to * "join" the leaf with a sibling if so.
*/
tmp = ichdr.usedbytes + xfs_attr3_leaf_hdr_size(leaf) +
ichdr.count * sizeof(xfs_attr_leaf_entry_t);
/* magicpct is the 37%-full threshold; below it the caller may join */
return tmp < args->geo->magicpct; /* leaf is < 37% full */
}
/* * Move all the attribute list entries from drop_leaf into save_leaf.
*/ void
xfs_attr3_leaf_unbalance( struct xfs_da_state *state, struct xfs_da_state_blk *drop_blk, struct xfs_da_state_blk *save_blk)
{ struct xfs_attr_leafblock *drop_leaf = drop_blk->bp->b_addr; struct xfs_attr_leafblock *save_leaf = save_blk->bp->b_addr; struct xfs_attr3_icleaf_hdr drophdr; struct xfs_attr3_icleaf_hdr savehdr; struct xfs_attr_leaf_entry *entry;
/*
 * NOTE(review): the following disclaimer is website boilerplate that leaked
 * into this source dump; it is not part of the original file.  English
 * translation: "The information on this web page was carefully compiled to
 * the best of our knowledge.  However, neither completeness, nor correctness,
 * nor quality of the provided information is guaranteed.  Note: the colored
 * syntax display and the measurement are still experimental."
 */