/*
 *	xt_getpage()
 *
 * function: get the page buffer for a specified block address.
 *
 * parameters:
 *	ip	- pointer to the inode;
 *	bn	- block number (s64) of the xtree page to be retrieved;
 *	mp	- pointer to a metapage pointer where the page buffer is
 *		  returned;
 *
 * returns:
 *	A pointer to the xtree page (xtpage_t) on success, -EIO on error.
 */
/*
 *	xtLookup()
 *
 * function: map a single page into a physical extent;
 *
 * parameters:
 *	ip	- file object;
 *	lstart	- logical block offset of the page to map;
 *	llen	- maximum run length (in blocks) the caller wants;
 *	pflag	- returned xad flag of the covering extent;
 *	paddr	- returned physical block address (0 for a hole);
 *	plen	- returned run length actually covered;
 *	no_check - nonzero to skip the beyond-EOF check;
 *
 * return: 0 on success (a hole leaves *paddr == 0);
 *	negative errno from xtSearch() on error.
 */
int xtLookup(struct inode *ip, s64 lstart,
	     s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
{
	int rc = 0;
	struct btstack btstack;
	int cmp;
	s64 bn;
	struct metapage *mp;
	xtpage_t *p;
	int index;
	xad_t *xad;
	s64 next, size, xoff, xend;
	int xlen;
	s64 xaddr;

	*paddr = 0;
	*plen = llen;

	if (!no_check) {
		/* is lookup offset beyond eof ? */
		size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
		    JFS_SBI(ip->i_sb)->l2bsize;
		if (lstart >= size)
			return 0;
	}

	/*
	 * search for the xad entry covering the logical extent
	 */
	if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) {
		jfs_err("xtLookup: xtSearch returned %d", rc);
		return rc;
	}

	/*
	 *	compute the physical extent covering logical extent
	 *
	 * N.B. search may have failed (e.g., hole in sparse file),
	 * and returned the index of the next entry.
	 */
	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* is xad found covering start of logical extent ?
	 * lstart is a page start address,
	 * i.e., lstart cannot start in a hole;
	 */
	if (cmp) {
		if (next)
			*plen = min(next - lstart, llen);
		goto out;
	}

	/*
	 * lxd covered by xad: pick up the covering extent's geometry
	 */
	xad = &p->xad[index];
	xoff = offsetXAD(xad);
	xlen = lengthXAD(xad);
	xend = xoff + xlen;
	xaddr = addressXAD(xad);

	/* initialize new pxd */
	*pflag = xad->flag;
	*paddr = xaddr + (lstart - xoff);
	/* a page must be fully covered by an xad */
	*plen = min(xend - lstart, llen);

      out:
	XT_PUTPAGE(mp);

	return rc;
}
/* * xtSearch() * * function: search for the xad entry covering specified offset. * * parameters: * ip - file object; * xoff - extent offset; * nextp - address of next extent (if any) for search miss * cmpp - comparison result: * btstack - traverse stack; * flag - search process flag (XT_INSERT); * * returns: * btstack contains (bn, index) of search path traversed to the entry. * *cmpp is set to result of comparison with the entry returned. * the page containing the entry is pinned at exit.
*/ staticint xtSearch(struct inode *ip, s64 xoff, s64 *nextp, int *cmpp, struct btstack * btstack, int flag)
/* NOTE(review): "staticint" above is a typo for "static int"; this will not
 * compile as-is. Reconcile with upstream fs/jfs/jfs_xtree.c. */
{ struct jfs_inode_info *jfs_ip = JFS_IP(ip); int cmp = 1; /* init for empty page */
s64 bn; /* block number */ struct metapage *mp; /* page buffer */
xtpage_t *p; /* page */
xad_t *xad; int base, index, lim, btindex; struct btframe *btsp; int nsplit = 0; /* number of pages to split */
s64 t64;
s64 next = 0;
INCREMENT(xtStat.search);
BT_CLR(btstack);
btstack->nsplit = 0;
/* * search down tree from root: * * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of * internal page, child page Pi contains entry with k, Ki <= K < Kj. * * if entry with search key K is not found * internal page search find the entry with largest key Ki * less than K which point to the child page to search; * leaf page search find the entry with smallest key Kj * greater than K so that the returned index is the position of * the entry to be shifted right for insertion of new entry. * for empty tree, search key is greater than any key of the tree. * * by convention, root bn = 0.
*/ for (bn = 0;;) { /* get/pin the page to search */
p = xt_getpage(ip, bn, &mp); if (IS_ERR(p)) return PTR_ERR(p);
/* try sequential access heuristics with the previous * access entry in target leaf page: * once search narrowed down into the target leaf, * key must either match an entry in the leaf or * key entry does not exist in the tree;
*/ //fastSearch: if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
/* NOTE(review): the "//fastSearch:" line comment above swallows the "if"
 * that follows it on the same physical line, leaving the condition on the
 * next lines dangling. The "fastSearch:" label should be a standalone
 * comment line with the "if" starting on its own line. */
(p->header.flag & BT_LEAF) &&
(index = jfs_ip->btindex) <
le16_to_cpu(p->header.nextindex)) {
xad = &p->xad[index];
t64 = offsetXAD(xad); if (xoff < t64 + lengthXAD(xad)) { if (xoff >= t64) {
*cmpp = 0; goto out;
}
/* try next sequential entry */
index++; if (index <
le16_to_cpu(p->header.nextindex)) {
xad++;
t64 = offsetXAD(xad); if (xoff < t64 + lengthXAD(xad)) { if (xoff >= t64) {
*cmpp = 0; goto out;
}
/* miss: key falls between * previous and this entry
*/
*cmpp = 1;
next = t64; goto out;
}
/* (xoff >= t64 + lengthXAD(xad)); * matching entry may be further out: * stop heuristic search
*/ /* stop sequential access heuristics */ goto binarySearch;
}
/* (index == p->header.nextindex); * miss: key entry does not exist in * the target leaf/tree
*/
*cmpp = 1; goto out;
}
/* * if hit, return index of the entry found, and * if miss, where new entry with search key is * to be inserted;
*/
out: /* compute number of pages to split */ if (flag & XT_INSERT) { if (p->header.nextindex == /* little-endian */
p->header.maxentry)
nsplit++; else
nsplit = 0;
btstack->nsplit = nsplit;
}
/* save search result */
btsp = btstack->top;
btsp->bn = bn;
btsp->index = index;
btsp->mp = mp;
return 0;
/* NOTE(review): code appears to be missing here. The "goto binarySearch"
 * above has no target label in this fragment; the binary-search loop header
 * (for over [base, base+lim) with XT_CMP on xoff), the search-hit leaf-page
 * handling, the update of the sequential-access heuristics (btindex /
 * btorder), and the "*nextp = next;" store are all absent. The lines below
 * ("search hit - internal page", "if (cmp > 0) ...") are orphaned fragments
 * of that loop - reconcile with upstream before use. */
} /* search hit - internal page: * descend/search its child page
*/ if (index < le16_to_cpu(p->header.nextindex)-1)
next = offsetXAD(&p->xad[index + 1]); goto next;
}
if (cmp > 0) {
base = index + 1;
--lim;
}
}
/* * search miss * * base is the smallest index with key (Kj) greater than * search key (K) and may be zero or maxentry index.
*/ if (base < le16_to_cpu(p->header.nextindex))
next = offsetXAD(&p->xad[base]); /* * search miss - leaf page: * * return location of entry (base) where new entry with * search key K is to be inserted.
*/ if (p->header.flag & BT_LEAF) {
*cmpp = cmp;
/* compute number of pages to split */ if (flag & XT_INSERT) { if (p->header.nextindex ==
p->header.maxentry)
nsplit++; else
nsplit = 0;
btstack->nsplit = nsplit;
}
/* save search result */
btsp = btstack->top;
btsp->bn = bn;
btsp->index = base;
btsp->mp = mp;
/* NOTE(review): the leaf-miss path above appears truncated - upstream
 * updates the sequential-access heuristics, stores *nextp, and returns 0
 * before the non-leaf handling below; that code is missing here. */
/* * search miss - non-leaf page: * * if base is non-zero, decrement base by one to get the parent * entry of the child page to search.
*/
index = base ? base - 1 : base;
/* * go down to child page
*/
next: /* update number of pages to split */ if (p->header.nextindex == p->header.maxentry)
nsplit++; else
nsplit = 0;
/* push (bn, index) of the parent page/entry */ if (BT_STACK_FULL(btstack)) {
jfs_error(ip->i_sb, "stack overrun!\n");
XT_PUTPAGE(mp); return -EIO;
}
BT_PUSH(btstack, bn, index);
/* get the child page block number */
bn = addressXAD(&p->xad[index]);
/* unpin the parent page */
XT_PUTPAGE(mp);
}
}
/*
 *	xtInsert()
 *
 * function:
 *	insert the extent [xoff, xoff + xlen) into the file's xtree,
 *	allocating the backing disk extent if the caller did not.
 *
 * parameter:
 *	tid	- transaction id;
 *	ip	- file object;
 *	xflag	- extent flag (XAD_NOTRECORDED):
 *	xoff	- extent offset;
 *	xlen	- extent length;
 *	xaddrp	- extent address pointer (in/out):
 *		if (*xaddrp)
 *			caller allocated data extent at *xaddrp;
 *		else
 *			allocate data extent and return its xaddr;
 *	flag	-
 *
 * return:
 *	0 on success; -EEXIST if the range already overlaps a mapped
 *	extent; otherwise a negative errno from search/allocation/split.
 */
int xtInsert(tid_t tid,		/* transaction id */
	     struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
	     int flag)
{
	int rc = 0;
	s64 xaddr, hint;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	int cmp;
	s64 next;
	struct tlock *tlck;
	struct xtlock *xtlck;

	/*
	 *	search for the entry location at which to insert:
	 *
	 * xtFastSearch() and xtSearch() both returns (leaf page
	 * pinned, index at which to insert).
	 * n.b. xtSearch() may return index of maxentry of
	 * the full page.
	 */
	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* This test must follow XT_GETSEARCH since mp must be valid if
	 * we branch to out: */
	if ((cmp == 0) || (next && (xlen > next - xoff))) {
		rc = -EEXIST;
		goto out;
	}

	/*
	 * allocate data extent requested
	 *
	 * allocation hint: last xad
	 */
	if ((xaddr = *xaddrp) == 0) {
		if (index > XTENTRYSTART) {
			xad = &p->xad[index - 1];
			hint = addressXAD(xad) + lengthXAD(xad) - 1;
		} else
			hint = 0;
		if ((rc = dquot_alloc_block(ip, xlen)))
			goto out;
		if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
			dquot_free_block(ip, xlen);
			goto out;
		}
	}

	/*
	 *	insert entry for new extent
	 */
	xflag |= XAD_NEW;

	/*
	 *	if the leaf page is full, split the page and
	 *	propagate up the router entry for the new page from split
	 *
	 *	The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		split.mp = mp;
		split.index = index;
		split.flag = xflag;
		split.off = xoff;
		split.len = xlen;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
			/* undo data extent allocation */
			if (*xaddrp == 0) {
				dbFree(ip, xaddr, (s64) xlen);
				dquot_free_block(ip, xlen);
			}
			return rc;
		}

		*xaddrp = xaddr;
		return 0;
	}

	/*
	 *	insert the new entry into the leaf page
	 */
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	BT_MARK_DIRTY(mp, ip);

	/* if insert into middle, shift right remaining entries. */
	if (index < nextindex)
		memmove(&p->xad[index + 1], &p->xad[index],
			(nextindex - index) * sizeof(xad_t));

	/* insert the new entry: mark the entry NEW */
	xad = &p->xad[index];
	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);

	/* advance next available entry index */
	le16_add_cpu(&p->header.nextindex, 1);

	/* Don't log it if there are no links to the file */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset =
		    (xtlck->lwm.offset) ? min(index,
					      (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length =
		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
	}

	*xaddrp = xaddr;

      out:
	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
/* * xtSplitUp() * * function: * split full pages as propagating insertion up the tree * * parameter: * tid - transaction id; * ip - file object; * split - entry parameter descriptor; * btstack - traverse stack from xtSearch() * * return:
*/ staticint
xtSplitUp(tid_t tid, struct inode *ip, struct xtsplit * split, struct btstack * btstack)
/* NOTE(review): "staticint" above is a typo for "static int". */
{ int rc = 0; struct metapage *smp;
xtpage_t *sp; /* split page */ struct metapage *rmp;
s64 rbn; /* new right page block number */ struct metapage *rcmp;
xtpage_t *rcp; /* right child page */
s64 rcbn; /* right child page block number */ int skip; /* index of entry of insertion */ int nextindex; /* next available entry index of p */ struct btframe *parent; /* parent page entry on traverse stack */
xad_t *xad;
s64 xaddr; int xlen; int nsplit; /* number of pages split */ struct pxdlist pxdlist;
pxd_t *pxd; struct tlock *tlck; struct xtlock *xtlck;
smp = split->mp;
sp = XT_PAGE(ip, smp);
/* is inode xtree root extension/inline EA area free ? */ if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
(le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) &&
(JFS_IP(ip)->mode2 & INLINEEA)) {
sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
JFS_IP(ip)->mode2 &= ~INLINEEA;
BT_MARK_DIRTY(smp, ip); /* * acquire a transaction lock on the leaf page; * * action: xad insertion/extension;
*/
/* if insert into middle, shift right remaining entries. */
skip = split->index;
nextindex = le16_to_cpu(sp->header.nextindex); if (skip < nextindex)
memmove(&sp->xad[skip + 1], &sp->xad[skip],
(nextindex - skip) * sizeof(xad_t));
/* insert the new entry: mark the entry NEW */
xad = &sp->xad[skip];
XT_PUTENTRY(xad, split->flag, split->off, split->len,
split->addr);
/* advance next available entry index */
le16_add_cpu(&sp->header.nextindex, 1);
/* Don't log it if there are no links to the file */ if (!test_cflag(COMMIT_Nolink, ip)) {
tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
xtlck = (struct xtlock *) & tlck->lock;
xtlck->lwm.offset = (xtlck->lwm.offset) ?
min(skip, (int)xtlck->lwm.offset) : skip;
xtlck->lwm.length =
le16_to_cpu(sp->header.nextindex) -
xtlck->lwm.offset;
}
return 0;
}
/* * allocate new index blocks to cover index page split(s) * * allocation hint: ?
*/ if (split->pxdlist == NULL) {
nsplit = btstack->nsplit;
split->pxdlist = &pxdlist;
pxdlist.maxnpxd = pxdlist.npxd = 0;
pxd = &pxdlist.pxd[0];
xlen = JFS_SBI(ip->i_sb)->nbperpage; for (; nsplit > 0; nsplit--, pxd++) { if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
== 0) {
PXDaddress(pxd, xaddr);
PXDlength(pxd, xlen);
pxdlist.maxnpxd++;
continue;
}
/* undo allocation */
XT_PUTPAGE(smp); return rc;
}
}
/* * Split leaf page <sp> into <sp> and a new right page <rp>. * * The split routines insert the new entry into the leaf page, * and acquire txLock as appropriate. * return <rp> pinned and its block number <rpbn>.
*/
rc = (sp->header.flag & BT_ROOT) ?
xtSplitRoot(tid, ip, split, &rmp) :
xtSplitPage(tid, ip, split, &rmp, &rbn);
XT_PUTPAGE(smp);
if (rc) return -EIO; /* * propagate up the router entry for the leaf page just split * * insert a router entry for the new page into the parent page, * propagate the insert/split up the tree by walking back the stack * of (bn of parent page, index of child page entry in parent page) * that were traversed during the search for the page that split. * * the propagation of insert/split up the tree stops if the root * splits or the page inserted into doesn't have to split to hold * the new entry. * * the parent entry for the split page remains the same, and * a new entry is inserted at its right with the first key and * block number of the new right page. * * There are a maximum of 3 pages pinned at any time: * right child, left parent and right parent (when the parent splits) * to keep the child page pinned while working on the parent. * make sure that all pins are released at exit.
*/ while ((parent = BT_POP(btstack)) != NULL) { /* parent page specified by stack frame <parent> */
/* NOTE(review): the loop body reads rcmp/rcbn (and split->off uses
 * rcp->xad below) but they are never assigned here - upstream begins this
 * loop with "rcmp = rmp; rcbn = rbn; rcp = XT_PAGE(ip, rcmp);", which
 * appears to be missing from this fragment. */
/* * insert router entry in parent for new right child page <rp>
*/ /* get/pin the parent page <sp> */
sp = xt_getpage(ip, parent->bn, &smp); if (IS_ERR(sp)) {
XT_PUTPAGE(rcmp); return PTR_ERR(sp);
}
/* * The new key entry goes ONE AFTER the index of parent entry, * because the split was to the right.
*/
skip = parent->index + 1;
/* * split or shift right remaining entries of the parent page
*/
nextindex = le16_to_cpu(sp->header.nextindex); /* * parent page is full - split the parent page
*/ if (nextindex == le16_to_cpu(sp->header.maxentry)) { /* init for parent page split */
split->mp = smp;
split->index = skip; /* index at insert */
split->flag = XAD_NEW;
split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
split->len = JFS_SBI(ip->i_sb)->nbperpage;
split->addr = rcbn;
/* unpin previous right child page */
XT_PUTPAGE(rcmp);
/* The split routines insert the new entry, * and acquire txLock as appropriate. * return <rp> pinned and its block number <rpbn>.
*/
rc = (sp->header.flag & BT_ROOT) ?
xtSplitRoot(tid, ip, split, &rmp) :
xtSplitPage(tid, ip, split, &rmp, &rbn); if (rc) {
XT_PUTPAGE(smp); return rc;
}
XT_PUTPAGE(smp); /* keep new child page <rp> pinned */
} /* * parent page is not full - insert in parent page
*/ else { /* * insert router entry in parent for the right child * page from the first entry of the right child page:
*/ /* * acquire a transaction lock on the parent page; * * action: router xad insertion;
*/
BT_MARK_DIRTY(smp, ip);
/* * if insert into middle, shift right remaining entries
*/ if (skip < nextindex)
memmove(&sp->xad[skip + 1], &sp->xad[skip],
(nextindex -
skip) << L2XTSLOTSIZE);
/* NOTE(review): from here on, identifiers rxtlck, sxtlck (below) and
 * nextbn, rp, maxentry, middle, righthalf, n, quota_allocation (further
 * down) are not declared in xtSplitUp() - these lines appear to be spliced
 * in from xtSplitPage(), and xtSplitUp()'s own insert/epilogue code is
 * missing. Reconcile with upstream before use. */
BT_MARK_DIRTY(smp, ip); /* Don't log it if there are no links to the file */ if (!test_cflag(COMMIT_Nolink, ip)) { /* * acquire a transaction lock on the new right page;
*/
tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
rxtlck = (struct xtlock *) & tlck->lock;
rxtlck->lwm.offset = XTENTRYSTART; /* * acquire a transaction lock on the split page
*/
tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
sxtlck = (struct xtlock *) & tlck->lock;
}
/* * sequential append at tail (after last entry of last page) * * if splitting the last page on a level because of appending * a entry to it (skip is maxentry), it's likely that the access is * sequential. adding an empty page on the side of the level is less * work and can push the fill factor much higher than normal. * if we're wrong it's no big deal - we will do the split the right * way next time. * (it may look like it's equally easy to do a similar hack for * reverse sorted data, that is, split the tree left, but it's not. * Be my guest.)
*/ if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) { /* * acquire a transaction lock on the new/right page; * * action: xad insertion;
*/ /* insert entry at the first entry of the new right page */
xad = &rp->xad[XTENTRYSTART];
XT_PUTENTRY(xad, split->flag, split->off, split->len,
split->addr);
/* * update previous pointer of old next/right page of <sp>
*/ if (nextbn != 0) {
p = xt_getpage(ip, nextbn, &mp); if (IS_ERR(p)) {
XT_PUTPAGE(rmp); return PTR_ERR(p);
}
BT_MARK_DIRTY(mp, ip); /* * acquire a transaction lock on the next page; * * action:sibling pointer update;
*/ if (!test_cflag(COMMIT_Nolink, ip))
tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
p->header.prev = cpu_to_le64(rbn);
/* sibling page may have been updated previously, or * it may be updated later;
*/
XT_PUTPAGE(mp);
}
/* * split the data between the split and new/right pages
*/
maxentry = le16_to_cpu(sp->header.maxentry);
middle = maxentry >> 1;
righthalf = maxentry - middle;
/* * skip index in old split/left page - insert into left page:
*/ if (skip <= middle) { /* move right half of split page to the new right page */
memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
righthalf << L2XTSLOTSIZE);
/* shift right tail of left half to make room for new entry */ if (skip < middle)
memmove(&sp->xad[skip + 1], &sp->xad[skip],
(middle - skip) << L2XTSLOTSIZE);
rp->header.nextindex =
cpu_to_le16(XTENTRYSTART + righthalf);
} /* * skip index in new right page - insert into right page:
*/ else { /* move left head of right half to right page */
n = skip - middle;
memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
n << L2XTSLOTSIZE);
/* insert new entry */
n += XTENTRYSTART;
xad = &rp->xad[n];
XT_PUTENTRY(xad, split->flag, split->off, split->len,
split->addr);
/* move right tail of right half to right page */ if (skip < maxentry)
memmove(&rp->xad[n + 1], &sp->xad[skip],
(maxentry - skip) << L2XTSLOTSIZE);
/* Rollback quota allocation. */ if (quota_allocation)
dquot_free_block(ip, quota_allocation);
return (rc);
}
/* NOTE(review): the function body is truncated above - the brace balance is
 * broken and xtSplitPage()'s header/epilogue plus the remainder of
 * xtSplitUp() are absent from this fragment. */
/* * xtSplitRoot() * * function: * split the full root page into original/root/split page and new * right page * i.e., root remains fixed in tree anchor (inode) and the root is * copied to a single new right child page since root page << * non-root page, and the split root page contains a single entry * for the new right child page. * * parameter: * int tid, * struct inode *ip, * struct xtsplit *split, * struct metapage **rmpp) * * return: * Pointer to page in which to insert or NULL on error.
*/ staticint
xtSplitRoot(tid_t tid, struct inode *ip, struct xtsplit * split, struct metapage ** rmpp)
/* NOTE(review): "staticint" above is a typo for "static int". */
{
xtpage_t *sp; struct metapage *rmp;
xtpage_t *rp;
s64 rbn; int skip, nextindex;
xad_t *xad;
pxd_t *pxd; struct pxdlist *pxdlist; struct tlock *tlck; struct xtlock *xtlck; int rc;
/* NOTE(review): sp, rp, rmp and rbn are read below but never assigned in
 * this fragment - upstream first sets sp from the inode root and allocates
 * the right child page from split->pxdlist before the copy; that code is
 * missing here. */
/* * copy the in-line root page into new right page extent
*/
nextindex = le16_to_cpu(sp->header.maxentry);
memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART],
(nextindex - XTENTRYSTART) << L2XTSLOTSIZE);
/* * insert the new entry into the new right/child page * (skip index in the new right page will not change)
*/
skip = split->index; /* if insert into middle, shift right remaining entries */ if (skip != nextindex)
memmove(&rp->xad[skip + 1], &rp->xad[skip],
(nextindex - skip) * sizeof(xad_t));
/* * reset the root * * init root with the single entry for the new right page * set the 1st entry offset to 0, which force the left-most key * at any level of the tree to be less than any search key.
*/ /* * acquire a transaction lock on the root page (in-memory inode); * * action: root split;
*/
BT_MARK_DIRTY(split->mp, ip);
/* NOTE(review): from here on the code references cmp, mp, p, index, xoff,
 * len, xaddr, bn, btstack and a local "split" struct that do not exist in
 * xtSplitRoot() - these lines appear to be spliced in from xtExtend().
 * xtSplitRoot()'s real body (XT_PUTENTRY of the router entry, header
 * re-init, *rmpp store) is missing. */
if (cmp != 0) {
XT_PUTPAGE(mp);
jfs_error(ip->i_sb, "xtSearch did not find extent\n"); return -EIO;
}
/* extension must be contiguous */
xad = &p->xad[index]; if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) {
XT_PUTPAGE(mp);
jfs_error(ip->i_sb, "extension is not contiguous\n"); return -EIO;
}
/* * acquire a transaction lock on the leaf page; * * action: xad insertion/extension;
*/
BT_MARK_DIRTY(mp, ip); if (!test_cflag(COMMIT_Nolink, ip)) {
tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
xtlck = (struct xtlock *) & tlck->lock;
}
/* * if the leaf page is full, insert the new entry and * propagate up the router entry for the new page from split * * The xtSplitUp() will insert the entry and unpin the leaf page.
*/ if (nextindex == le16_to_cpu(p->header.maxentry)) { /* xtSpliUp() unpins leaf pages */
split.mp = mp;
split.index = index + 1;
split.flag = XAD_NEW;
split.off = xoff; /* split offset */
split.len = len;
split.addr = xaddr;
split.pxdlist = NULL; if ((rc = xtSplitUp(tid, ip, &split, &btstack))) return rc;
/* get back old page */
p = xt_getpage(ip, bn, &mp); if (IS_ERR(p)) return PTR_ERR(p); /* * if leaf root has been split, original root has been * copied to new child page, i.e., original entry now * resides on the new child page;
*/ if (p->header.flag & BT_INTERNAL) {
ASSERT(p->header.nextindex ==
cpu_to_le16(XTENTRYSTART + 1));
xad = &p->xad[XTENTRYSTART];
bn = addressXAD(xad);
XT_PUTPAGE(mp);
/* get new child page */
p = xt_getpage(ip, bn, &mp); if (IS_ERR(p)) return PTR_ERR(p);
BT_MARK_DIRTY(mp, ip); if (!test_cflag(COMMIT_Nolink, ip)) {
tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
xtlck = (struct xtlock *) & tlck->lock;
}
}
} /* * insert the new entry into the leaf page
*/ else { /* insert the new entry: mark the entry NEW */
xad = &p->xad[index + 1];
XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);
/* advance next available entry index */
le16_add_cpu(&p->header.nextindex, 1);
}
/* get back old entry */
xad = &p->xad[index];
xlen = MAXXLEN;
/* * extend old extent
*/
extendOld:
XADlength(xad, xlen); if (!(xad->flag & XAD_NEW))
xad->flag |= XAD_EXTENDED;
/* NOTE(review): this entire region is a fragment - it references nxoff,
 * nxlen, nxaddr, index0, lxad, rxad, newindex, newpage, nblocks, maxblocks
 * and labels (coalesceRight, replace, out) that are not declared/defined in
 * view. The first part matches xtUpdate() and the tail (xtSearch with
 * XT_INSERT, dbAllocBottomUp, insertLeaf) matches xtAppend() from upstream
 * jfs_xtree.c; the enclosing function headers are missing. Reconcile with
 * upstream before use. */
/* nXAD must be completely contained within XAD */ if ((xoff > nxoff) ||
(nxoff + nxlen > xoff + xlen)) {
XT_PUTPAGE(mp);
jfs_error(ip->i_sb, "nXAD in not completely contained within XAD\n"); return -EIO;
}
index = index0;
newindex = index + 1;
nextindex = le16_to_cpu(p->header.nextindex);
if (xoff < nxoff) goto coalesceRight;
/* * coalesce with left XAD
*/ /* is XAD first entry of page ? */ if (index == XTENTRYSTART) goto replace;
/* is nXAD logically and physically contiguous with lXAD ? */
lxad = &p->xad[index - 1];
lxlen = lengthXAD(lxad); if (!(lxad->flag & XAD_NOTRECORDED) &&
(nxoff == offsetXAD(lxad) + lxlen) &&
(nxaddr == addressXAD(lxad) + lxlen) &&
(lxlen + nxlen < MAXXLEN)) { /* extend right lXAD */
index0 = index - 1;
XADlength(lxad, lxlen + nxlen);
/* If we just merged two extents together, need to make sure the * right extent gets logged. If the left one is marked XAD_NEW, * then we know it will be logged. Otherwise, mark as * XAD_EXTENDED
*/ if (!(lxad->flag & XAD_NEW))
lxad->flag |= XAD_EXTENDED;
/* If we just merged two extents together, need to make sure * the left extent gets logged. If the right one is marked * XAD_NEW, then we know it will be logged. Otherwise, mark as * XAD_EXTENDED
*/ if (!(rxad->flag & XAD_NEW))
rxad->flag |= XAD_EXTENDED;
/* get back old page */
p = xt_getpage(ip, bn, &mp); if (IS_ERR(p)) return PTR_ERR(p); /* * if leaf root has been split, original root has been * copied to new child page, i.e., original entry now * resides on the new child page;
*/ if (p->header.flag & BT_INTERNAL) {
ASSERT(p->header.nextindex ==
cpu_to_le16(XTENTRYSTART + 1));
xad = &p->xad[XTENTRYSTART];
bn = addressXAD(xad);
XT_PUTPAGE(mp);
/* get new child page */
p = xt_getpage(ip, bn, &mp); if (IS_ERR(p)) return PTR_ERR(p);
BT_MARK_DIRTY(mp, ip); if (!test_cflag(COMMIT_Nolink, ip)) {
tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
xtlck = (struct xtlock *) & tlck->lock;
}
} else { /* is nXAD on new page ? */ if (newindex >
(le16_to_cpu(p->header.maxentry) >> 1)) {
newindex =
newindex -
le16_to_cpu(p->header.nextindex) +
XTENTRYSTART;
newpage = 1;
}
}
} else { /* if insert into middle, shift right remaining entries */ if (newindex < nextindex)
memmove(&p->xad[newindex + 1], &p->xad[newindex],
(nextindex - newindex) << L2XTSLOTSIZE);
/* advance next available entry index. */
p->header.nextindex =
cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
}
/* * does nXAD force 3-way split ? * * |---nXAD--->| * --|----------XAD-------------|-- * |-lXAD-| |-rXAD -|
*/ if (nxoff + nxlen == xoff + xlen) goto out;
/* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */ if (newpage) { /* close out old page */ if (!test_cflag(COMMIT_Nolink, ip)) {
xtlck->lwm.offset = (xtlck->lwm.offset) ?
min(index0, (int)xtlck->lwm.offset) : index0;
xtlck->lwm.length =
le16_to_cpu(p->header.nextindex) -
xtlck->lwm.offset;
}
bn = le64_to_cpu(p->header.next);
XT_PUTPAGE(mp);
/* get new right page */
p = xt_getpage(ip, bn, &mp); if (IS_ERR(p)) return PTR_ERR(p);
/* get back old page */
p = xt_getpage(ip, bn, &mp); if (IS_ERR(p)) return PTR_ERR(p);
/* * if leaf root has been split, original root has been * copied to new child page, i.e., original entry now * resides on the new child page;
*/ if (p->header.flag & BT_INTERNAL) {
ASSERT(p->header.nextindex ==
cpu_to_le16(XTENTRYSTART + 1));
xad = &p->xad[XTENTRYSTART];
bn = addressXAD(xad);
XT_PUTPAGE(mp);
/* get new child page */
p = xt_getpage(ip, bn, &mp); if (IS_ERR(p)) return PTR_ERR(p);
BT_MARK_DIRTY(mp, ip); if (!test_cflag(COMMIT_Nolink, ip)) {
tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
xtlck = (struct xtlock *) & tlck->lock;
}
}
} else { /* if insert into middle, shift right remaining entries */ if (newindex < nextindex)
memmove(&p->xad[newindex + 1], &p->xad[newindex],
(nextindex - newindex) << L2XTSLOTSIZE);
/* * search for the entry location at which to insert: * * xtFastSearch() and xtSearch() both returns (leaf page * pinned, index at which to insert). * n.b. xtSearch() may return index of maxentry of * the full page.
*/ if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT))) return rc;
/* NOTE(review): on the second line below, "//insert:" comments out the rest
 * of that physical line, so the "/ *" it contains never opens a comment and
 * the lone "* /" on the following line is a stray token (syntax error). */
if (next)
xlen = min(xlen, (int)(next - xoff)); //insert: /* * insert entry for new extent
*/
xflag |= XAD_NEW;
/* * if the leaf page is full, split the page and * propagate up the router entry for the new page from split * * The xtSplitUp() will insert the entry and unpin the leaf page.
*/
nextindex = le16_to_cpu(p->header.nextindex); if (nextindex < le16_to_cpu(p->header.maxentry)) goto insertLeaf;
/* * allocate new index blocks to cover index page split(s)
*/
nsplit = btstack.nsplit;
split.pxdlist = &pxdlist;
pxdlist.maxnpxd = pxdlist.npxd = 0;
pxd = &pxdlist.pxd[0];
nblocks = JFS_SBI(ip->i_sb)->nbperpage; for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) { if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) {
PXDaddress(pxd, xaddr);
PXDlength(pxd, nblocks);
pxdlist.maxnpxd++;
continue;
}
/* undo allocation */
goto out;
}
xlen = min(xlen, maxblocks);
/* * allocate data extent requested
*/ if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) goto out;
/* * insert the new entry into the leaf page
*/
insertLeaf: /* * allocate data extent requested
*/ if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) goto out;
/* * We can run into a deadlock truncating a file with a large number of * xtree pages (large fragmented file). A robust fix would entail a * reservation system where we would reserve a number of metadata pages * and tlocks which we would be guaranteed without a deadlock. Without * this, a partial fix is to limit number of metadata pages we will lock * in a single transaction. Currently we will truncate the file so that * no more than 50 leaf pages will be locked. The caller of xtTruncate * will be responsible for ensuring that the current transaction gets * committed, and that subsequent transactions are created to truncate * the file further if needed.
*/ #define MAX_TRUNCATE_LEAVES 50
/* * xtTruncate() * * function: * traverse for truncation logging backward bottom up; * terminate at the last extent entry at the current subtree * root page covering new down size. * truncation may occur within the last extent entry. * * parameter: * int tid, * struct inode *ip, * s64 newsize, * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} * * return: * * note: * PWMAP: * 1. truncate (non-COMMIT_NOLINK file) * by jfs_truncate() or jfs_open(O_TRUNC): * xtree is updated; * 2. truncate index table of directory when last entry removed * map update via tlock at commit time; * PMAP: * Call xtTruncate_pmap instead * WMAP: * 1. remove (free zero link count) on last reference release * (pmap has been freed at commit zero link count); * 2. truncate (COMMIT_NOLINK file, i.e., tmp file): * xtree is updated; * map update directly at truncation time; * * if (DELETE) * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); * else if (TRUNCATE) * must write LOG_NOREDOPAGE for deleted index page; * * pages may already have been tlocked by anonymous transactions * during file growth (i.e., write) before truncation; * * except last truncated entry, deleted entries remains as is * in the page (nextindex is updated) for other use * (e.g., log/update allocation map): this avoid copying the page * info but delay free of pages; *
*/
s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
{
s64 teof; struct metapage *mp;
xtpage_t *p;
s64 bn; int index, nextindex;
xad_t *xad;
s64 xoff, xaddr; int xlen, len, freexlen; struct btstack btstack; struct btframe *parent; struct tblock *tblk = NULL; struct tlock *tlck = NULL; struct xtlock *xtlck = NULL; struct xdlistlock xadlock; /* maplock for COMMIT_WMAP */ struct pxd_lock *pxdlock; /* maplock for COMMIT_WMAP */
s64 nfreed; int freed, log; int locked_leaves = 0;
/* save object truncation type */ if (tid) {
tblk = tid_to_tblock(tid);
tblk->xflag |= flag;
}
/* NOTE(review): in this fragment "log" and "nfreed" are read below but
 * never assigned - upstream initializes nfreed to 0 and derives log from
 * the commit-map bits of "flag" before the traversal. That initialization
 * appears to be missing here. */
/* * if the newsize is not an integral number of pages, * the file between newsize and next page boundary will * be cleared. * if truncating into a file hole, it will cause * a full block to be allocated for the logical block.
*/
/* * release page blocks of truncated region <teof, eof> * * free the data blocks from the leaf index blocks. * delete the parent index entries corresponding to * the freed child data/index blocks. * free the index blocks themselves which aren't needed * in new sized file. * * index blocks are updated only if the blocks are to be * retained in the new sized file. * if type is PMAP, the data and index pages are NOT * freed, and the data and index blocks are NOT freed * from working map. * (this will allow continued access of data/index of * temporary file (zerolink count file truncated to zero-length)).
*/
teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
JFS_SBI(ip->i_sb)->l2bsize;
/* clear stack */
BT_CLR(&btstack);
/* * start with root * * root resides in the inode
*/
bn = 0;
/* * first access of each page:
*/
getPage:
p = xt_getpage(ip, bn, &mp); if (IS_ERR(p)) return PTR_ERR(p);
/* process entries backward from last index */
index = le16_to_cpu(p->header.nextindex) - 1;
/* Since this is the rightmost page at this level, and we may have * already freed a page that was formerly to the right, let's make * sure that the next pointer is zero.
*/ if (p->header.next) { if (log) /* * Make sure this change to the header is logged. * If we really truncate this leaf, the flag * will be changed to tlckTRUNCATE
*/
tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
BT_MARK_DIRTY(mp, ip);
p->header.next = 0;
}
if (p->header.flag & BT_INTERNAL) goto getChild;
/* * leaf page
*/
freed = 0;
/* does region covered by leaf page precede Teof ? */
xad = &p->xad[index];
xoff = offsetXAD(xad);
xlen = lengthXAD(xad); if (teof >= xoff + xlen) {
XT_PUTPAGE(mp); goto getParent;
}
/* (re)acquire tlock of the leaf page */ if (log) { if (++locked_leaves > MAX_TRUNCATE_LEAVES) { /* * We need to limit the size of the transaction * to avoid exhausting pagecache & tlocks
*/
XT_PUTPAGE(mp);
newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; goto getParent;
}
tlck = txLock(tid, ip, mp, tlckXTREE);
tlck->type = tlckXTREE | tlckTRUNCATE;
xtlck = (struct xtlock *) & tlck->lock;
xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
}
BT_MARK_DIRTY(mp, ip);
/* * The "data" for a directory is indexed by the block * device's address space. This metadata must be invalidated * here
*/ if (S_ISDIR(ip->i_mode) && (teof == 0))
invalidate_xad_metapages(ip, *xad); /* * entry beyond eof: continue scan of current page * xad * ---|---=======-------> * eof
*/ if (teof < xoff) {
nfreed += xlen; continue;
}
/* NOTE(review): the "continue" above has no enclosing loop in this
 * fragment - upstream scans the leaf entries with
 * "for (; index >= XTENTRYSTART; index--)"; that loop header and the
 * per-entry free/truncate logic appear to be missing here. */
/* * (xoff <= teof): last entry to be deleted from page; * If other entries remain in page: keep and update the page.
*/
/* assert(freed == 0); */ goto getParent;
} /* end scan of leaf page entries */
/* NOTE(review): the closing brace above has no matching open brace in this
 * fragment (it closed the missing entry-scan loop). Brace balance is broken
 * from here on; reconcile with upstream. */
freed = 1;
/* * leaf page become empty: free the page if type != PMAP
*/ if (log) { /* COMMIT_PWMAP */ /* txCommit() with tlckFREE: * free data extents covered by leaf [XTENTRYSTART:hwm); * invalidate leaf if COMMIT_PWMAP; * if (TRUNCATE), will write LOG_NOREDOPAGE;
*/
tlck->type = tlckXTREE | tlckFREE;
} else { /* COMMIT_WAMP */
/* * the leaf page become empty: delete the parent entry * for the leaf page if the parent page is to be kept * in the new sized file.
*/
/* * go back up to the parent page
*/
getParent: /* pop/restore parent entry for the current child page */ if ((parent = BT_POP(&btstack)) == NULL) /* current page must have been root */ goto out;
/* get back the parent page */
bn = parent->bn;
p = xt_getpage(ip, bn, &mp); if (IS_ERR(p)) return PTR_ERR(p);
index = parent->index;
/* * child page was not empty:
*/ if (freed == 0) { /* has any entry deleted from parent ? */ if (index < le16_to_cpu(p->header.nextindex) - 1) { /* (re)acquire tlock on the parent page */ if (log) { /* COMMIT_PWMAP */ /* txCommit() with tlckTRUNCATE: * free child extents covered by parent [);
*/
tlck = txLock(tid, ip, mp, tlckXTREE);
xtlck = (struct xtlock *) & tlck->lock; if (!(tlck->type & tlckTRUNCATE)) {
xtlck->hwm.offset =
le16_to_cpu(p->header.
nextindex) - 1;
tlck->type =
tlckXTREE | tlckTRUNCATE;
}
} else { /* COMMIT_WMAP */
/* * child page was empty:
*/
nfreed += lengthXAD(&p->xad[index]);
/* * During working map update, child page's tlock must be handled * before parent's. This is because the parent's tlock will cause * the child's disk space to be marked available in the wmap, so * it's important that the child page be released by that time. * * ToDo: tlocks should be on doubly-linked list, so we can * quickly remove it and add it to the end.
*/
/* * Move parent page's tlock to the end of the tid's tlock list
*/ if (log && mp->lid && (tblk->last != mp->lid) &&
lid_to_tlock(mp->lid)->tid) {
lid_t lid = mp->lid; struct tlock *prev;
/* parent has become empty and freed: * go back up to its parent page
*/ /* freed = 1; */ goto getParent;
}
} /* * parent page still has entries for front region;
*/ else { /* try truncate region covered by preceding entry * (process backward)
*/
index--;
/* go back down to the child page corresponding * to the entry
*/ goto getChild;
}
/* * internal page: go down to child page of current entry
*/
getChild: /* save current parent entry for the child page */ if (BT_STACK_FULL(&btstack)) {
jfs_error(ip->i_sb, "stack overrun!\n");
XT_PUTPAGE(mp); return -EIO;
}
BT_PUSH(&btstack, bn, index);
/* NOTE(review): the function is truncated here - the descent to the child
 * page, the "out:" label targeted by the gotos above, the tlock-list
 * manipulation started around the "Move parent page's tlock" comment, and
 * the final return of bytes freed are all missing from this fragment. */
/*
 * NOTE(review): the following German text is web-extraction residue (a site
 * disclaimer: "the information on this web page was compiled to the best of
 * our knowledge; however neither completeness, correctness nor quality of
 * the provided information is guaranteed; remark: the colored syntax
 * rendering and the measurement are still experimental"). It is not part of
 * the source; preserved in comment form so it cannot be parsed as code.
 *
 * Die Informationen auf dieser Webseite wurden
 * nach bestem Wissen sorgfaeltig zusammengestellt. Es wird jedoch weder
 * Vollstaendigkeit, noch Richtigkeit, noch Qualitaet der bereit gestellten
 * Informationen zugesichert.
 * Bemerkung:
 * Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.
 */