// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) International Business Machines Corp., 2000-2005
 * Portions Copyright (C) Christoph Hellwig, 2001-2002
*/
/*
 * jfs_txnmgr.c: transaction manager
 *
 * notes:
 * transaction starts with txBegin() and ends with txCommit()
 * or txAbort().
 *
 * tlock is acquired at the time of update;
 * (obviate scan at commit time for xtree and dtree)
 * tlock and mp points to each other;
 * (no hashlist for mp -> tlock).
 *
 * special cases:
 * tlock on in-memory inode:
 * in-place tlock in the in-memory inode itself;
 * converted to page lock by iWrite() at commit time.
 *
 * tlock during write()/mmap() under anonymous transaction (tid = 0):
 * transferred (?) to transaction at commit time.
 *
 * use the page itself to update allocation maps
 * (obviate intermediate replication of allocation/deallocation data)
 * hold on to mp+lock thru update of maps
*/
/*
 * transaction management structures
 */
static struct {
	int freetid;		/* index of a free tid structure */
	int freelock;		/* index first free lock word */
	wait_queue_head_t freewait;	/* eventlist of free tblock */
	wait_queue_head_t freelockwait;	/* eventlist of free tlock */
	wait_queue_head_t lowlockwait;	/* eventlist of ample tlocks */
	int tlocksInUse;	/* Number of tlocks in use */
	spinlock_t LazyLock;	/* synchronize sync_queue & unlock_queue */
/*	struct tblock *sync_queue; * Transactions waiting for data sync */
	struct list_head unlock_queue;	/* Txns waiting to be released */
	struct list_head anon_list;	/* inodes having anonymous txns */
	struct list_head anon_list2;	/* inodes having anonymous txns
					   that couldn't be sync'ed */
} TxAnchor;
int jfs_tlocks_low; /* Indicates low number of available tlocks */
static int nTxBlock = -1;	/* number of transaction blocks */
module_param(nTxBlock, int, 0);
MODULE_PARM_DESC(nTxBlock, "Number of transaction blocks (max:65536)");
static int nTxLock = -1;	/* number of transaction locks */
module_param(nTxLock, int, 0);
MODULE_PARM_DESC(nTxLock, "Number of transaction locks (max:65536)");
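/*
 * Note (a sketch of the expected behavior, assuming the standard jfs
 * module init path): -1 means "unset", and initialization then sizes the
 * tables from available memory before the clamps below are applied;
 * e.g. "modprobe jfs nTxLock=4096" overrides that sizing.
 */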
struct tblock *TxBlock;	/* transaction block table */
static int TxLockLWM;	/* Low water mark for number of txLocks used */
static int TxLockHWM;	/* High water mark for number of txLocks used */
static int TxLockVHWM;	/* Very High water mark */
struct tlock *TxLock;	/* transaction lock table */
/*
 * statistics
 */
static struct {
	tid_t maxtid;		/* 4: biggest tid ever used */
	lid_t maxlid;		/* 4: biggest lid ever used */
	int ntid;		/* 4: # of transactions performed */
	int nlid;		/* 4: # of tlocks acquired */
	int waitlock;		/* 4: # of tlock wait */
} stattx;
/*
 * Get a transaction lock from the free list.  If the number in use is
 * greater than the high water mark, wake up the sync daemon.  This should
 * free some anonymous transaction locks.  (TXN_LOCK must be held.)
 */
static lid_t txLockAlloc(void)
{
	lid_t lid;

	INCREMENT(TxStat.txLockAlloc);
	if (!TxAnchor.freelock) {
INCREMENT(TxStat.txLockAlloc_freelock);
}
	/* Verify tunable parameters */
	if (nTxBlock < 16)
		nTxBlock = 16;	/* No one should set it this low */
	if (nTxBlock > 65536)
		nTxBlock = 65536;
	if (nTxLock < 256)
		nTxLock = 256;	/* No one should set it this low */
	if (nTxLock > 65536)
		nTxLock = 65536;
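	/*
	 * The 65536 ceiling is not arbitrary: tids and lids are 16-bit
	 * values (tid_t/lid_t), so every table index must stay
	 * representable in them.
	 */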
/*
 * NAME:	txExit()
 *
 * FUNCTION:	clean up when module is unloaded
 */
void txExit(void)
{
vfree(TxLock);
TxLock = NULL;
vfree(TxBlock);
TxBlock = NULL;
}
/*
 * NAME:	txBegin()
 *
 * FUNCTION:	start a transaction.
 *
 * PARAMETER:	sb	- superblock
 *		flag	- force for nested tx;
 *
 * RETURN:	tid	- transaction id
 *
 * note: flag force allows starting a tx for a nested tx
 * to prevent deadlock on logsync barrier;
 */
tid_t txBegin(struct super_block *sb, int flag)
{
	tid_t t;
	struct tblock *tblk;
	struct jfs_log *log;
jfs_info("txBegin: flag = 0x%x", flag);
log = JFS_SBI(sb)->log;
	if (!log) {
		jfs_error(sb, "read-only filesystem\n");
		return 0;
	}
TXN_LOCK();
INCREMENT(TxStat.txBegin);
      retry:
	if (!(flag & COMMIT_FORCE)) {
		/*
		 * synchronize with logsync barrier
		 */
		if (test_bit(log_SYNCBARRIER, &log->flag) ||
test_bit(log_QUIESCE, &log->flag)) {
INCREMENT(TxStat.txBegin_barrier);
			TXN_SLEEP(&log->syncwait);
			goto retry;
}
	}
	if (flag == 0) {
		/*
		 * Don't begin transaction if we're getting starved for tlocks
		 * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately
		 * free tlocks)
		 */
		if (TxAnchor.tlocksInUse > TxLockVHWM) {
INCREMENT(TxStat.txBegin_lockslow);
			TXN_SLEEP(&TxAnchor.lowlockwait);
			goto retry;
}
}
	t = TxAnchor.freetid;
	tblk = tid_to_tblock(t);
	if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) {
		/* Don't let a non-forced transaction take the last tblk */
jfs_info("txBegin: waiting for free tid");
INCREMENT(TxStat.txBegin_freetid);
		TXN_SLEEP(&TxAnchor.freewait);
		goto retry;
}
TxAnchor.freetid = tblk->next;
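	/* the tblk popped above is now ours; freetid indexes its successor */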
	/*
	 * initialize transaction
	 */

	/*
	 * We can't zero the whole thing or we screw up another thread being
	 * awakened after sleeping on tblk->waitor
	 *
	 * memset(tblk, 0, sizeof(struct tblock));
	 */
tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0;
	/*
	 * wakeup transactions waiting on the page locked
	 * by the current transaction
	 */
TXN_WAKEUP(&tblk->waitor);
log = JFS_SBI(tblk->sb)->log;
	/*
	 * Lazy commit thread can't free this guy until we mark it UNLOCKED,
	 * otherwise, we would be left with a transaction that may have been
	 * reused.
	 *
	 * Lazy commit thread will turn off tblkGC_LAZY before calling this
	 * routine.
	 */
	if (tblk->flag & tblkGC_LAZY) {
jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk);
TXN_UNLOCK();
if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) &&
	    !(mp->xflag & COMMIT_PAGE)) {
		/*
		 * Directory inode is special.  It can have both an xtree tlock
		 * and a dtree tlock associated with it.
		 */
dir_xtree = 1;
lid = jfs_ip->xtlid;
} else
lid = mp->lid;
	/* is page not locked by a transaction ? */
	if (lid == 0)
		goto allocateLock;
/* is page locked by the requester transaction ? */
	tlck = lid_to_tlock(lid);
	if ((xtid = tlck->tid) == tid) {
		TXN_UNLOCK();
		goto grantLock;
}
	/*
	 * is page locked by anonymous transaction/lock ?
	 *
	 * (page update without transaction (i.e., file write) is
	 * locked under anonymous transaction tid = 0:
	 * anonymous tlocks maintained on anonymous tlock list of
	 * the inode of the page and available to all anonymous
	 * transactions until txCommit() time at which point
	 * they are transferred to the transaction tlock list of
	 * the committing transaction of the inode)
	 */
	if (xtid == 0) {
tlck->tid = tid;
TXN_UNLOCK();
		tblk = tid_to_tblock(tid);
		/*
		 * The order of the tlocks in the transaction is important
		 * (during truncate, child xtree pages must be freed before
		 * parent's tlocks change the working map).
		 * Take tlock off anonymous list and add to tail of
		 * transaction list
		 *
		 * Note:  We really need to get rid of the tid & lid and
		 * use list_head's.  This code is getting UGLY!
		 */
		if (jfs_ip->atlhead == lid) {
			if (jfs_ip->atltail == lid) {
				/*
				 * Only anonymous txn.
				 * Remove from anon_list
				 */
TXN_LOCK();
list_del_init(&jfs_ip->anon_inode_list);
TXN_UNLOCK();
}
jfs_ip->atlhead = tlck->next;
} else {
			lid_t last;
			for (last = jfs_ip->atlhead;
lid_to_tlock(last)->next != lid;
last = lid_to_tlock(last)->next) {
assert(last);
}
			lid_to_tlock(last)->next = tlck->next;
			if (jfs_ip->atltail == lid)
jfs_ip->atltail = last;
}
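		/*
		 * the tlock is now unlinked from the inode's anonymous list
		 * (head or interior case) and can be appended to the
		 * committing transaction's list below
		 */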
/* insert the tlock at tail of transaction tlock list */
		/*
		 * if anonymous transaction, and buffer is on the group
		 * commit synclist, mark inode to show this.  This will
		 * prevent the buffer from being marked nohomeok for too
		 * long a time.
		 */
		if ((tid == 0) && mp->lsn)
set_cflag(COMMIT_Synclist, ip);
	}
	/* mark tlock for in-memory inode */
	else
tlck->flag = tlckINODELOCK;
if (S_ISDIR(ip->i_mode))
tlck->flag |= tlckDIRECTORY;
tlck->type = 0;
/* bind the tlock and the page */
tlck->ip = ip;
	tlck->mp = mp;
	if (dir_xtree)
		jfs_ip->xtlid = lid;
	else
mp->lid = lid;
	/*
	 * enqueue transaction lock to transaction/inode
	 */
	/* insert the tlock at tail of transaction tlock list */
	if (tid) {
		tblk = tid_to_tblock(tid);
		if (tblk->next)
			lid_to_tlock(tblk->last)->next = lid;
		else
tblk->next = lid;
tlck->next = 0;
tblk->last = lid;
	}
	/*
	 * anonymous transaction:
	 * insert the tlock at head of inode anonymous tlock list
	 */
	else {
tlck->next = jfs_ip->atlhead;
		jfs_ip->atlhead = lid;
		if (tlck->next == 0) {
			/* This inode's first anonymous transaction */
jfs_ip->atltail = lid;
TXN_LOCK();
list_add_tail(&jfs_ip->anon_inode_list,
&TxAnchor.anon_list);
TXN_UNLOCK();
}
}
/* initialize type dependent area for linelock */
linelock = (struct linelock *) & tlck->lock;
linelock->next = 0;
linelock->flag = tlckLINELOCK;
linelock->maxcnt = TLOCKSHORT;
linelock->index = 0;
	/*
	 * page is being locked by another transaction:
	 */
      waitLock:
	/* Only locks on ipimap or ipaimap should reach here */
	/* assert(jfs_ip->fileset == AGGREGATE_I); */
	if (jfs_ip->fileset != AGGREGATE_I) {
printk(KERN_ERR "txLock: trying to lock locked page!");
print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4,
ip, sizeof(*ip), 0);
print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4,
mp, sizeof(*mp), 0);
print_hex_dump(KERN_ERR, "Locker's tblock: ",
DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid), sizeof(struct tblock), 0);
print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4,
tlck, sizeof(*tlck), 0);
BUG();
}
INCREMENT(stattx.waitlock); /* statistics */
TXN_UNLOCK();
release_metapage(mp);
TXN_LOCK();
xtid = tlck->tid; /* reacquire after dropping TXN_LOCK */
jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d",
tid, xtid, lid);
	/* Recheck everything since dropping TXN_LOCK */
	if (xtid && (tlck->mp == mp) && (mp->lid == lid))
		TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor);
	else
TXN_UNLOCK();
jfs_info("txLock: awakened tid = %d, lid = %d", tid, lid);
return NULL;
}
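/*
 * Typical caller pattern (a sketch under assumptions, not code from this
 * file): a metadata update path begins a transaction, tlocks the metapage
 * it is about to modify, applies the change, then commits:
 *
 *	tid = txBegin(ip->i_sb, 0);
 *	tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
 *	... modify the page, recording changed slots in tlck's linelock ...
 *	rc = txCommit(tid, 1, &ip, 0);
 *	txEnd(tid);
 */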
/*
 * NAME:	txRelease()
 *
 * FUNCTION:	Release buffers associated with transaction locks, but don't
 *		mark homeok yet.  This allows other transactions to modify
 *		buffers, but won't let them go to disk until commit record
 *		actually gets written.
 *
 * PARAMETER:
 *		tblk -
 *
 * RETURN:	Errors from subroutines.
 */
static void txRelease(struct tblock * tblk)
{
	struct metapage *mp;
	lid_t lid, next;
	struct tlock *tlck;
	/*
	 * mark page under tlock homeok (its log has been written):
	 */
	for (lid = tblk->next; lid; lid = next) {
tlck = lid_to_tlock(lid);
next = tlck->next;
jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck);
		/*
		 * remove tblock from logsynclist
		 * (allocation map pages inherited lsn of tblk and
		 * have been inserted in logsync list at txUpdateMap())
		 */
		if (tblk->lsn) {
LOGSYNC_LOCK(log, flags);
log->count--;
list_del(&tblk->synclist);
LOGSYNC_UNLOCK(log, flags);
}
}
/*
 *	txMaplock()
 *
 * function: allocate a transaction lock for freed page/entry;
 * for freed page, maplock is used as xtlock/dtlock type;
 */
struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	lid_t lid;
	struct tblock *tblk;
	struct tlock *tlck;
	struct maplock *maplock;
TXN_LOCK();
	/*
	 * allocate a tlock
	 */
lid = txLockAlloc();
tlck = lid_to_tlock(lid);
	/*
	 * initialize tlock
	 */
tlck->tid = tid;
/* bind the tlock and the object */
	tlck->flag = tlckINODELOCK;
	if (S_ISDIR(ip->i_mode))
tlck->flag |= tlckDIRECTORY;
tlck->ip = ip;
tlck->mp = NULL;
tlck->type = type;
	/*
	 * enqueue transaction lock to transaction/inode
	 */
	/* insert the tlock at tail of transaction tlock list */
	if (tid) {
		tblk = tid_to_tblock(tid);
		if (tblk->next)
			lid_to_tlock(tblk->last)->next = lid;
		else
tblk->next = lid;
tlck->next = 0;
tblk->last = lid;
	}
	/*
	 * anonymous transaction:
	 * insert the tlock at head of inode anonymous tlock list
	 */
	else {
tlck->next = jfs_ip->atlhead;
		jfs_ip->atlhead = lid;
		if (tlck->next == 0) {
			/* This inode's first anonymous transaction */
jfs_ip->atltail = lid;
list_add_tail(&jfs_ip->anon_inode_list,
&TxAnchor.anon_list);
}
}
TXN_UNLOCK();
/* initialize type dependent area for maplock */
maplock = (struct maplock *) & tlck->lock;
maplock->next = 0;
maplock->maxcnt = 0;
maplock->index = 0;
return tlck;
}
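/*
 * Sketch of an assumed caller (not code from this file): a truncation
 * path that frees a whole page takes a maplock so txUpdateMap() can later
 * return the page's blocks to the allocation maps; freed_pxd here is a
 * hypothetical name for the extent being released:
 *
 *	tlck = txMaplock(tid, ip, tlckMAP);
 *	pxdlock = (struct pxd_lock *) & tlck->lock;
 *	pxdlock->flag = mlckFREEPXD;
 *	pxdlock->pxd = freed_pxd;
 *	pxdlock->index = 1;
 */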
/*
 *	txLinelock()
 *
 * function: allocate a transaction lock for log vector list
 */
struct linelock *txLinelock(struct linelock * tlock)
{
	lid_t lid;
	struct tlock *tlck;
	struct linelock *linelock;
TXN_LOCK();
/* allocate a TxLock structure */
lid = txLockAlloc();
tlck = lid_to_tlock(lid);
/*
 * NAME:	txCommit()
 *
 * FUNCTION:	commit the changes to the objects specified in
 *		clist.  For journalled segments only the
 *		changes of the caller are committed, ie by tid.
 *		for non-journalled segments the data are flushed to
 *		disk and then the change to the disk inode and indirect
 *		blocks committed (so blocks newly allocated to the
 *		segment will be made a part of the segment atomically).
 *
 *		all of the segments specified in clist must be in
 *		one file system.  no more than 6 segments are needed
 *		to handle all unix svcs.
 *
 *		if the i_nlink field (i.e. disk inode link count)
 *		is zero, and the type of inode is a regular file or
 *		directory, or symbolic link, the inode is truncated
 *		to zero length.  the truncation is committed but the
 *		VM resources are unaffected until it is closed (see
 *		iput and iclose).
 *
 * PARAMETER:
 *
 * RETURN:
 *
 * serialization:
 *		on entry the inode lock on each segment is assumed
 *		to be held.
 *
 * i/o error:
 */
int txCommit(tid_t tid,		/* transaction identifier */
	     int nip,		/* number of inodes to commit */
	     struct inode **iplist,	/* list of inode to commit */
	     int flag)
{
	int rc = 0;
	struct commit cd;
	struct jfs_log *log;
	struct tblock *tblk;
	struct lrd *lrd;
	struct inode *ip;
	struct jfs_inode_info *jfs_ip;
	int k, n;
	ino_t top;
	struct super_block *sb;
jfs_info("txCommit, tid = %d, flag = %d", tid, flag); /* is read-only file system ? */ if (isReadOnly(iplist[0])) {
rc = -EROFS; goto TheEnd;
}
sb = cd.sb = iplist[0]->i_sb;
cd.tid = tid;
if (tid == 0)
tid = txBegin(sb, 0);
tblk = tid_to_tblock(tid);
/* initialize log record descriptor in commit */
lrd = &cd.lrd;
lrd->logtid = cpu_to_le32(tblk->logtid);
lrd->backchain = 0;
tblk->xflag |= flag;
if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0)
		tblk->xflag |= COMMIT_LAZY;
	/*
	 * prepare non-journaled objects for commit
	 *
	 * flush data pages of non-journaled file
	 * to prevent the file getting non-initialized disk blocks
	 * in case of crash.
	 * (new blocks - )
	 */
cd.iplist = iplist;
cd.nip = nip;
	/*
	 * acquire transaction lock on (on-disk) inodes
	 *
	 * update on-disk inode from in-memory inode
	 * acquiring transaction locks for AFTER records
	 * on the on-disk inode of file object
	 *
	 * sort the inodes array by inode number in descending order
	 * to prevent deadlock when acquiring transaction lock
	 * of on-disk inodes on multiple on-disk inode pages by
	 * multiple concurrent transactions
	 */
	for (k = 0; k < cd.nip; k++) {
		top = (cd.iplist[k])->i_ino;
		for (n = k + 1; n < cd.nip; n++) {
			ip = cd.iplist[n];
			if (ip->i_ino > top) {
top = ip->i_ino;
cd.iplist[n] = cd.iplist[k];
cd.iplist[k] = ip;
}
}
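		/*
		 * selection sort: after each inner pass, iplist[k] holds the
		 * largest remaining i_ino, yielding descending order overall
		 */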
ip = cd.iplist[k];
jfs_ip = JFS_IP(ip);
		/*
		 * BUGBUG - This code has temporarily been removed.  The
		 * intent is to ensure that any file data is written before
		 * the metadata is committed to the journal.  This prevents
		 * uninitialized data from appearing in a file after the
		 * journal has been replayed.  (The uninitialized data
		 * could be sensitive data removed by another user.)
		 *
		 * The problem now is that we are holding the IWRITELOCK
		 * on the inode, and calling filemap_fdatawrite on an
		 * unmapped page will cause a deadlock in jfs_get_block.
		 *
		 * The long term solution is to pare down the use of
		 * IWRITELOCK.  We are currently holding it too long.
		 * We could also be smarter about which data pages need
		 * to be written before the transaction is committed and
		 * when we don't need to worry about it at all.
		 *
		 * if ((!S_ISDIR(ip->i_mode))
		 *    && (tblk->flag & COMMIT_DELETE) == 0)
		 *	filemap_write_and_wait(ip->i_mapping);
		 */
		/*
		 * Mark inode as not dirty.  It will still be on the dirty
		 * inode list, but we'll know not to commit it again unless
		 * it gets marked dirty again
		 */
clear_cflag(COMMIT_Dirty, ip);
		/* inherit anonymous tlock(s) of inode */
		if (jfs_ip->atlhead) {
lid_to_tlock(jfs_ip->atltail)->next = tblk->next;
			tblk->next = jfs_ip->atlhead;
			if (!tblk->last)
tblk->last = jfs_ip->atltail;
jfs_ip->atlhead = jfs_ip->atltail = 0;
TXN_LOCK();
list_del_init(&jfs_ip->anon_inode_list);
TXN_UNLOCK();
}
		/*
		 * acquire transaction lock on on-disk inode page
		 * (become first tlock of the tblk's tlock list)
		 */
		if (((rc = diWrite(tid, ip))))
			goto out;
}
	/*
	 * write log records from transaction locks
	 *
	 * txUpdateMap() resets XAD_NEW in XAD.
	 */
txLog(log, tblk, &cd);
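	/*
	 * txLog() walks tblk's tlock list and formats one or more log
	 * records per lock, dispatching by object type (dtree, xtree,
	 * inode, map); the per-type formatters appear below.
	 */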
	/*
	 * Ensure that inode isn't reused before
	 * lazy commit thread finishes processing
	 */
	if (tblk->xflag & COMMIT_DELETE) {
		ihold(tblk->u.ip);
		/*
		 * Avoid a rare deadlock
		 *
		 * If the inode is locked, we may be blocked in
		 * jfs_commit_inode.  If so, we don't want the
		 * lazy_commit thread doing the last iput() on the inode
		 * since that may block on the locked inode.  Instead,
		 * commit the transaction synchronously, so the last iput
		 * will be done by the calling thread (or later)
		 */
		/*
		 * I believe this code is no longer needed.  Splitting I_LOCK
		 * into two bits, I_NEW and I_SYNC should prevent this
		 * deadlock as well.  But since I don't have a JFS testload
		 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done.
		 * Joern
		 */
		if (tblk->u.ip->i_state & I_SYNC)
tblk->xflag &= ~COMMIT_LAZY;
}
/* initialize as REDOPAGE record format */
lrd->log.redopage.type = cpu_to_le16(LOG_INODE);
lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE);
/* mark page as homeward bound */
		tlck->flag |= tlckWRITEPAGE;
	} else if (tlck->type & tlckFREE) {
		/*
		 * free inode extent
		 *
		 * (pages of the freed inode extent have been invalidated and
		 * a maplock for free of the extent has been formatted at
		 * txLock() time);
		 *
		 * the tlock had been acquired on the inode allocation map page
		 * (iag) that specifies the freed extent, even though the map
		 * page is not itself logged, to prevent pageout of the map
		 * page before the log;
		 */
		/*
		 * log LOG_NOREDOINOEXT of the freed inode extent for
		 * logredo() to start NoRedoPage filters, and to update
		 * imap and bmap for free of the extent;
		 */
		lrd->type = cpu_to_le16(LOG_NOREDOINOEXT);
		/*
		 * For the LOG_NOREDOINOEXT record, we need
		 * to pass the IAG number and inode extent
		 * index (within that IAG) from which the
		 * extent is being released.  These have been
		 * passed to us in the iplist[1] and iplist[2].
		 */
lrd->log.noredoinoext.iagnum =
cpu_to_le32((u32) (size_t) cd->iplist[1]);
lrd->log.noredoinoext.inoext_idx =
cpu_to_le32((u32) (size_t) cd->iplist[2]);
/* initialize as REDOPAGE record format */
lrd->log.redopage.type = cpu_to_le16(LOG_DATA);
lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE);
pxd = &lrd->log.redopage.pxd;
/* log after-image for logredo(): */
lrd->type = cpu_to_le16(LOG_REDOPAGE);
	if (jfs_dirtable_inline(tlck->ip)) {
		/*
		 * The table has been truncated, we must have deleted
		 * the last entry, so don't bother logging this
		 */
mp->lid = 0;
grab_metapage(mp);
metapage_homeok(mp);
discard_metapage(mp);
		tlck->mp = NULL;
		return;
}
/* initialize as REDOPAGE/NOREDOPAGE record format */
lrd->log.redopage.type = cpu_to_le16(LOG_DTREE);
lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE);
pxd = &lrd->log.redopage.pxd;
if (tlck->type & tlckBTROOT)
lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
	/*
	 * page extension via relocation: entry insertion;
	 * page extension in-place: entry insertion;
	 * new right page from page split, reinitialized in-line
	 * root from root page split: entry insertion;
	 */
	if (tlck->type & (tlckNEW | tlckEXTEND)) {
		/*
		 * log after-image of the new page for logredo():
		 * mark log (LOG_NEW) for logredo() to initialize
		 * freelist and update bmap for alloc of the new page;
		 */
		lrd->type = cpu_to_le16(LOG_REDOPAGE);
		if (tlck->type & tlckEXTEND)
			lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND);
		else
lrd->log.redopage.type |= cpu_to_le16(LOG_NEW);
PXDaddress(pxd, mp->index);
PXDlength(pxd,
mp->logical_size >> tblk->sb->s_blocksize_bits);
lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
		/*
		 * format a maplock for txUpdateMap() to update bPMAP for
		 * alloc of the new page;
		 */
		if (tlck->type & tlckBTROOT)
			return;
tlck->flag |= tlckUPDATEMAP;
pxdlock = (struct pxd_lock *) & tlck->lock;
pxdlock->flag = mlckALLOCPXD;
pxdlock->pxd = *pxd;
pxdlock->index = 1;
/* mark page as homeward bound */
		tlck->flag |= tlckWRITEPAGE;
		return;
}
	/*
	 * entry insertion/deletion,
	 * sibling page link update (old right page before split);
	 */
	if (tlck->type & (tlckENTRY | tlckRELINK)) {
		/* log after-image for logredo(): */
lrd->type = cpu_to_le16(LOG_REDOPAGE);
PXDaddress(pxd, mp->index);
PXDlength(pxd,
mp->logical_size >> tblk->sb->s_blocksize_bits);
lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
/* mark page as homeward bound */
		tlck->flag |= tlckWRITEPAGE;
		return;
}
	/*
	 * page deletion: page has been invalidated
	 * page relocation: source extent
	 *
	 * (a maplock for free of the page has been formatted
	 * at txLock() time);
	 */
	if (tlck->type & (tlckFREE | tlckRELOCATE)) {
		/*
		 * log LOG_NOREDOPAGE of the deleted page for logredo()
		 * to start NoRedoPage filter and to update bmap for free
		 * of the deleted page
		 */
lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
pxdlock = (struct pxd_lock *) & tlck->lock;
*pxd = pxdlock->pxd;
lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
		/*
		 * a maplock for txUpdateMap() for free of the page
		 * has been formatted at txLock() time;
		 */
tlck->flag |= tlckUPDATEMAP;
	}
	return;
}
/* initialize as REDOPAGE/NOREDOPAGE record format */
lrd->log.redopage.type = cpu_to_le16(LOG_XTREE);
lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE);
page_pxd = &lrd->log.redopage.pxd;
if (tlck->type & tlckBTROOT) {
lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
		p = (xtpage_t *) &JFS_IP(ip)->i_xtroot;
		if (S_ISDIR(ip->i_mode))
lrd->log.redopage.type |=
cpu_to_le16(LOG_DIR_XTREE);
} else
p = (xtpage_t *) mp->data;
next = le16_to_cpu(p->header.nextindex);
	/*
	 * entry insertion/extension;
	 * sibling page link update (old right page before split);
	 */
	if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) {
		/*
		 * log after-image for logredo():
		 * logredo() will update bmap for alloc of new/extended
		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
		 * after-image of XADlist;
		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
		 * applying the after-image to the meta-data page.
		 */
lrd->type = cpu_to_le16(LOG_REDOPAGE);
PXDaddress(page_pxd, mp->index);
PXDlength(page_pxd,
mp->logical_size >> tblk->sb->s_blocksize_bits);
lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
		/*
		 * format a maplock for txUpdateMap() to update bPMAP
		 * for alloc of new/extended extents of XAD[lwm:next)
		 * from the page itself;
		 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
		 */
		lwm = xtlck->lwm.offset;
		if (lwm == 0)
lwm = XTPAGEMAXSLOT;
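		/* (an lwm offset of 0 means no low-water slot was recorded,
		 *  so the page is treated as having no newly allocated range) */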
		if (lwm == next)
			goto out;
		if (lwm > next) {
			jfs_err("xtLog: lwm > next");
			goto out;
}
tlck->flag |= tlckUPDATEMAP;
xadlock->flag = mlckALLOCXADLIST;
		xadlock->count = next - lwm;
		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
			int i;
			pxd_t *pxd;
			/*
			 * Lazy commit may allow xtree to be modified before
			 * txUpdateMap runs.  Copy xad into linelock to
			 * preserve correct data.
			 *
			 * We can fit twice as many pxd's as xads in the lock
			 */
xadlock->flag = mlckALLOCPXDLIST;
			pxd = xadlock->xdlist = &xtlck->pxdlock;
			for (i = 0; i < xadlock->count; i++) {
PXDaddress(pxd, addressXAD(&p->xad[lwm + i]));
PXDlength(pxd, lengthXAD(&p->xad[lwm + i]));
p->xad[lwm + i].flag &=
~(XAD_NEW | XAD_EXTENDED);
pxd++;
}
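			/*
			 * the copied pxd list now lives inside the linelock,
			 * so later changes to the xtree page cannot
			 * invalidate it before txUpdateMap() runs
			 */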
		} else {
			/*
			 * xdlist will point into the inode's xtree, ensure
			 * that transaction is not committed lazily.
			 */
xadlock->flag = mlckALLOCXADLIST;
xadlock->xdlist = &p->xad[lwm];
tblk->xflag &= ~COMMIT_LAZY;
}
jfs_info("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d count:%d",
tlck->ip, mp, tlck, lwm, xadlock->count);
maplock->index = 1;
      out:
	/* mark page as homeward bound */
tlck->flag |= tlckWRITEPAGE;
return;
}
	/*
	 * page deletion: file deletion/truncation (ref. xtTruncate())
	 *
	 * (page will be invalidated after log is written and bmap
	 * is updated from the page);
	 */
	if (tlck->type & tlckFREE) {
		/*
		 * LOG_NOREDOPAGE log for NoRedoPage filter:
		 * if page free from file delete, NoRedoFile filter from
		 * inode image of zero link count will subsume NoRedoPage
		 * filters for each page;
		 * if page free from file truncation, write NoRedoPage
		 * filter;
		 *
		 * update of block allocation map for the page itself:
		 * if page free from deletion and truncation, LOG_UPDATEMAP
		 * log for the page itself is generated from processing
		 * its parent page xad entries;
		 */
		/*
		 * if page free from file truncation, log LOG_NOREDOPAGE
		 * of the deleted page for logredo() to start NoRedoPage
		 * filter for the page;
		 */
		if (tblk->xflag & COMMIT_TRUNCATE) {
			/* write NOREDOPAGE for the page */
lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
PXDaddress(page_pxd, mp->index);
PXDlength(page_pxd,
mp->logical_size >> tblk->sb->
s_blocksize_bits);
lrd->backchain =
			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
			if (tlck->type & tlckBTROOT) {
				/* Empty xtree must be logged */
lrd->type = cpu_to_le16(LOG_REDOPAGE);
lrd->backchain =
cpu_to_le32(lmLog(log, tblk, lrd, tlck));
}
}
		/*
		 * init LOG_UPDATEMAP of the freed extents
		 * XAD[XTENTRYSTART:hwm) from the deleted page itself
		 * for logredo() to update bmap;
		 */
lrd->type = cpu_to_le16(LOG_UPDATEMAP);
lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST);
xtlck = (struct xtlock *) & tlck->lock;
hwm = xtlck->hwm.offset;
lrd->log.updatemap.nxd =
		    cpu_to_le16(hwm - XTENTRYSTART + 1);
		/* reformat linelock for lmLog() */
xtlck->header.offset = XTENTRYSTART;
xtlck->header.length = hwm - XTENTRYSTART + 1;
xtlck->index = 1;
lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
		/*
		 * format a maplock for txUpdateMap() to update bmap
		 * to free extents of XAD[XTENTRYSTART:hwm) from the
		 * deleted page itself;
		 */
tlck->flag |= tlckUPDATEMAP;
		xadlock->count = hwm - XTENTRYSTART + 1;
		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
			int i;
			pxd_t *pxd;
			/*
			 * Lazy commit may allow xtree to be modified before
			 * txUpdateMap runs.  Copy xad into linelock to
			 * preserve correct data.
			 *
			 * We can fit twice as many pxd's as xads in the lock
			 */
xadlock->flag = mlckFREEPXDLIST;
			pxd = xadlock->xdlist = &xtlck->pxdlock;
			for (i = 0; i < xadlock->count; i++) {
PXDaddress(pxd,
addressXAD(&p->xad[XTENTRYSTART + i]));
PXDlength(pxd,
lengthXAD(&p->xad[XTENTRYSTART + i]));
pxd++;
}
		} else {
			/*
			 * xdlist will point into the inode's xtree, ensure
			 * that transaction is not committed lazily.
			 */
xadlock->flag = mlckFREEXADLIST;
xadlock->xdlist = &p->xad[XTENTRYSTART];
tblk->xflag &= ~COMMIT_LAZY;
}
jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2",
tlck->ip, mp, xadlock->count);
maplock->index = 1;
		/* mark page as invalid */
		if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode))
		    && !(tlck->type & tlckBTROOT))
			tlck->flag |= tlckFREEPAGE;
		/*
		   else (tblk->xflag & COMMIT_PMAP)
		   ? release the page;
		 */
		return;
}
	/*
	 * page/entry truncation: file truncation (ref. xtTruncate())
	 *
	 *	|----------+------+------+---------------|
	 *		   |      |      |
	 *		   |      |     hwm - hwm before truncation
	 *		   |     next - truncation point
	 *		  lwm - lwm before truncation
	 * header ?
	 */
	if (tlck->type & tlckTRUNCATE) {
		pxd_t pxd;	/* truncated extent of xad */
		int twm;
		/*
		 * For truncation the entire linelock may be used, so it would
		 * be difficult to store xad list in linelock itself.
		 * Therefore, we'll just force transaction to be committed
		 * synchronously, so that xtree pages won't be changed before
		 * txUpdateMap runs.
		 */
tblk->xflag &= ~COMMIT_LAZY;
		lwm = xtlck->lwm.offset;
		if (lwm == 0)
lwm = XTPAGEMAXSLOT;
hwm = xtlck->hwm.offset;
twm = xtlck->twm.offset;
		/*
		 * write log records
		 */
		/*
		 * log after-image for logredo():
		 *
		 * logredo() will update bmap for alloc of new/extended
		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
		 * after-image of XADlist;
		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
		 * applying the after-image to the meta-data page.
		 */
lrd->type = cpu_to_le16(LOG_REDOPAGE);
PXDaddress(page_pxd, mp->index);
PXDlength(page_pxd,
mp->logical_size >> tblk->sb->s_blocksize_bits);
lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
		/*
		 * truncate entry XAD[twm == next - 1]:
		 */
		if (twm == next - 1) {
			/*
			 * init LOG_UPDATEMAP for logredo() to update bmap for
			 * free of truncated delta extent of the truncated
			 * entry XAD[next - 1]:
			 * (xtlck->pxdlock = truncated delta extent);
			 */
			pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
			/* assert(pxdlock->type & tlckTRUNCATE); */
lrd->type = cpu_to_le16(LOG_UPDATEMAP);
lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
lrd->log.updatemap.nxd = cpu_to_le16(1);
lrd->log.updatemap.pxd = pxdlock->pxd;
pxd = pxdlock->pxd; /* save to format maplock */
lrd->backchain =
cpu_to_le32(lmLog(log, tblk, lrd, NULL));
}
		/*
		 * free entries XAD[next:hwm]:
		 */
		if (hwm >= next) {
			/*
			 * init LOG_UPDATEMAP of the freed extents
			 * XAD[next:hwm] from the deleted page itself
			 * for logredo() to update bmap;
			 */
lrd->type = cpu_to_le16(LOG_UPDATEMAP);
lrd->log.updatemap.type =
cpu_to_le16(LOG_FREEXADLIST);
xtlck = (struct xtlock *) & tlck->lock;
hwm = xtlck->hwm.offset;
lrd->log.updatemap.nxd =
			    cpu_to_le16(hwm - next + 1);
			/* reformat linelock for lmLog() */
xtlck->header.offset = next;
xtlck->header.length = hwm - next + 1;
xtlck->index = 1;
lrd->backchain =
cpu_to_le32(lmLog(log, tblk, lrd, tlck));
}
		/*
		 * format maplock(s) for txUpdateMap() to update bmap
		 */
maplock->index = 0;
		/*
		 * allocate entries XAD[lwm:next):
		 */
		if (lwm < next) {
			/*
			 * format a maplock for txUpdateMap() to update bPMAP
			 * for alloc of new/extended extents of XAD[lwm:next)
			 * from the page itself;
			 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
			 */
tlck->flag |= tlckUPDATEMAP;
xadlock->flag = mlckALLOCXADLIST;
xadlock->count = next - lwm;
xadlock->xdlist = &p->xad[lwm];
/* mark page as homeward bound */
tlck->flag |= tlckWRITEPAGE;
		}
		return;
}
/*
 *	mapLog()
 *
 * function: log from maplock of freed data extents;
 */
static void mapLog(struct jfs_log * log, struct tblock * tblk,
		   struct lrd * lrd, struct tlock * tlck)
{
	struct pxd_lock *pxdlock;
	int i, nlock;
	pxd_t *pxd;
	/*
	 * page relocation: free the source page extent
	 *
	 * a maplock for txUpdateMap() for free of the page
	 * has been formatted at txLock() time saving the src
	 * relocated page address;
	 */
	if (tlck->type & tlckRELOCATE) {
		/*
		 * log LOG_NOREDOPAGE of the old relocated page
		 * for logredo() to start NoRedoPage filter;
		 */
lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
pxdlock = (struct pxd_lock *) & tlck->lock;
pxd = &lrd->log.redopage.pxd;
*pxd = pxdlock->pxd;
lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
		/*
		 * (N.B. currently, logredo() does NOT update bmap
		 * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE);
		 * if page free from relocation, LOG_UPDATEMAP log is
		 * specifically generated now for logredo()
		 * to update bmap for free of src relocated page;
		 * (new flag LOG_RELOCATE may be introduced which will
		 * inform logredo() to start NORedoPage filter and also
		 * update block allocation map at the same time, thus
		 * avoiding an extra log write);
		 */
lrd->type = cpu_to_le16(LOG_UPDATEMAP);
lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
lrd->log.updatemap.nxd = cpu_to_le16(1);
lrd->log.updatemap.pxd = pxdlock->pxd;
lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
		/*
		 * a maplock for txUpdateMap() for free of the page
		 * has been formatted at txLock() time;
		 */
		tlck->flag |= tlckUPDATEMAP;
		return;
	}
	/*
	 * Otherwise it's not a relocate request
	 */
	else {
		/*
		 * log LOG_UPDATEMAP for logredo() to update bmap for
		 * free of truncated/relocated delta extent of the data;
		 * e.g.: external EA extent, relocated/truncated extent
		 * from xtTailgate();
		 */
lrd->type = cpu_to_le16(LOG_UPDATEMAP);
pxdlock = (struct pxd_lock *) & tlck->lock;
		nlock = pxdlock->index;
		for (i = 0; i < nlock; i++, pxdlock++) {
			if (pxdlock->flag & mlckALLOCPXD)
lrd->log.updatemap.type =
				    cpu_to_le16(LOG_ALLOCPXD);
			else
lrd->log.updatemap.type =
cpu_to_le16(LOG_FREEPXD);
lrd->log.updatemap.nxd = cpu_to_le16(1);
lrd->log.updatemap.pxd = pxdlock->pxd;
lrd->backchain =
cpu_to_le32(lmLog(log, tblk, lrd, NULL));
jfs_info("mapLog: xaddr:0x%lx xlen:0x%x",
(ulong) addressPXD(&pxdlock->pxd),
lengthPXD(&pxdlock->pxd));
}
	/*
	 * format maplock for alloc of new EA extent
	 */
	if (newea) {
		/*
		 * Since the newea could be a completely zeroed entry we need
		 * to check for the two flags which indicate we should
		 * actually commit new EA data
		 */
		if (newea->flag & DXD_EXTENT) {
tlck = txMaplock(tid, ip, tlckMAP);
maplock = (struct pxd_lock *) & tlck->lock;
pxdlock = (struct pxd_lock *) maplock;
pxdlock->flag = mlckALLOCPXD;
PXDaddress(&pxdlock->pxd, addressDXD(newea));
PXDlength(&pxdlock->pxd, lengthDXD(newea));
pxdlock++;
maplock->index = 1;
		} else if (newea->flag & DXD_INLINE) {
tlck = NULL;
set_cflag(COMMIT_Inlineea, ip);
}
}
	/*
	 * format maplock for free of old EA extent
	 */
	if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) {
		if (tlck == NULL) {
tlck = txMaplock(tid, ip, tlckMAP);
maplock = (struct pxd_lock *) & tlck->lock;
pxdlock = (struct pxd_lock *) maplock;
maplock->index = 0;
}
pxdlock->flag = mlckFREEPXD;
PXDaddress(&pxdlock->pxd, addressDXD(oldea));
PXDlength(&pxdlock->pxd, lengthDXD(oldea));
maplock->index++;
}
}
/*
 *	txForce()
 *
 * function: synchronously write pages locked by transaction
 * after txLog() but before txUpdateMap();
 */
static void txForce(struct tblock * tblk)
{
	struct tlock *tlck;
	lid_t lid, next;
	struct metapage *mp;
	/*
	 * reverse the order of transaction tlocks in
	 * careful update order of address index pages
	 * (right to left, bottom up)
	 */
tlck = lid_to_tlock(tblk->next);
lid = tlck->next;
	tlck->next = 0;
	while (lid) {
tlck = lid_to_tlock(lid);
next = tlck->next;
tlck->next = tblk->next;
tblk->next = lid;
lid = next;
}
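	/* the singly-linked tlock list has now been reversed in place */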
	/*
	 * synchronously write the page, and
	 * hold the page for txUpdateMap();
	 */
	for (lid = tblk->next; lid; lid = next) {
tlck = lid_to_tlock(lid);
next = tlck->next;
		if (tlck->flag & tlckWRITEPAGE) {
			tlck->flag &= ~tlckWRITEPAGE;
			mp = tlck->mp;	/* page under this tlock */
/* do not release page to freelist */
			force_metapage(mp);
#if 0
			/*
			 * The "right" thing to do here is to
			 * synchronously write the metadata.
			 * With the current implementation this
			 * is hard since write_metapage requires
			 * us to kunmap & remap the page.  If we
			 * have tlocks pointing into the metadata
			 * pages, we don't want to do this.  I think
			 * we can get by with synchronously writing
			 * the pages when they are released.
			 */
assert(mp->nohomeok);
set_bit(META_dirty, &mp->flag);
			set_bit(META_sync, &mp->flag);
#endif
}
}
}
}
	/*
	 * update block allocation map
	 *
	 * update allocation state in pmap (and wmap) and
	 * update lsn of the pmap page;
	 */
	/*
	 * scan each tlock/page of transaction for block allocation/free:
	 *
	 * for each tlock/page of transaction, update map.
	 * ? are there tlock for pmap and pwmap at the same time ?
	 */
	for (lid = tblk->next; lid; lid = tlck->next) {
tlck = lid_to_tlock(lid);
		if ((tlck->flag & tlckUPDATEMAP) == 0)
			continue;
		if (tlck->flag & tlckFREEPAGE) {
			/*
			 * Another thread may attempt to reuse freed space
			 * immediately, so we want to get rid of the metapage
			 * before anyone else has a chance to get it.
			 * Lock metapage, update maps, then invalidate
			 * the metapage.
			 */
mp = tlck->mp;
ASSERT(mp->xflag & COMMIT_PAGE);
grab_metapage(mp);
}
		for (k = 0; k < nlock; k++, maplock++) {
			/*
			 * allocate blocks in persistent map:
			 *
			 * blocks have been allocated from wmap at alloc time;
			 */
			if (maplock->flag & mlckALLOC) {
txAllocPMap(ipimap, maplock, tblk);
			}
			/*
			 * free blocks in persistent and working map:
			 * blocks will be freed in pmap and then in wmap;
			 *
			 * ? tblock specifies the PMAP/PWMAP based upon
			 * transaction
			 *
			 * free blocks in persistent map:
			 * blocks will be freed from wmap at last reference
			 * release of the object for regular files;
			 *
			 * Always free blocks from both persistent & working
			 * maps for directories
			 */
			else {	/* (maplock->flag & mlckFREE) */
if (tlck->flag & tlckDIRECTORY)
txFreeMap(ipimap, maplock,
tblk, COMMIT_PWMAP); else
txFreeMap(ipimap, maplock,
tblk, maptype);
}
		}
		if (tlck->flag & tlckFREEPAGE) {
			if (!(tblk->flag & tblkGC_LAZY)) {
				/* This is equivalent to txRelease */
ASSERT(mp->lid == lid);
tlck->mp->lid = 0;
}
assert(mp->nohomeok == 1);
metapage_homeok(mp);
discard_metapage(mp);
tlck->mp = NULL;
}
	}
	/*
	 * update inode allocation map
	 *
	 * update allocation state in pmap and
	 * update lsn of the pmap page;
	 * update in-memory inode flag/state
	 *
	 * unlock mapper/write lock
	 */
	if (tblk->xflag & COMMIT_CREATE) {
		diUpdatePMap(ipimap, tblk->ino, false, tblk);
		/*
		 * update persistent block allocation map
		 * for the allocation of inode extent;
		 */
pxdlock.flag = mlckALLOCPXD;
pxdlock.pxd = tblk->u.ixpxd;
pxdlock.index = 1;
txAllocPMap(ipimap, (struct maplock *) & pxdlock, tblk);
	} else if (tblk->xflag & COMMIT_DELETE) {
ip = tblk->u.ip;
diUpdatePMap(ipimap, ip->i_ino, true, tblk);
iput(ip);
}
}
/*
 *	txAllocPMap()
 *
 * function: allocate from persistent map;
 *
 * parameter:
 *	ipbmap	-
 *	malock	-
 *		xad list:
 *		pxd:
 *
 *	maptype	-
 *		allocate from persistent map;
 *		free from persistent map;
 *		(e.g., tmp file - free from working map at release
 *		of last reference);
 *		free from persistent and working map;
 *
 *	lsn	- log sequence number;
 */
static void txAllocPMap(struct inode *ip, struct maplock * maplock,
			struct tblock * tblk)
{
	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
	struct xdlistlock *xadlistlock;
	xad_t *xad;
	s64 xaddr;
	int xlen;
	struct pxd_lock *pxdlock;
	struct xdlistlock *pxdlistlock;
	pxd_t *pxd;
	int n;
	if (jfs_ip->atlhead)
		jfs_ip->atltail = xlid;
	else {
		jfs_ip->atltail = 0;
		/*
		 * If inode was on anon_list, remove it
		 */
list_del_init(&jfs_ip->anon_inode_list);
}
TXN_UNLOCK();
}
/*
 *	txAbort()
 *
 * function: abort tx before commit;
 *
 * frees line-locks and segment locks for all
 * segments in comdata structure.
 * Optionally sets state of file-system to FM_DIRTY in super-block.
 * log ages of page-frames in memory for which the caller holds
 * tlocks are reset to 0 (to avoid logwrap).
 */
void txAbort(tid_t tid, int dirty)
{
	lid_t lid, next;
	struct metapage *mp;
	struct tblock *tblk = tid_to_tblock(tid);
	struct tlock *tlck;
	/*
	 * free tlocks of the transaction
	 */
	for (lid = tblk->next; lid; lid = next) {
tlck = lid_to_tlock(lid);
next = tlck->next;
mp = tlck->mp;
JFS_IP(tlck->ip)->xtlid = 0;
if (mp) {
mp->lid = 0;
			/*
			 * reset lsn of page to avoid logwrap:
			 *
			 * (page may have been previously committed by another
			 * transaction(s) but has not been paged, i.e.,
			 * it may be on logsync list even though it has not
			 * been logged for the current tx.)
			 */
			if (mp->xflag & COMMIT_PAGE && mp->lsn)
LogSyncRelease(mp);
		}
		/* insert tlock at head of freelist */
TXN_LOCK();
txLockFree(lid);
TXN_UNLOCK();
}
/* caller will free the transaction block */
tblk->next = tblk->last = 0;
	/*
	 * mark filesystem dirty
	 */
	if (dirty)
jfs_error(tblk->sb, "\n");