// SPDX-License-Identifier: GPL-2.0+
/*
 * linux/fs/jbd2/recovery.c
 *
 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
 *
 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved
 *
 * Journal recovery routines for the generic filesystem journaling code;
 * part of the ext2fs journaling system.
 */
/*
 * Maintain information about the progress of the recovery job, so that
 * the different passes can carry information between them.
 */
struct recovery_info
{
	/* First transaction ID examined; recorded during PASS_SCAN. */
	tid_t		start_transaction;
	/*
	 * Transaction ID at which the scan stopped; later passes stop once
	 * they reach this ID.
	 */
	tid_t		end_transaction;
	/* Log head block found by the scan; copied into journal->j_head. */
	unsigned long	head_block;

	/* Statistics reported through jbd2_debug() once recovery finishes. */
	int		nr_replays;	/* reported as the number of blocks replayed */
	int		nr_revokes;	/* reported as the total number of revokes */
	int		nr_revoke_hits;	/* blocks skipped at replay because revoked */
};
/*
 * Release readahead buffers after use.
 *
 * @b: array of buffer heads to release
 * @n: number of entries in @b
 *
 * Calls brelse() on every entry, walking from the last index down to 0.
 * Does nothing when @n is zero or negative.
 */
static void journal_brelse_array(struct buffer_head *b[], int n)
{
	while (--n >= 0)
		brelse(b[n]);
}
/* * When reading from the journal, we are going through the block device * layer directly and so there is no readahead being done for us. We * need to implement any readahead ourselves if we want it to happen at * all. Recovery is basically one long sequential read, so make sure we * do the IO in reasonably large chunks. * * This is not so critical that we need to be enormously clever about * the readahead size, though. 128K is a purely arbitrary, good-enough * fixed value.
*/
if (err) {
printk(KERN_ERR "JBD2: bad block at offset %u\n",
offset); return err;
}
bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); if (!bh) return -ENOMEM;
if (!buffer_uptodate(bh)) { /* * If this is a brand new buffer, start readahead. * Otherwise, we assume we are already reading it.
*/ bool need_readahead = !buffer_req(bh);
bh_read_nowait(bh, 0); if (need_readahead)
do_readahead(journal, offset);
wait_on_buffer(bh);
}
if (!buffer_uptodate(bh)) {
printk(KERN_ERR "JBD2: Failed to read block at offset %u\n",
offset);
brelse(bh); return -EIO;
}
if (tag.t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG)) break;
}
return nr;
}
/*
 * Make sure we wrap around the log correctly!
 *
 * If @var has reached or passed (journal)->j_last, pull it back by the
 * span (j_last - j_first).  @var must be a modifiable lvalue; it is
 * evaluated more than once, so it must not have side effects.
 */
#define wrap(journal, var)						\
do {									\
	if ((var) >= (journal)->j_last)					\
		(var) -= ((journal)->j_last - (journal)->j_first);	\
} while (0)
/**
 * jbd2_journal_recover - recovers an on-disk journal
 * @journal: the journal to recover
 *
 * The primary function for recovering the log contents when mounting a
 * journaled device.
 *
 * Recovery is done in three passes.  In the first pass, we look for the
 * end of the log.  In the second, we assemble the list of revoke
 * blocks.  In the third and final pass, we replay any un-revoked blocks
 * in the log.
 *
 * Return: 0 when no recovery is needed or everything succeeded; otherwise
 * the first non-zero status produced by the passes, the block device sync,
 * the write-error check or the cache flush, in that order.
 */
int jbd2_journal_recover(journal_t *journal)
{
	struct recovery_info info;
	int status;
	int step_status;

	memset(&info, 0, sizeof(info));

	/*
	 * The journal superblock's s_start field (the current log head)
	 * is always zero if, and only if, the journal was cleanly
	 * unmounted.  The in-memory copy, j_tail, is consulted instead
	 * because jbd2_journal_wipe() could have updated it without
	 * updating the journal superblock.
	 */
	if (!journal->j_tail) {
		journal_superblock_t *sb = journal->j_superblock;

		jbd2_debug(1, "No recovery required, last transaction %d, head block %u\n",
			   be32_to_cpu(sb->s_sequence), be32_to_cpu(sb->s_head));
		journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
		journal->j_head = be32_to_cpu(sb->s_head);
		return 0;
	}

	/* Run the passes in order; a failing pass stops the later ones. */
	status = do_one_pass(journal, &info, PASS_SCAN);
	if (status == 0)
		status = do_one_pass(journal, &info, PASS_REVOKE);
	if (status == 0)
		status = do_one_pass(journal, &info, PASS_REPLAY);

	jbd2_debug(1, "JBD2: recovery, exit status %d, "
		   "recovered transactions %u to %u\n",
		   status, info.start_transaction, info.end_transaction);
	jbd2_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
		   info.nr_replays, info.nr_revoke_hits, info.nr_revokes);

	/*
	 * Restart the log at the next transaction ID, thus invalidating
	 * any existing commit records in the log.
	 */
	info.end_transaction++;
	journal->j_transaction_sequence = info.end_transaction;
	journal->j_head = info.head_block;
	jbd2_debug(1, "JBD2: last transaction %d, head block %lu\n",
		   journal->j_transaction_sequence, journal->j_head);

	jbd2_journal_clear_revoke(journal);

	/*
	 * Free the revoke table allocated for replay: if j_revoke is neither
	 * of the journal's two regular tables, destroy it and switch back to
	 * table 1.
	 */
	if (!(journal->j_revoke == journal->j_revoke_table[0] ||
	      journal->j_revoke == journal->j_revoke_table[1])) {
		jbd2_journal_destroy_revoke_table(journal->j_revoke);
		journal->j_revoke = journal->j_revoke_table[1];
	}

	/*
	 * Every remaining step always runs; its result is only recorded
	 * when no earlier failure has been seen.
	 */
	step_status = sync_blockdev(journal->j_fs_dev);
	if (status == 0)
		status = step_status;

	step_status = jbd2_check_fs_dev_write_error(journal);
	if (status == 0)
		status = step_status;

	/* Make sure all replayed data is on permanent storage */
	if (journal->j_flags & JBD2_BARRIER) {
		step_status = blkdev_issue_flush(journal->j_fs_dev);
		if (status == 0)
			status = step_status;
	}

	return status;
}
/**
 * jbd2_journal_skip_recovery - Start journal and wipe existing records
 * @journal: journal to start up
 *
 * Locate any valid recovery information from the journal and set up the
 * journal structures in memory to ignore it (presumably because the
 * caller has evidence that it is out of date).
 * This function doesn't appear to be exported.
 *
 * We perform one pass over the journal to allow us to tell the user how
 * much recovery information is being erased, and to let us initialise
 * the journal transaction sequence numbers to the next unused ID.
*/ int jbd2_journal_skip_recovery(journal_t *journal)
{ int err;
/* * calc_chksums calculates the checksums for the blocks described in the * descriptor block.
*/ staticint calc_chksums(journal_t *journal, struct buffer_head *bh, unsignedlong *next_log_block, __u32 *crc32_sum)
{ int i, num_blks, err; unsignedlong io_block; struct buffer_head *obh;
num_blks = count_tags(journal, bh); /* Calculate checksum of the descriptor block. */
*crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
/* If the block has been revoked, then we're all done here. */ if (jbd2_journal_test_revoke(journal, blocknr,
next_commit_ID)) {
brelse(obh);
++info->nr_revoke_hits; goto skip_write;
}
/* Look for block corruption */ if (!jbd2_block_tag_csum_verify(journal, &tag,
(journal_block_tag3_t *)tagp,
obh->b_data, next_commit_ID)) {
brelse(obh);
ret = -EFSBADCRC;
pr_err("JBD2: Invalid checksum recovering data block %llu in journal block %lu\n",
blocknr, io_block); goto skip_write;
}
/* Find a buffer for the new data being restored */
nbh = __getblk(journal->j_fs_dev, blocknr,
journal->j_blocksize); if (nbh == NULL) {
pr_err("JBD2: Out of memory during recovery.\n");
brelse(obh); return -ENOMEM;
}
/* * First thing is to establish what we expect to find in the log * (in terms of transaction IDs), and where (in terms of log * block offsets): query the superblock.
*/
first_commit_ID = next_commit_ID; if (pass == PASS_SCAN)
info->start_transaction = first_commit_ID; elseif (pass == PASS_REVOKE) { /* * Would the default revoke table have too long hash chains * during replay?
*/ if (info->nr_revokes > JOURNAL_REVOKE_DEFAULT_HASH * 16) { unsignedint hash_size;
/* * Aim for average chain length of 8, limit at 1M * entries to avoid problems with malicious * filesystems.
*/
hash_size = min(roundup_pow_of_two(info->nr_revokes / 8),
1U << 20);
journal->j_revoke =
jbd2_journal_init_revoke_table(hash_size); if (!journal->j_revoke) {
printk(KERN_ERR "JBD2: failed to allocate revoke table for replay with %u entries. " "Journal replay may be slow.\n", hash_size);
journal->j_revoke = journal->j_revoke_table[1];
}
}
}
/* * Now we walk through the log, transaction by transaction, * making sure that each transaction has a commit block in the * expected place. Each complete transaction gets replayed back * into the main filesystem.
*/
while (1) {
cond_resched();
/* If we already know where to stop the log traversal, * check right now that we haven't gone past the end of
* the log. */
if (pass != PASS_SCAN) if (tid_geq(next_commit_ID, info->end_transaction)) break;
jbd2_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
next_commit_ID, next_log_block, journal->j_last);
/* Skip over each chunk of the transaction looking * either the next descriptor block or the final commit
* record. */
/* OK, we have a valid descriptor block which matches * all of the sequence number checks. What are we going
* to do with it? That depends on the pass... */
switch(blocktype) { case JBD2_DESCRIPTOR_BLOCK: /* Verify checksum first */ if (!jbd2_descriptor_block_csum_verify(journal,
bh->b_data)) { /* * PASS_SCAN can see stale blocks due to lazy * journal init. Don't error out on those yet.
*/ if (pass != PASS_SCAN) {
pr_err("JBD2: Invalid checksum recovering block %lu in log\n",
next_log_block);
err = -EFSBADCRC; goto failed;
}
need_check_commit_time = true;
jbd2_debug(1, "invalid descriptor block found in %lu\n",
next_log_block);
}
/* If it is a valid descriptor block, replay it * in pass REPLAY; if journal_checksums enabled, then * calculate checksums in PASS_SCAN, otherwise,
* just skip over the blocks it describes. */ if (pass != PASS_REPLAY) { if (pass == PASS_SCAN &&
jbd2_has_feature_checksum(journal) &&
!info->end_transaction) { if (calc_chksums(journal, bh,
&next_log_block,
&crc32_sum)) break; continue;
}
next_log_block += count_tags(journal, bh);
wrap(journal, next_log_block); continue;
}
/* * A descriptor block: we can now write all of the * data blocks. Yay, useful work is finally getting * done here!
*/
err = jbd2_do_replay(journal, info, bh, &next_log_block,
next_commit_ID); if (err) { if (err == -ENOMEM) goto failed;
success = err;
}
continue;
case JBD2_COMMIT_BLOCK: if (pass != PASS_SCAN) {
next_commit_ID++; continue;
}
/* How to differentiate between interrupted commit * and journal corruption ? * * {nth transaction} * Checksum Verification Failed * | * ____________________ * | | * async_commit sync_commit * | | * | GO TO NEXT "Journal Corruption" * | TRANSACTION * | * {(n+1)th transanction} * | * _______|______________ * | | * Commit block found Commit block not found * | | * "Journal Corruption" | * _____________|_________ * | | * nth trans corrupt OR nth trans * and (n+1)th interrupted interrupted * before commit block * could reach the disk. * (Cannot find the difference in above * mentioned conditions. Hence assume * "Interrupted Commit".)
*/
commit_time = be64_to_cpu(
((struct commit_header *)bh->b_data)->h_commit_sec); /* * If need_check_commit_time is set, it means we are in * PASS_SCAN and csum verify failed before. If * commit_time is increasing, it's the same journal, * otherwise it is stale journal block, just end this * recovery.
*/ if (need_check_commit_time) { if (commit_time >= last_trans_commit_time) {
pr_err("JBD2: Invalid checksum found in transaction %u\n",
next_commit_ID);
err = -EFSBADCRC; goto failed;
}
ignore_crc_mismatch: /* * It likely does not belong to same journal, * just end this recovery with success.
*/
jbd2_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n",
next_commit_ID); goto done;
}
/* * Found an expected commit block: if checksums * are present, verify them in PASS_SCAN; else not * much to do other than move on to the next sequence * number.
*/ if (jbd2_has_feature_checksum(journal)) { struct commit_header *cbh =
(struct commit_header *)bh->b_data; unsigned found_chksum =
be32_to_cpu(cbh->h_chksum[0]);
if (info->end_transaction) {
journal->j_failed_commit =
info->end_transaction; break;
}
case JBD2_REVOKE_BLOCK: /* * If we aren't in the SCAN or REVOKE pass, then we can * just skip over this block.
*/ if (pass != PASS_REVOKE && pass != PASS_SCAN) continue;
/* * Check revoke block crc in pass_scan, if csum verify * failed, check commit block time later.
*/ if (pass == PASS_SCAN &&
!jbd2_descriptor_block_csum_verify(journal,
bh->b_data)) {
jbd2_debug(1, "JBD2: invalid revoke block found in %lu\n",
next_log_block);
need_check_commit_time = true;
}
default:
jbd2_debug(3, "Unrecognised magic %d, end of scan.\n",
blocktype); goto done;
}
}
done:
brelse(bh); /* * We broke out of the log scan loop: either we came to the * known end of the log or we found an unexpected block in the * log. If the latter happened, then we know that the "current" * transaction marks the end of the valid log.
*/
if (pass == PASS_SCAN) { if (!info->end_transaction)
info->end_transaction = next_commit_ID; if (!info->head_block)
info->head_block = head_block;
} else { /* It's really bad news if different passes end up at
* different places (but possible due to IO errors). */ if (info->end_transaction != next_commit_ID) {
printk(KERN_ERR "JBD2: recovery pass %d ended at " "transaction %u, expected %u\n",
pass, next_commit_ID, info->end_transaction); if (!success)
success = -EIO;
}
}
if (jbd2_has_feature_fast_commit(journal) && pass != PASS_REVOKE) {
err = fc_do_one_pass(journal, info, pass); if (err)
success = err;
}
return success;
failed:
brelse(bh); return err;
}
/* Scan a revoke record, marking all blocks mentioned as revoked. */
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.