// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2003 Sistina Software Limited.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */
/*
 *------------------------------------------------------------------
 * Region hash
 *
 * The mirror splits itself up into discrete regions. Each
 * region can be in one of three states: clean, dirty,
 * nosync. There is no need to put clean regions in the hash.
 *
 * In addition to being present in the hash table a region _may_
 * be present on one of three lists.
 *
 *   clean_regions: Regions on this list have no io pending to
 *   them, they are in sync, we are no longer interested in them,
 *   they are dull. dm_rh_update_states() will remove them from the
 *   hash table.
 *
 *   quiesced_regions: These regions have been spun down, ready
 *   for recovery. rh_recovery_start() will remove regions from
 *   this list and hand them to kmirrord, which will schedule the
 *   recovery io with kcopyd.
 *
 *   recovered_regions: Regions that kcopyd has successfully
 *   recovered. dm_rh_update_states() will now schedule any delayed
 *   io, up the recovery_count, and remove the region from the
 *   hash.
 *
 * There are 2 locks:
 *   A rw spin lock 'hash_lock' protects just the hash table,
 *   this is never held in write mode from interrupt context,
 *   which I believe means that we only have to disable irqs when
 *   doing a write lock.
 *
 *   An ordinary spin lock 'region_lock' that protects the three
 *   lists in the region_hash, with the 'state', 'list' and
 *   'delayed_bios' fields of the regions. This is used from irq
 *   context, so all other uses will have to suspend local irqs.
 *------------------------------------------------------------------
*/

struct dm_region_hash {
	uint32_t region_size;
	unsigned int region_shift;	/* presumably log2(region_size) — TODO confirm */

	/* holds persistent region state */
	struct dm_dirty_log *log;

	/*
	 * NOTE(review): this copy elides the remaining struct fields and the
	 * closing brace; the FIXME and the hash #defines below belong to
	 * later sections of the file.  Fused token "unsignedint" above was
	 * repaired to "unsigned int".
	 */

	/*
	 * FIXME: shall we pass in a structure instead of all these args to
	 * dm_region_hash_create()????
	 */

#define RH_HASH_MULT 2654435387U
#define RH_HASH_SHIFT 12
/*
 * NOTE(review): tail of dm_rh_get_state() — the function header and the
 * initial hash-table lookup precede this chunk and are not visible here.
 *
 * The region wasn't in the hash, so we fall back to the
 * dirty log.
 */
	r = rh->log->type->in_sync(rh->log, region, may_block);

	/*
	 * Any error from the dirty log (eg. -EWOULDBLOCK) gets
	 * taken as a DM_RH_NOSYNC: only an explicit "1" (in sync) from
	 * the log maps to DM_RH_CLEAN.
	 */
	return r == 1 ? DM_RH_CLEAN : DM_RH_NOSYNC;
}
EXPORT_SYMBOL_GPL(dm_rh_get_state);
/*
 * NOTE(review): tail of complete_resync_work() — the function header is not
 * visible in this chunk.  The "&reg" below had been corrupted to "®" by
 * HTML-entity decoding and has been restored.
 *
 * Dispatch the bios before we call 'wake_up_all'.
 * This is important because if we are suspending,
 * we want to know that recovery is complete and
 * the work queue is flushed. If we wake_up_all
 * before we dispatch_bios (queue bios and call wake()),
 * then we risk suspending before the work queue
 * has been properly flushed.
 */
	rh->dispatch_bios(rh->context, &reg->delayed_bios);
	if (atomic_dec_and_test(&rh->recovery_in_flight))
		rh->wakeup_all_recovery_waiters(rh->context);
	/* Release one slot of the recovery-count semaphore. */
	up(&rh->recovery_count);
}
/*
 * dm_rh_mark_nosync
 * @ms
 * @bio
 *
 * The bio was written on some mirror(s) but failed on other mirror(s).
 * We can successfully endio the bio but should avoid the region being
 * marked clean by setting the state DM_RH_NOSYNC.
 *
 * This function is _not_ safe in interrupt context!
 */
void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)
{
	unsigned long flags;
	struct dm_dirty_log *log = rh->log;
	struct dm_region *reg;
	region_t region = dm_rh_bio_to_region(rh, bio);
	int recovering = 0;

	/* A failed flush means we can no longer trust any region state. */
	if (bio->bi_opf & REQ_PREFLUSH) {
		rh->flush_failure = 1;
		return;
	}

	/* Discards carry no data to resync; nothing to mark. */
	if (bio_op(bio) == REQ_OP_DISCARD)
		return;

	/* We must inform the log that the sync count has changed. */
	log->type->set_region_sync(log, region, 0);

	/*
	 * NOTE(review): this lookup was missing in this copy, leaving 'reg'
	 * uninitialized before the BUG_ON() below (undefined behavior).
	 * Restored per the upstream implementation — confirm against the
	 * canonical dm-region-hash.c.
	 */
	read_lock(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	read_unlock(&rh->hash_lock);

	/* region hash entry should exist because write was in-flight */
	BUG_ON(!reg);
	BUG_ON(!list_empty(&reg->list));

	spin_lock_irqsave(&rh->region_lock, flags);
	/*
	 * Possible cases:
	 *   1) DM_RH_DIRTY
	 *   2) DM_RH_NOSYNC: was dirty, other preceding writes failed
	 *   3) DM_RH_RECOVERING: flushing pending writes
	 * Either case, the region should have not been connected to list.
	 */
	recovering = (reg->state == DM_RH_RECOVERING);
	reg->state = DM_RH_NOSYNC;
	BUG_ON(!list_empty(&reg->list));
	spin_unlock_irqrestore(&rh->region_lock, flags);

	/* A region yanked out of recovery must still complete its resync work
	 * (with success == 0) so waiters are released. */
	if (recovering)
		complete_resync_work(reg, 0);
}
EXPORT_SYMBOL_GPL(dm_rh_mark_nosync);
/*
 * NOTE(review): interior fragment of dm_rh_update_states() — the enclosing
 * function header and the setup of the local 'recovered' list are not
 * visible in this chunk.
 *
 * All the regions on the recovered and clean lists have
 * now been pulled out of the system, so no need to do
 * any more locking.
 */
	list_for_each_entry_safe(reg, next, &recovered, list) {
		/* Clear the dirty-log bit, complete the resync work with
		 * success (1), and return the entry to the region pool. */
		rh->log->type->clear_region(rh->log, reg->key);
		complete_resync_work(reg, 1);
		mempool_free(reg, &rh->region_pool);
	}
/*
 * NOTE(review): tail of dm_rh_dec() — the function header and the
 * declarations of 'reg', 'flags' and 'should_wake' are not visible in this
 * chunk.  Fused "elseif" tokens and HTML-decoded "®" (for "&reg") were
 * repaired.
 */
	spin_lock_irqsave(&rh->region_lock, flags);
	if (atomic_dec_and_test(&reg->pending)) {
		/*
		 * There is no pending I/O for this region.
		 * We can move the region to corresponding list for next action.
		 * At this point, the region is not yet connected to any list.
		 *
		 * If the state is DM_RH_NOSYNC, the region should be kept off
		 * from clean list.
		 * The hash entry for DM_RH_NOSYNC will remain in memory
		 * until the region is recovered or the map is reloaded.
		 */

		/* do nothing for DM_RH_NOSYNC */
		if (unlikely(rh->flush_failure)) {
			/*
			 * If a write flush failed some time ago, we
			 * don't know whether or not this write made it
			 * to the disk, so we must resync the device.
			 */
			reg->state = DM_RH_NOSYNC;
		} else if (reg->state == DM_RH_RECOVERING) {
			list_add_tail(&reg->list, &rh->quiesced_regions);
		} else if (reg->state == DM_RH_DIRTY) {
			reg->state = DM_RH_CLEAN;
			list_add(&reg->list, &rh->clean_regions);
		}
		should_wake = 1;
	}
	spin_unlock_irqrestore(&rh->region_lock, flags);

	if (should_wake)
		rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_dec);
/* * Starts quiescing a region in preparation for recovery.
*/ staticint __rh_recovery_prepare(struct dm_region_hash *rh)
{ int r;
region_t region; struct dm_region *reg;
/* * Ask the dirty log what's next.
*/
r = rh->log->type->get_resync_work(rh->log, ®ion); if (r <= 0) return r;
/* * Get this region, and start it quiescing by setting the * recovering flag.
*/
read_lock(&rh->hash_lock);
reg = __rh_find(rh, region);
read_unlock(&rh->hash_lock);
/*
 * Keep pulling resync work from the dirty log via __rh_recovery_prepare()
 * until either the work runs out or the recovery_count semaphore can no
 * longer be taken.  recovery_in_flight is bumped once per started region,
 * plus one extra guard reference held across the whole loop.
 */
void dm_rh_recovery_prepare(struct dm_region_hash *rh)
{
	/* Extra reference to avoid race with dm_rh_stop_recovery */
	atomic_inc(&rh->recovery_in_flight);

	for (;;) {
		if (down_trylock(&rh->recovery_count))
			break;

		atomic_inc(&rh->recovery_in_flight);
		if (__rh_recovery_prepare(rh) > 0)
			continue;

		/* No more work: give back this iteration's reference and
		 * semaphore slot, then stop. */
		atomic_dec(&rh->recovery_in_flight);
		up(&rh->recovery_count);
		break;
	}

	/* Drop the extra reference */
	if (atomic_dec_and_test(&rh->recovery_in_flight))
		rh->wakeup_all_recovery_waiters(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_prepare);
/*
 * NOTE(review): interior fragment — presumably dm_rh_recovery_start()'s
 * dequeue of the next quiesced region (enclosing function and the 'reg'
 * declaration are not visible in this chunk; confirm).  HTML-decoded "®"
 * was repaired to "&reg".
 */
	spin_lock_irq(&rh->region_lock);
	if (!list_empty(&rh->quiesced_regions)) {
		reg = list_entry(rh->quiesced_regions.next,
				 struct dm_region, list);
		list_del_init(&reg->list);	/* remove from the quiesced list */
	}
	spin_unlock_irq(&rh->region_lock);
/*
 * NOTE(review): the German text below is website boilerplate picked up when
 * this file was scraped ("the information on this website was compiled to
 * the best of our knowledge; no guarantee of completeness, correctness or
 * quality is given.  Note: syntax colouring and the measurement are still
 * experimental").  It is not part of the source; kept here as a comment so
 * the file remains syntactically valid:
 *
 * Die Informationen auf dieser Webseite wurden nach bestem Wissen
 * sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit,
 * noch Richtigkeit, noch Qualität der bereit gestellten Informationen
 * zugesichert.
 * Bemerkung:
 * Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.
 */