/*
 * When searching for deduplication records, the index first searches the volume index, and then
 * searches the chapter index for the relevant chapter. If the chapter has been fully committed to
 * storage, the chapter pages are loaded into the page cache. If the chapter has not yet been
 * committed (either the open chapter or a recently closed one), the index searches the in-memory
 * representation of the chapter. Finally, if the volume index does not find a record and the index
 * is sparse, the index will search the sparse cache.
 *
 * The index sends two kinds of messages to coordinate between zones: chapter close messages for the
 * chapter writer, and sparse cache barrier messages for the sparse cache.
 *
 * The chapter writer is responsible for committing chapters of records to storage. Since zones can
 * get different numbers of records, some zones may fall behind others. Each time a zone fills up
 * its available space in a chapter, it informs the chapter writer that the chapter is complete,
 * and also informs all other zones that it has closed the chapter. Each other zone will then close
 * the chapter immediately, regardless of how full it is, in order to minimize skew between zones.
 * Once every zone has closed the chapter, the chapter writer will commit that chapter to storage.
 *
 * The last zone to close the chapter also removes the oldest chapter from the volume index.
 * Although that chapter is invalid for zones that have moved on, the existence of the open chapter
 * means that those zones will never ask the volume index about it. No zone is allowed to get more
 * than one chapter ahead of any other. If a zone is so far ahead that it tries to close another
 * chapter before the previous one has been closed by all zones, it is forced to wait.
 *
 * The sparse cache relies on having the same set of chapter indexes available to all zones. When a
 * request wants to add a chapter to the sparse cache, it sends a barrier message to each zone
 * during the triage stage that acts as a rendezvous. Once every zone has reached the barrier and
 * paused its operations, the cache membership is changed and each zone is then informed that it
 * can proceed. More details can be found in the sparse cache documentation.
 *
 * If a sparse cache has only one zone, it will not create a triage queue, but it still needs the
 * barrier message to change the sparse cache membership, so the index simulates the message by
 * invoking the handler directly.
 */
/*
 * Bookkeeping for the chapter writer thread. The thread's driver function, close_chapters(),
 * holds the mutex and waits on the condition variable until zones_to_write reaches the index's
 * zone count, then passes the per-zone chapters, the open chapter index, and the collated
 * records to uds_close_open_chapter().
 */
struct chapter_writer {
	/* The index to which we belong */
	struct uds_index *index;
	/* The thread to do the writing */
	struct thread *thread;
	/* The lock protecting the following fields */
	struct mutex mutex;
	/* The condition signalled on state changes */
	struct cond_var cond;
	/* Set to true to stop the thread */
	bool stop;
	/* The result from the most recent write */
	int result;
	/* The number of bytes allocated by the chapter writer */
	size_t memory_size;
	/* The number of zones which have submitted a chapter for writing */
	unsigned int zones_to_write;
	/* Open chapter index used by uds_close_open_chapter() */
	struct open_chapter_index *open_chapter_index;
	/* Collated records used by uds_close_open_chapter() */
	struct uds_volume_record *collated_records;
	/* The chapters to write (one per zone); flexible array sized at allocation time */
	struct open_chapter_zone *chapters[];
};
/*
 * Decide whether this request should trigger a sparse cache barrier message, i.e. a change in
 * the membership of the sparse cache. Returns the virtual chapter to add to the cache, or
 * NO_CHAPTER when no change in membership is wanted.
 */
static u64 triage_index_request(struct uds_index *index, struct uds_request *request)
{
	struct index_zone *request_zone;
	u64 chapter = uds_lookup_volume_index_name(index->volume_index, &request->record_name);

	/* A lookup result of NO_CHAPTER means there is no candidate chapter to add. */
	if (chapter != NO_CHAPTER) {
		request_zone = index->zones[request->zone_number];

		/*
		 * FIXME: Optimize for a common case by remembering the chapter from the most
		 * recent barrier message and skipping this chapter if it is the same.
		 */
		if (is_zone_chapter_sparse(request_zone, chapter))
			return chapter;
	}

	/* Either the name has no chapter, or its chapter is not sparse for this zone. */
	return NO_CHAPTER;
}
/* * Simulate a message to change the sparse cache membership for a single-zone sparse index. This * allows us to forgo the complicated locking required by a multi-zone sparse index. Any other kind * of index does nothing here.
*/ staticint simulate_index_zone_barrier_message(struct index_zone *zone, struct uds_request *request)
{
u64 sparse_virtual_chapter;
if ((zone->index->zone_count > 1) ||
!uds_is_sparse_index_geometry(zone->index->volume->geometry)) return UDS_SUCCESS;
sparse_virtual_chapter = triage_index_request(zone->index, request); if (sparse_virtual_chapter == NO_CHAPTER) return UDS_SUCCESS;
/* This is the request processing function for the triage queue. */ staticvoid triage_request(struct uds_request *request)
{ struct uds_index *index = request->index;
u64 sparse_virtual_chapter = triage_index_request(index, request);
if (sparse_virtual_chapter != NO_CHAPTER)
enqueue_barrier_messages(index, sparse_virtual_chapter);
/* * Inform the chapter writer that this zone is done with this chapter. The chapter won't start * writing until all zones have closed it.
*/ staticunsignedint start_closing_chapter(struct uds_index *index, unsignedint zone_number, struct open_chapter_zone *chapter)
{ unsignedint finished_zones; struct chapter_writer *writer = index->chapter_writer;
result = uds_get_volume_index_record(zone->index->volume_index,
&request->record_name, &record); if (result != UDS_SUCCESS) return result;
if (record.is_found) { if (request->requeued && request->virtual_chapter != record.virtual_chapter)
set_request_location(request, UDS_LOCATION_UNKNOWN);
request->virtual_chapter = record.virtual_chapter;
result = get_record_from_zone(zone, request, &found); if (result != UDS_SUCCESS) return result;
}
if (found)
set_chapter_location(request, zone, record.virtual_chapter);
/* * If a record has overflowed a chapter index in more than one chapter (or overflowed in * one chapter and collided with an existing record), it will exist as a collision record * in the volume index, but we won't find it in the volume. This case needs special * handling.
*/
overflow_record = (record.is_found && record.is_collision && !found);
chapter = zone->newest_virtual_chapter; if (found || overflow_record) { if ((request->type == UDS_QUERY_NO_UPDATE) ||
((request->type == UDS_QUERY) && overflow_record)) { /* There is nothing left to do. */ return UDS_SUCCESS;
}
if (record.virtual_chapter != chapter) { /* * Update the volume index to reference the new chapter for the block. If * the record had been deleted or dropped from the chapter index, it will * be back.
*/
result = uds_set_volume_index_record_chapter(&record, chapter);
} elseif (request->type != UDS_UPDATE) { /* The record is already in the open chapter. */ return UDS_SUCCESS;
}
} else { /* * The record wasn't in the volume index, so check whether the * name is in a cached sparse chapter. If we found the name on * a previous search, use that result instead.
*/ if (request->location == UDS_LOCATION_RECORD_PAGE_LOOKUP) {
found = true;
} elseif (request->location == UDS_LOCATION_UNAVAILABLE) {
found = false;
} elseif (uds_is_sparse_index_geometry(zone->index->volume->geometry) &&
!uds_is_volume_index_sample(zone->index->volume_index,
&request->record_name)) {
result = search_sparse_cache_in_zone(zone, request, NO_CHAPTER,
&found); if (result != UDS_SUCCESS) return result;
}
if (found)
set_request_location(request, UDS_LOCATION_IN_SPARSE);
if ((request->type == UDS_QUERY_NO_UPDATE) ||
((request->type == UDS_QUERY) && !found)) { /* There is nothing left to do. */ return UDS_SUCCESS;
}
/* * Add a new entry to the volume index referencing the open chapter. This needs to * be done both for new records, and for records from cached sparse chapters.
*/
result = uds_put_volume_index_record(&record, chapter);
}
if (result == UDS_OVERFLOW) { /* * The volume index encountered a delta list overflow. The condition was already * logged. We will go on without adding the record to the open chapter.
*/ return UDS_SUCCESS;
}
if (result != UDS_SUCCESS) return result;
if (!found || (request->type == UDS_UPDATE)) { /* This is a new record or we're updating an existing record. */
metadata = &request->new_metadata;
} else { /* Move the existing record to the open chapter. */
metadata = &request->old_metadata;
}
result = uds_get_volume_index_record(zone->index->volume_index,
&request->record_name, &record); if (result != UDS_SUCCESS) return result;
if (!record.is_found) return UDS_SUCCESS;
/* If the request was requeued, check whether the saved state is still valid. */
if (record.is_collision) {
set_chapter_location(request, zone, record.virtual_chapter);
} else { /* Non-collision records are hints, so resolve the name in the chapter. */ bool found;
if (request->requeued && request->virtual_chapter != record.virtual_chapter)
set_request_location(request, UDS_LOCATION_UNKNOWN);
request->virtual_chapter = record.virtual_chapter;
result = get_record_from_zone(zone, request, &found); if (result != UDS_SUCCESS) return result;
if (!found) { /* There is no record to remove. */ return UDS_SUCCESS;
}
}
	/*
	 * Delete the volume index entry for the named record only. Note that a later search might
	 * return stale advice if there is a colliding name in the same chapter, but it's a very
	 * rare case (1 in 2^21).
	 */
result = uds_remove_volume_index_record(&record); if (result != UDS_SUCCESS) return result;
/* * If the record is in the open chapter, we must remove it or mark it deleted to avoid * trouble if the record is added again later.
*/ if (request->location == UDS_LOCATION_IN_OPEN_CHAPTER)
uds_remove_from_open_chapter(zone->open_chapter, &request->record_name);
/* This is the request processing function invoked by each zone's thread. */ staticvoid execute_zone_request(struct uds_request *request)
{ int result; struct uds_index *index = request->index;
if (request->zone_message.type != UDS_MESSAGE_NONE) {
result = dispatch_index_zone_control_request(request); if (result != UDS_SUCCESS) {
vdo_log_error_strerror(result, "error executing message: %d",
request->zone_message.type);
}
/* Once the message is processed it can be freed. */
vdo_free(vdo_forget(request)); return;
}
for (i = 0; i < index->zone_count; i++) {
result = uds_make_request_queue("indexW", &execute_zone_request,
&index->zone_queues[i]); if (result != UDS_SUCCESS) return result;
}
/* The triage queue is only needed for sparse multi-zone indexes. */ if ((index->zone_count > 1) && uds_is_sparse_index_geometry(geometry)) {
result = uds_make_request_queue("triageW", &triage_request,
&index->triage_queue); if (result != UDS_SUCCESS) return result;
}
return UDS_SUCCESS;
}
/* This is the driver function for the chapter writer thread. */ staticvoid close_chapters(void *arg)
{ int result; struct chapter_writer *writer = arg; struct uds_index *index = writer->index;
vdo_log_debug("chapter writer starting");
mutex_lock(&writer->mutex); for (;;) { while (writer->zones_to_write < index->zone_count) { if (writer->stop && (writer->zones_to_write == 0)) { /* * We've been told to stop, and all of the zones are in the same * open chapter, so we can exit now.
*/
mutex_unlock(&writer->mutex);
vdo_log_debug("chapter writer stopping"); return;
}
uds_wait_cond(&writer->cond, &writer->mutex);
}
/* * Release the lock while closing a chapter. We probably don't need to do this, but * it seems safer in principle. It's OK to access the chapter and chapter_number * fields without the lock since those aren't allowed to change until we're done.
*/
mutex_unlock(&writer->mutex);
if (index->has_saved_open_chapter) { /* * Remove the saved open chapter the first time we close an open chapter * after loading from a clean shutdown, or after doing a clean save. The * lack of the saved open chapter will indicate that a recovery is * necessary.
*/
index->has_saved_open_chapter = false;
result = uds_discard_open_chapter(index->layout); if (result == UDS_SUCCESS)
vdo_log_debug("Discarding saved open chapter");
}
result = uds_close_open_chapter(writer->chapters, index->zone_count,
index->volume,
writer->open_chapter_index,
writer->collated_records,
index->newest_virtual_chapter);
vdo_log_info("loaded index from chapter %llu through chapter %llu",
(unsignedlonglong) index->oldest_virtual_chapter,
(unsignedlonglong) last_save_chapter);
if (will_be_sparse_chapter &&
!uds_is_volume_index_sample(index->volume_index, name)) { /* * This entry will be in a sparse chapter after the rebuild completes, and it is * not a sample, so just skip over it.
*/ return UDS_SUCCESS;
}
result = uds_get_volume_index_record(index->volume_index, name, &record); if (result != UDS_SUCCESS) return result;
if (record.is_found) { if (record.is_collision) { if (record.virtual_chapter == virtual_chapter) { /* The record is already correct. */ return UDS_SUCCESS;
}
update_record = true;
} elseif (record.virtual_chapter == virtual_chapter) { /* * There is a volume index entry pointing to the current chapter, but we * don't know if it is for the same name as the one we are currently * working on or not. For now, we're just going to assume that it isn't. * This will create one extra collision record if there was a deleted * record in the current chapter.
*/
update_record = false;
} else { /* * If we're rebuilding, we don't normally want to go to disk to see if the * record exists, since we will likely have just read the record from disk * (i.e. we know it's there). The exception to this is when we find an * entry in the volume index that has a different chapter. In this case, we * need to search that chapter to determine if the volume index entry was * for the same record or a different one.
*/
result = uds_search_volume_page_cache_for_rebuild(index->volume,
name,
record.virtual_chapter,
&update_record); if (result != UDS_SUCCESS) return result;
}
} else {
update_record = false;
}
if (update_record) { /* * Update the volume index to reference the new chapter for the block. If the * record had been deleted or dropped from the chapter index, it will be back.
*/
result = uds_set_volume_index_record_chapter(&record, virtual_chapter);
} else { /* * Add a new entry to the volume index referencing the open chapter. This should be * done regardless of whether we are a brand new record or a sparse record, i.e. * one that doesn't exist in the index but does on disk, since for a sparse record, * we would want to un-sparsify if it did exist.
*/
result = uds_put_volume_index_record(&record, virtual_chapter);
}
if ((result == UDS_DUPLICATE_NAME) || (result == UDS_OVERFLOW)) { /* The rebuilt index will lose these records. */ return UDS_SUCCESS;
}
mutex_lock(&index->load_context->mutex); if (index->load_context->status != INDEX_SUSPENDING) {
mutex_unlock(&index->load_context->mutex); returnfalse;
}
/* Notify that we are suspended and wait for the resume. */
index->load_context->status = INDEX_SUSPENDED;
uds_broadcast_cond(&index->load_context->cond);
while ((index->load_context->status != INDEX_OPENING) &&
(index->load_context->status != INDEX_FREEING))
uds_wait_cond(&index->load_context->cond, &index->load_context->mutex);
result = rebuild_index_page_map(index, virtual); if (result != UDS_SUCCESS) { return vdo_log_error_strerror(result, "could not rebuild index page map for chapter %u",
physical_chapter);
}
for (i = 0; i < geometry->record_pages_per_chapter; i++) {
u8 *record_page;
u32 record_page_number;
record_page_number = geometry->index_pages_per_chapter + i;
result = uds_get_volume_record_page(index->volume, physical_chapter,
record_page_number, &record_page); if (result != UDS_SUCCESS) { return vdo_log_error_strerror(result, "could not get page %d",
record_page_number);
}
vdo_log_info("Replaying volume from chapter %llu through chapter %llu",
(unsignedlonglong) from_virtual,
(unsignedlonglong) upto_virtual);
/* * The index failed to load, so the volume index is empty. Add records to the volume index * in order, skipping non-hooks in chapters which will be sparse to save time. * * Go through each record page of each chapter and add the records back to the volume * index. This should not cause anything to be written to either the open chapter or the * on-disk volume. Also skip the on-disk chapter corresponding to upto_virtual, as this * would have already been purged from the volume index when the chapter was opened. * * Also, go through each index page for each chapter and rebuild the index page map.
*/
old_map_update = index->volume->index_page_map->last_update; for (virtual = from_virtual; virtual < upto_virtual; virtual++) {
will_be_sparse = uds_is_chapter_sparse(index->volume->geometry,
from_virtual, upto_virtual, virtual);
result = replay_chapter(index, virtual, will_be_sparse); if (result != UDS_SUCCESS) return result;
}
/* Also reap the chapter being replaced by the open chapter. */
uds_set_volume_index_open_chapter(index->volume_index, upto_virtual);
new_map_update = index->volume->index_page_map->last_update; if (new_map_update != old_map_update) {
vdo_log_info("replay changed index page map update from %llu to %llu",
(unsignedlonglong) old_map_update,
(unsignedlonglong) new_map_update);
}
result = vdo_allocate_extended(struct uds_index, config->zone_count, struct uds_request_queue *, "index", &index); if (result != VDO_SUCCESS) return result;
index->zone_count = config->zone_count;
result = uds_make_index_layout(config, new, &index->layout); if (result != UDS_SUCCESS) {
uds_free_index(index); return result;
}
result = vdo_allocate(index->zone_count, struct index_zone *, "zones",
&index->zones); if (result != VDO_SUCCESS) {
uds_free_index(index); return result;
}
result = uds_make_volume(config, index->layout, &index->volume); if (result != UDS_SUCCESS) {
uds_free_index(index); return result;
}
index->volume->lookup_mode = LOOKUP_NORMAL; for (z = 0; z < index->zone_count; z++) {
result = make_index_zone(index, z); if (result != UDS_SUCCESS) {
uds_free_index(index); return vdo_log_error_strerror(result, "Could not create index zone");
}
}
nonce = uds_get_volume_nonce(index->layout);
result = uds_make_volume_index(config, nonce, &index->volume_index); if (result != UDS_SUCCESS) {
uds_free_index(index); return vdo_log_error_strerror(result, "could not make volume index");
}
result = initialize_index_queues(index, config->geometry); if (result != UDS_SUCCESS) {
uds_free_index(index); return result;
}
result = make_chapter_writer(index, &index->chapter_writer); if (result != UDS_SUCCESS) {
uds_free_index(index); return result;
}
if (!new) {
result = load_index(index); switch (result) { case UDS_SUCCESS:
loaded = true; break; case -ENOMEM: /* We should not try a rebuild for this error. */
vdo_log_error_strerror(result, "index could not be loaded"); break; default:
vdo_log_error_strerror(result, "index could not be loaded"); if (open_type == UDS_LOAD) {
result = rebuild_index(index); if (result != UDS_SUCCESS) {
vdo_log_error_strerror(result, "index could not be rebuilt");
}
} break;
}
}
if (result != UDS_SUCCESS) {
uds_free_index(index); return vdo_log_error_strerror(result, "fatal error in %s()", __func__);
}
for (z = 0; z < index->zone_count; z++) {
zone = index->zones[z];
zone->oldest_virtual_chapter = index->oldest_virtual_chapter;
zone->newest_virtual_chapter = index->newest_virtual_chapter;
}
if (index->load_context != NULL) {
mutex_lock(&index->load_context->mutex);
index->load_context->status = INDEX_READY; /* * If we get here, suspend is meaningless, but notify any thread trying to suspend * us so it doesn't hang.
*/
uds_broadcast_cond(&index->load_context->cond);
mutex_unlock(&index->load_context->mutex);
}
uds_request_queue_finish(index->triage_queue); for (i = 0; i < index->zone_count; i++)
uds_request_queue_finish(index->zone_queues[i]);
free_chapter_writer(index->chapter_writer);
uds_free_volume_index(index->volume_index); if (index->zones != NULL) { for (i = 0; i < index->zone_count; i++)
free_index_zone(index->zones[i]);
vdo_free(index->zones);
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.