/*
 * The block map is responsible for tracking all the logical to physical mappings of a VDO. It
 * consists of a collection of 60 radix trees gradually allocated as logical addresses are used.
 * Each tree is assigned to a logical zone such that it is easy to compute which zone must handle
 * each logical address. Each logical zone also has a dedicated portion of the leaf page cache.
 *
 * Each logical zone has a single dedicated queue and thread for performing all updates to the
 * radix trees assigned to that zone. The concurrency guarantees of this single-threaded model
 * allow the code to omit more fine-grained locking for the block map structures.
 *
 * Load operations must be performed on the admin thread. Normal operations, such as reading and
 * updating mappings, must be performed on the appropriate logical zone thread. Save operations
 * must be launched from the same admin thread as the original load operation.
 */
/*
 * The VDO page cache abstraction: the bookkeeping for the block map pages cached on behalf of
 * one block map zone.
 */
struct vdo_page_cache {
	/* the VDO which owns this cache */
	struct vdo *vdo;
	/* number of pages in cache */
	page_count_t page_count;
	/* number of pages to write in the current batch */
	page_count_t pages_in_batch;
	/* Whether the VDO is doing a read-only rebuild */
	bool rebuilding;

	/* array of page information entries */
	struct page_info *infos;
	/* raw memory for pages */
	char *pages;
	/* cache last found page info */
	struct page_info *last_found;
	/* map of page number to info */
	struct int_map *page_map;
	/* main LRU list (all infos) */
	struct list_head lru_list;
	/* free page list (oldest first) */
	struct list_head free_list;
	/* outgoing page list */
	struct list_head outgoing_list;
	/* number of read I/O operations pending */
	page_count_t outstanding_reads;
	/* number of write I/O operations pending */
	page_count_t outstanding_writes;
	/* number of pages covered by the current flush */
	page_count_t pages_in_flush;
	/* number of pages waiting to be included in the next flush */
	page_count_t pages_to_flush;
	/* number of discards in progress */
	unsigned int discard_count;
	/* how many VPCs waiting for free page */
	unsigned int waiter_count;
	/* queue of waiters who want a free page */
	struct vdo_wait_queue free_waiters;
	/*
	 * Statistics are only updated on the logical zone thread, but are accessed from other
	 * threads.
	 */
	struct block_map_statistics stats;
	/* counter for pressure reports */
	u32 pressure_report;
	/* the block map zone to which this cache belongs */
	struct block_map_zone *zone;
};
/*
 * The state of a page buffer. A free page buffer has no particular page bound to it; in any
 * other state the buffer is bound to the page whose absolute pbn is held in the pbn field. When
 * the page is resident or dirty its data is stable and may be accessed. Otherwise the page is in
 * flight (incoming or outgoing) and its data should not be accessed.
 *
 * @note Update the static data in get_page_state_name() if you change this enumeration.
 */
enum vdo_page_buffer_state {
	/* this page buffer is not being used */
	PS_FREE,
	/* this page is being read from store */
	PS_INCOMING,
	/* attempt to load this page failed */
	PS_FAILED,
	/* this page is valid and un-modified */
	PS_RESIDENT,
	/* this page is valid and modified */
	PS_DIRTY,
	/* this page is being written and should not be used */
	PS_OUTGOING,
	/* not a state; the number of states above */
	PAGE_STATE_COUNT,
} __packed;
/* The write status of a page. */
enum vdo_page_write_status {
	WRITE_STATUS_NORMAL,
	WRITE_STATUS_DISCARD,
	WRITE_STATUS_DEFERRED,
} __packed;
/* The information kept for each page slot of a page cache. */
struct page_info {
	/* Preallocated page struct vio */
	struct vio *vio;
	/* back-link for references */
	struct vdo_page_cache *cache;
	/* the pbn of the page */
	physical_block_number_t pbn;
	/* page is busy (temporarily locked) */
	u16 busy;
	/* the write status of the page */
	enum vdo_page_write_status write_status;
	/* page state */
	enum vdo_page_buffer_state state;
	/* queue of completions awaiting this item */
	struct vdo_wait_queue waiting;
	/* state linked list entry */
	struct list_head state_entry;
	/* LRU entry */
	struct list_head lru_entry;
	/*
	 * The earliest recovery journal block containing uncommitted updates to the block map
	 * page associated with this page_info. A reference (lock) is held on that block to
	 * prevent it from being reaped. Whenever this value changes, the reference on the old
	 * value must be released and a reference on the new value must be acquired.
	 */
	sequence_number_t recovery_lock;
};
/*
 * A completion awaiting a specific page. Once completed, it is also a live reference into the
 * page until it is freed.
 */
struct vdo_page_completion {
	/* The generic completion */
	struct vdo_completion completion;
	/* The cache involved */
	struct vdo_page_cache *cache;
	/* The waiter for the pending list */
	struct vdo_waiter waiter;
	/* The absolute physical block number of the page on disk */
	physical_block_number_t pbn;
	/* Whether the page may be modified */
	bool writable;
	/* Whether the page is available */
	bool ready;
	/* The info structure for the page, only valid when ready */
	struct page_info *info;
};
/* Forward declaration only; struct block_map refers to forests by pointer. */
struct forest;
/*
 * Per-page bookkeeping for a block map tree page: its dirty list linkage, the flush generations
 * in which it was dirtied and is being written, and the recovery journal sequence numbers it
 * tracks.
 *
 * NOTE(review): no closing "};" for this struct appears before the next definition in this copy
 * of the file; the end of the struct (and possibly trailing members) looks to have been lost.
 * Restore it from the authoritative source rather than guessing.
 */
struct tree_page { struct vdo_waiter waiter;
/* Dirty list entry */ struct list_head entry;
/* If dirty, the tree zone flush generation in which it was last dirtied. */
u8 generation;
/* Whether this page is an interior tree page being written out. */ bool writing;
/* If writing, the tree zone flush generation of the copy being written. */
u8 writing_generation;
/*
 * Sequence number of the earliest recovery journal block containing uncommitted updates to
 * this page
 */
sequence_number_t recovery_lock;
/* The value of recovery_lock when this page last started writing */
sequence_number_t writing_recovery_lock;
/* The lists of dirty pages, kept per period, plus the list of pages which have expired. */
struct dirty_lists {
	/* The number of periods after which an element will be expired */
	block_count_t maximum_age;
	/* The oldest period which has unexpired elements */
	sequence_number_t oldest_period;
	/* One more than the current period */
	sequence_number_t next_period;
	/* The offset in the array of lists of the oldest period */
	block_count_t offset;
	/* Expired pages */
	dirty_era_t expired;
	/* The lists of dirty pages (flexible array; must remain the last member) */
	dirty_era_t eras[];
};
/* The state of one zone of the block map, including that zone's page cache. */
struct block_map_zone {
	zone_count_t zone_number;
	thread_id_t thread_id;
	struct admin_state state;
	/* Back-pointer to the block map; presumably the one which owns this zone (confirm) */
	struct block_map *block_map;
	/* Dirty pages, by era */
	struct dirty_lists *dirty_lists;
	struct vdo_page_cache page_cache;
	data_vio_count_t active_lookups;
	struct int_map *loading_pages;
	struct vio_pool *vio_pool;
	/* The tree page which has issued or will be issuing a flush */
	struct tree_page *flusher;
	struct vdo_wait_queue flush_waiters;
	/* The generation after the most recent flush */
	u8 generation;
	u8 oldest_generation;
	/* The counts of dirty pages in each generation (one slot per possible u8 value) */
	u32 dirty_page_counts[256];
};
/*
 * The top-level block map state: the location and count of the tree roots, the era points being
 * distributed to the zones, the entry counts, and the current and pending forests.
 *
 * NOTE(review): no closing "};" for this struct appears before the following typedef in this
 * copy of the file; the end of the struct (and possibly trailing members) looks to have been
 * lost. Restore it from the authoritative source rather than guessing.
 */
struct block_map { struct vdo *vdo; struct action_manager *action_manager; /* The absolute PBN of the first root of the tree part of the block map */
physical_block_number_t root_origin;
/* NOTE(review): presumably the number of tree roots starting at root_origin; confirm */
block_count_t root_count;
/* The era point we are currently distributing to the zones */
sequence_number_t current_era_point; /* The next era point */
sequence_number_t pending_era_point;
/* The number of entries in block map */
block_count_t entry_count;
nonce_t nonce; struct recovery_journal *journal;
/* The trees for finding block map pages */ struct forest *forest; /* The expanded trees awaiting growth */ struct forest *next_forest; /* The number of entries after growth */
block_count_t next_entry_count;
/** * typedef vdo_entry_callback_fn - A function to be called for each allocated PBN when traversing * the forest. * @pbn: A PBN of a tree node. * @completion: The parent completion of the traversal. * * Return: VDO_SUCCESS or an error.
*/ typedefint (*vdo_entry_callback_fn)(physical_block_number_t pbn, struct vdo_completion *completion);
/** * vdo_convert_maximum_age() - Convert the maximum age to reflect the new recovery journal format * @age: The configured maximum age * * Return: The converted age * * In the old recovery journal format, each journal block held 311 entries, and every write bio * made two entries. The old maximum age was half the usable journal length. In the new format, * each block holds only 217 entries, but each bio only makes one entry. We convert the configured * age so that the number of writes in a block map era is the same in the old and new formats. This * keeps the bound on the amount of work required to recover the block map from the recovery * journal the same across the format change. It also keeps the amortization of block map page * writes to write bios the same.
*/ staticinline block_count_t vdo_convert_maximum_age(block_count_t age)
{ return DIV_ROUND_UP(age * RECOVERY_JOURNAL_1_ENTRIES_PER_BLOCK,
2 * RECOVERY_JOURNAL_ENTRIES_PER_BLOCK);
}
#endif/* VDO_BLOCK_MAP_H */
/*
 * NOTE: The text below is not part of the header. It is the footer of the web page this copy
 * was taken from (translated from German):
 *
 * Measurement V0.5
 * Processing time: 0.14 seconds (preprocessed)
 *
 * The information on this web page was compiled carefully and to the best of our knowledge.
 * However, neither the completeness, nor the correctness, nor the quality of the information
 * provided is guaranteed.
 *
 * Remark: The coloured syntax display and the measurement are still experimental.
 */