spin_lock_irq(&migf->list_lock);
list_for_each_entry_safe(buf, temp_buf, &migf->avail_list, buf_elm) {
list_del_init(&buf->buf_elm); if (buf->allocated_length >= length) {
spin_unlock_irq(&migf->list_lock); goto found;
} /* * Prevent holding redundant buffers. Put in a free * list and call at the end not under the spin lock * (&migf->list_lock) to minimize its scope usage.
*/
list_add(&buf->buf_elm, &free_list);
}
spin_unlock_irq(&migf->list_lock);
buf = virtiovf_alloc_data_buffer(migf, length);
/*
 * This function is called in all state_mutex unlock cases to
 * handle a 'deferred_reset' if exists.
 */
static void virtiovf_state_mutex_unlock(struct virtiovf_pci_core_device *virtvdev)
{
again:
	spin_lock(&virtvdev->reset_lock);
	if (virtvdev->deferred_reset) {
		virtvdev->deferred_reset = false;
		spin_unlock(&virtvdev->reset_lock);
		/*
		 * A reset arrived while state_mutex was held; apply its
		 * side effects now, outside the spin lock.
		 */
		virtvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
		virtiovf_disable_fds(virtvdev);
		/*
		 * Re-check under the lock: another reset may have been
		 * deferred while reset_lock was dropped above.
		 */
		goto again;
	}
	mutex_unlock(&virtvdev->state_mutex);
	spin_unlock(&virtvdev->reset_lock);
}
/* * As the higher VFIO layers are holding locks across reset and using * those same locks with the mm_lock we need to prevent ABBA deadlock * with the state_mutex and mm_lock. * In case the state_mutex was taken already we defer the cleanup work * to the unlock flow of the other running context.
*/
spin_lock(&virtvdev->reset_lock);
virtvdev->deferred_reset = true; if (!mutex_trylock(&virtvdev->state_mutex)) {
spin_unlock(&virtvdev->reset_lock); return;
}
spin_unlock(&virtvdev->reset_lock);
virtiovf_state_mutex_unlock(virtvdev);
}
mutex_lock(&virtvdev->state_mutex); if (virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY &&
virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY_P2P) {
ret = -EINVAL; goto err_state_unlock;
}
/* * The virtio specification does not include a PRE_COPY concept. * Since we can expect the data to remain the same for a certain period, * we use a rate limiter mechanism before making a call to the device.
*/ if (__ratelimit(&migf->pre_copy_rl_state)) {
ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
VIRTIO_RESOURCE_OBJ_DEV_PARTS, migf->obj_id,
VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
&ctx_size); if (ret) goto err_state_unlock;
}
mutex_lock(&migf->lock); if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) {
ret = -ENODEV; goto err_migf_unlock;
}
if (!end_of_data || !ctx_size) {
mutex_unlock(&migf->lock); goto done;
}
mutex_unlock(&migf->lock); /* * We finished transferring the current state and the device has a * dirty state, read a new state.
*/
ret = virtiovf_read_device_context_chunk(migf, ctx_size); if (ret) /* * The machine is running, and context size could be grow, so no reason to mark * the device state as VIRTIOVF_MIGF_STATE_ERROR.
*/ goto err_state_unlock;
buf = virtiovf_get_data_buffer(migf, ctx_size); if (IS_ERR(buf)) return PTR_ERR(buf);
/* Find the total count of SG entries which satisfies the size */
nent = sg_nents_for_len(buf->table.sgt.sgl, ctx_size); if (nent <= 0) {
ret = -EINVAL; goto out;
}
/* * Iterate to that SG entry and mark it as last (if it's not already) * to let underlay layers iterate only till that entry.
*/
for_each_sg(buf->table.sgt.sgl, sg, nent - 1, i)
;
if (!sg_is_last(sg)) {
unmark_end = true;
sg_mark_end(sg);
}
ret = virtio_pci_admin_dev_parts_get(migf->virtvdev->core_device.pdev,
VIRTIO_RESOURCE_OBJ_DEV_PARTS,
migf->obj_id,
VIRTIO_ADMIN_CMD_DEV_PARTS_GET_TYPE_ALL,
buf->table.sgt.sgl, &res_size); /* Restore the original SG mark end */ if (unmark_end)
sg_unmark_end(sg); if (ret) goto out;
buf->length = res_size;
header_buf = virtiovf_get_data_buffer(migf, sizeof(struct virtiovf_migration_header)); if (IS_ERR(header_buf)) {
ret = PTR_ERR(header_buf); goto out;
}
ret = virtiovf_add_buf_header(header_buf, res_size); if (ret) goto out_header;
lockdep_assert_held(&virtvdev->state_mutex);
ret = virtiovf_pci_alloc_obj_id(virtvdev, VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_GET,
&obj_id); if (ret) goto out;
migf->obj_id = obj_id; /* Mark as having a valid obj id which can be even 0 */
migf->has_obj_id = true;
ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id,
VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
&ctx_size); if (ret) goto out_clean;
if (!ctx_size) {
ret = -EINVAL; goto out_clean;
}
ret = virtiovf_read_device_context_chunk(migf, ctx_size); if (ret) goto out_clean;
if (pre_copy) {
migf->pre_copy_initial_bytes = migf->max_pos; /* Arbitrarily set the pre-copy rate limit to 1-second intervals */
ratelimit_state_init(&migf->pre_copy_rl_state, 1 * HZ, 1); /* Prevent any rate messages upon its usage */
ratelimit_set_flags(&migf->pre_copy_rl_state,
RATELIMIT_MSG_ON_RELEASE);
migf->state = VIRTIOVF_MIGF_STATE_PRECOPY;
} else {
migf->state = VIRTIOVF_MIGF_STATE_COMPLETE;
}
/* * Set the required object header at the beginning of the buffer. * The actual device parts data will be written post of the header offset.
*/ staticint virtiovf_set_obj_cmd_header(struct virtiovf_data_buffer *vhca_buf)
{ struct virtio_admin_cmd_resource_obj_cmd_hdr obj_hdr = {}; struct page *page;
u8 *to_buff;
if (vhca_buf->include_header_object) /* The buffer holds the object header, update the offset accordingly */
offset += sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr);
page_offset = offset % PAGE_SIZE;
page = virtiovf_get_migration_page(vhca_buf, offset - page_offset); if (!page) return -EINVAL;
while (len || has_work) {
has_work = false; switch (migf->load_state) { case VIRTIOVF_LOAD_STATE_READ_HEADER:
ret = virtiovf_resume_read_header(migf, vhca_buf_header, &buf,
&len, pos, &done, &has_work); if (ret) goto out_unlock; break; case VIRTIOVF_LOAD_STATE_PREP_HEADER_DATA: if (vhca_buf_header->allocated_length < migf->record_size) {
virtiovf_free_data_buffer(vhca_buf_header);
migf->buf_header = virtiovf_alloc_data_buffer(migf,
migf->record_size); if (IS_ERR(migf->buf_header)) {
ret = PTR_ERR(migf->buf_header);
migf->buf_header = NULL; goto out_unlock;
}
vhca_buf_header = migf->buf_header;
}
vhca_buf_header->start_pos = migf->max_pos;
migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER_DATA; break; case VIRTIOVF_LOAD_STATE_READ_HEADER_DATA:
ret = virtiovf_resume_read_header_data(migf, vhca_buf_header,
&buf, &len, pos, &done); if (ret) goto out_unlock; break; case VIRTIOVF_LOAD_STATE_PREP_CHUNK:
{
u32 cmd_size = migf->record_size + sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr);
/* * The DMA map/unmap is managed in virtio layer, we just need to extend * the SG pages to hold the extra required chunk data.
*/ if (vhca_buf->allocated_length < cmd_size) {
ret = virtiovf_add_migration_pages(vhca_buf,
DIV_ROUND_UP_ULL(cmd_size - vhca_buf->allocated_length,
PAGE_SIZE)); if (ret) goto out_unlock;
}
vhca_buf->start_pos = migf->max_pos;
migf->load_state = VIRTIOVF_LOAD_STATE_READ_CHUNK; break;
} case VIRTIOVF_LOAD_STATE_READ_CHUNK:
ret = virtiovf_resume_read_chunk(migf, vhca_buf, migf->record_size,
&buf, &len, pos, &done, &has_work); if (ret) goto out_unlock; break; case VIRTIOVF_LOAD_STATE_LOAD_CHUNK: /* Mark the last SG entry and set its length */
sg_mark_end(vhca_buf->last_offset_sg);
orig_length = vhca_buf->last_offset_sg->length; /* Length should include the resource object command header */
vhca_buf->last_offset_sg->length = vhca_buf->length + sizeof(struct virtio_admin_cmd_resource_obj_cmd_hdr) -
vhca_buf->last_offset;
ret = virtio_pci_admin_dev_parts_set(migf->virtvdev->core_device.pdev,
vhca_buf->table.sgt.sgl); /* Restore the original SG data */
vhca_buf->last_offset_sg->length = orig_length;
sg_unmark_end(vhca_buf->last_offset_sg); if (ret) goto out_unlock;
migf->load_state = VIRTIOVF_LOAD_STATE_READ_HEADER; /* be ready for reading the next chunk */
vhca_buf->length = 0; break; default: break;
}
}
out_unlock: if (ret)
migf->state = VIRTIOVF_MIGF_STATE_ERROR;
mutex_unlock(&migf->lock);
virtiovf_state_mutex_unlock(migf->virtvdev); return ret ? ret : done;
}
migf->virtvdev = virtvdev;
ret = virtiovf_pci_alloc_obj_id(virtvdev, VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_SET,
&obj_id); if (ret) goto out_clean;
migf->obj_id = obj_id; /* Mark as having a valid obj id which can be even 0 */
migf->has_obj_id = true;
ret = virtiovf_set_obj_cmd_header(migf->buf); if (ret) goto out_clean;
if (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_STOP_COPY) {
ret = virtiovf_pci_save_device_final_data(virtvdev); return ret ? ERR_PTR(ret) : NULL;
}
/* * vfio_mig_get_next_state() does not use arcs other than the above
*/
WARN_ON(true); return ERR_PTR(-EINVAL);
}
mutex_lock(&virtvdev->state_mutex);
obj_id_exists = virtvdev->saving_migf && virtvdev->saving_migf->has_obj_id; if (!obj_id_exists) {
ret = virtiovf_pci_alloc_obj_id(virtvdev,
VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_GET,
&obj_id); if (ret) goto end;
} else {
obj_id = virtvdev->saving_migf->obj_id;
}
ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id,
VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
&res_size); if (!ret)
*stop_copy_length = res_size;
/* * We can't leave this obj_id alive if didn't exist before, otherwise, it might * stay alive, even without an active migration flow (e.g. migration was cancelled)
*/ if (!obj_id_exists)
virtiovf_pci_free_obj_id(virtvdev, obj_id);
end:
virtiovf_state_mutex_unlock(virtvdev); return ret;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.