/* Optional cap on the number of request virtqueues (0 = no limit). */
static unsigned int num_request_queues;
module_param(num_request_queues, uint, 0644);
MODULE_PARM_DESC(num_request_queues,
		 "Limit the number of request queues to use for blk device. "
		 "0 for no limit. "
		 "Values > nr_cpu_ids truncated to nr_cpu_ids.");
/* Number of virtqueues dedicated to polled (interrupt-free) I/O. */
static unsigned int poll_queues;
module_param(poll_queues, uint, 0644);
MODULE_PARM_DESC(poll_queues, "The number of dedicated virtqueues for polling I/O");
/* Per-device driver state for one virtio block device. */
struct virtio_blk {
	/*
	 * This mutex must be held by anything that may run after
	 * virtblk_remove() sets vblk->vdev to NULL.
	 *
	 * blk-mq, virtqueue processing, and sysfs attribute code paths are
	 * shut down before vblk->vdev is set to NULL and therefore do not need
	 * to hold this mutex.
	 */
	struct mutex vdev_mutex;
	struct virtio_device *vdev;

	/* The disk structure for the kernel. */
	struct gendisk *disk;

	/* Process context for config space updates */
	struct work_struct config_work;

	/* Ida index - used to track minor number allocations. */
	int index;

	/* num of vqs */
	int num_vqs;
	/* Number of virtqueues per blk-mq hctx type (default/read/poll). */
	int io_queues[HCTX_MAX_TYPES];
	struct virtio_blk_vq *vqs;

	/* For zoned device */
	unsigned int zone_sectors;
};
struct virtblk_req { /* Out header */ struct virtio_blk_outhdr out_hdr;
/* In header */ union {
u8 status;
/* * The zone append command has an extended in header. * The status field in zone_append_in_hdr must always * be the last byte.
*/ struct {
__virtio64 sector;
u8 status;
} zone_append;
} in_hdr;
if (unmap)
flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP;
range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC); if (!range) return -ENOMEM;
/* * Single max discard segment means multi-range discard isn't * supported, and block layer only runs contiguity merge like * normal RW request. So we can't reply on bio for retrieving * each range info.
*/ if (queue_max_discard_segments(req->q) == 1) {
range[0].flags = cpu_to_le32(flags);
range[0].num_sectors = cpu_to_le32(blk_rq_sectors(req));
range[0].sector = cpu_to_le64(blk_rq_pos(req));
n = 1;
} else {
__rq_for_each_bio(bio, req) {
u64 sector = bio->bi_iter.bi_sector;
u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;
if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) && op_is_zone_mgmt(req_op(req))) return BLK_STS_NOTSUPP;
/* Set fields for all request types */
vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req));
switch (req_op(req)) { case REQ_OP_READ:
type = VIRTIO_BLK_T_IN;
sector = blk_rq_pos(req); break; case REQ_OP_WRITE:
type = VIRTIO_BLK_T_OUT;
sector = blk_rq_pos(req); break; case REQ_OP_FLUSH:
type = VIRTIO_BLK_T_FLUSH; break; case REQ_OP_DISCARD:
type = VIRTIO_BLK_T_DISCARD; break; case REQ_OP_WRITE_ZEROES:
type = VIRTIO_BLK_T_WRITE_ZEROES;
unmap = !(req->cmd_flags & REQ_NOUNMAP); break; case REQ_OP_SECURE_ERASE:
type = VIRTIO_BLK_T_SECURE_ERASE; break; case REQ_OP_ZONE_OPEN:
type = VIRTIO_BLK_T_ZONE_OPEN;
sector = blk_rq_pos(req); break; case REQ_OP_ZONE_CLOSE:
type = VIRTIO_BLK_T_ZONE_CLOSE;
sector = blk_rq_pos(req); break; case REQ_OP_ZONE_FINISH:
type = VIRTIO_BLK_T_ZONE_FINISH;
sector = blk_rq_pos(req); break; case REQ_OP_ZONE_APPEND:
type = VIRTIO_BLK_T_ZONE_APPEND;
sector = blk_rq_pos(req);
in_hdr_len = sizeof(vbr->in_hdr.zone_append); break; case REQ_OP_ZONE_RESET:
type = VIRTIO_BLK_T_ZONE_RESET;
sector = blk_rq_pos(req); break; case REQ_OP_ZONE_RESET_ALL:
type = VIRTIO_BLK_T_ZONE_RESET_ALL; break; case REQ_OP_DRV_IN: /* * Out header has already been prepared by the caller (virtblk_get_id() * or virtblk_submit_zone_report()), nothing to do here.
*/ return 0; default:
WARN_ON_ONCE(1); return BLK_STS_IOERR;
}
/* Set fields for non-REQ_OP_DRV_IN request types */
vbr->in_hdr_len = in_hdr_len;
vbr->out_hdr.type = cpu_to_virtio32(vdev, type);
vbr->out_hdr.sector = cpu_to_virtio64(vdev, sector);
if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES ||
type == VIRTIO_BLK_T_SECURE_ERASE) { if (virtblk_setup_discard_write_zeroes_erase(req, unmap)) return BLK_STS_RESOURCE;
}
return 0;
}
/* * The status byte is always the last byte of the virtblk request * in-header. This helper fetches its value for all in-header formats * that are currently defined.
*/ staticinline u8 virtblk_vbr_status(struct virtblk_req *vbr)
{ return *((u8 *)&vbr->in_hdr + vbr->in_hdr_len - 1);
}
spin_lock_irqsave(&vblk->vqs[qid].lock, flags); do {
virtqueue_disable_cb(vq); while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) { struct request *req = blk_mq_rq_from_pdu(vbr);
if (likely(!blk_should_fake_timeout(req->q)))
blk_mq_complete_request(req);
req_done = true;
}
} while (!virtqueue_enable_cb(vq));
/* In case queue is stopped waiting for more buffers. */ if (req_done)
blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
}
status = virtblk_prep_rq(hctx, vblk, req, vbr); if (unlikely(status)) return status;
spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
err = virtblk_add_req(vblk->vqs[qid].vq, vbr); if (err) {
virtqueue_kick(vblk->vqs[qid].vq); /* Don't stop the queue if -ENOMEM: we may have failed to * bounce the buffer due to global resource outage.
*/ if (err == -ENOSPC)
blk_mq_stop_hw_queue(hctx);
spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
virtblk_unmap_data(req, vbr); return virtblk_fail_to_queue(req, err);
}
if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
notify = true;
spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
if (notify)
virtqueue_notify(vblk->vqs[qid].vq); return BLK_STS_OK;
}
/* * virtio ZBD specification doesn't require zones to be a power of * two sectors in size, but the code in this driver expects that.
*/
virtio_cread(vdev, struct virtio_blk_config, zoned.zone_sectors,
&vblk->zone_sectors); if (vblk->zone_sectors == 0 || !is_power_of_2(vblk->zone_sectors)) {
dev_err(&vdev->dev, "zoned device with non power of two zone size %u\n",
vblk->zone_sectors); return -ENODEV;
}
lim->chunk_sectors = vblk->zone_sectors;
dev_dbg(&vdev->dev, "zone sectors = %u\n", vblk->zone_sectors);
if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
dev_warn(&vblk->vdev->dev, "ignoring negotiated F_DISCARD for zoned device\n");
lim->max_hw_discard_sectors = 0;
}
/* We provide getgeo only to please some old bootloader/partitioning tools */
static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
{
	struct virtio_blk *vblk = bd->bd_disk->private_data;
	int ret = 0;

	/* Serialize against virtblk_remove() clearing vblk->vdev. */
	mutex_lock(&vblk->vdev_mutex);

	if (!vblk->vdev) {
		/* Device has been removed. */
		ret = -ENXIO;
		goto out;
	}

	/* see if the host passed in geometry config */
	if (virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_GEOMETRY)) {
		virtio_cread(vblk->vdev, struct virtio_blk_config,
			     geometry.cylinders, &geo->cylinders);
		virtio_cread(vblk->vdev, struct virtio_blk_config,
			     geometry.heads, &geo->heads);
		virtio_cread(vblk->vdev, struct virtio_blk_config,
			     geometry.sectors, &geo->sectors);
	} else {
		/* some standard values, similar to sd */
		geo->heads = 1 << 6;
		geo->sectors = 1 << 5;
		geo->cylinders = get_capacity(bd->bd_disk) >> 11;
	}
out:
	mutex_unlock(&vblk->vdev_mutex);
	return ret;
}
for (i = 0; i < num_vqs - num_poll_vqs; i++) {
vqs_info[i].callback = virtblk_done;
snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%u", i);
vqs_info[i].name = vblk->vqs[i].name;
}
for (; i < num_vqs; i++) {
snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req_poll.%u", i);
vqs_info[i].name = vblk->vqs[i].name;
}
/* Discover virtqueues and write information to configuration. */
err = virtio_find_vqs(vdev, num_vqs, vqs, vqs_info, &desc); if (err) goto out;
for (i = 0; i < num_vqs; i++) {
spin_lock_init(&vblk->vqs[i].lock);
vblk->vqs[i].vq = vqs[i];
}
vblk->num_vqs = num_vqs;
out:
kfree(vqs);
kfree(vqs_info); if (err)
kfree(vblk->vqs); return err;
}
/*
 * Legacy naming scheme used for virtio devices.  We are stuck with it for
 * virtio blk but don't ever use it for any new driver.
 *
 * Encodes @index in bijective base-26 ('a'..'z', then 'aa'..'zz', ...) and
 * appends the suffix to @prefix, writing the NUL-terminated result into
 * @buf (at least @buflen bytes).
 *
 * Returns 0 on success or -EINVAL if @buflen is too small for the result.
 */
static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
{
	const int base = 'z' - 'a' + 1;
	char *begin = buf + strlen(prefix);
	char *end = buf + buflen;
	char *p;
	int unit;

	/* Build the suffix backwards, starting from the end of the buffer. */
	p = end - 1;
	*p = '\0';
	unit = base;
	do {
		if (p == begin)
			return -EINVAL;
		*--p = 'a' + (index % unit);
		index = (index / unit) - 1;
	} while (index >= 0);

	/* Slide the suffix down next to the prefix, then write the prefix. */
	memmove(begin, p, end - p);
	memcpy(buf, prefix, strlen(prefix));

	return 0;
}
staticint virtblk_get_cache_mode(struct virtio_device *vdev)
{
u8 writeback; int err;
/* * If WCE is not configurable and flush is not available, * assume no writeback cache is in use.
*/ if (err)
writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH);
/* * Regular queues have interrupts and hence CPU affinity is * defined by the core virtio code, but polling queues have * no interrupts so we let the block layer assign CPU affinity.
*/ if (i == HCTX_TYPE_POLL)
blk_mq_map_queues(&set->map[i]); else
blk_mq_map_hw_queues(&set->map[i],
&vblk->vdev->dev, 0);
}
}
/* We need to know how many segments before we allocate. */
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX, struct virtio_blk_config, seg_max,
&sg_elems);
/* We need at least one SG element, whatever they say. */ if (err || !sg_elems)
sg_elems = 1;
/* Prevent integer overflows and honor max vq size */
sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2);
/* We can handle whatever the host told us to handle. */
lim->max_segments = sg_elems;
/* No real sector limit. */
lim->max_hw_sectors = UINT_MAX;
/* Host can optionally specify maximum segment size and number of
* segments. */
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SIZE_MAX, struct virtio_blk_config, size_max, &v); if (!err)
max_size = min(max_size, v);
lim->max_segment_size = max_size;
/* Host can optionally specify the block size of the device */
virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE, struct virtio_blk_config, blk_size,
&lim->logical_block_size);
/* Use topology information if available */
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, struct virtio_blk_config, physical_block_exp,
&physical_block_exp); if (!err && physical_block_exp)
lim->physical_block_size =
lim->logical_block_size * (1 << physical_block_exp);
if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
virtio_cread(vdev, struct virtio_blk_config,
max_write_zeroes_sectors, &v);
lim->max_write_zeroes_sectors = v ? v : UINT_MAX;
}
/* The discard and secure erase limits are combined since the Linux * block layer uses the same limit for both commands. * * If both VIRTIO_BLK_F_SECURE_ERASE and VIRTIO_BLK_F_DISCARD features * are negotiated, we will use the minimum between the limits. * * discard sector alignment is set to the minimum between discard_sector_alignment * and secure_erase_sector_alignment. * * max discard sectors is set to the minimum between max_discard_seg and * max_secure_erase_seg.
*/ if (virtio_has_feature(vdev, VIRTIO_BLK_F_SECURE_ERASE)) {
/* secure_erase_sector_alignment should not be zero, the device should set a * valid number of sectors.
*/ if (!v) {
dev_err(&vdev->dev, "virtio_blk: secure_erase_sector_alignment can't be 0\n"); return -EINVAL;
}
/* max_secure_erase_sectors should not be zero, the device should set a * valid number of sectors.
*/ if (!v) {
dev_err(&vdev->dev, "virtio_blk: max_secure_erase_sectors can't be 0\n"); return -EINVAL;
}
/* max_secure_erase_seg should not be zero, the device should set a * valid number of segments
*/ if (!v) {
dev_err(&vdev->dev, "virtio_blk: max_secure_erase_seg can't be 0\n"); return -EINVAL;
}
if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD) ||
virtio_has_feature(vdev, VIRTIO_BLK_F_SECURE_ERASE)) { /* max_discard_seg and discard_granularity will be 0 only * if max_discard_seg and discard_sector_alignment fields in the virtio * config are 0 and VIRTIO_BLK_F_SECURE_ERASE feature is not negotiated. * In this case, we use default values.
*/ if (!max_discard_segs)
max_discard_segs = sg_elems;
/* Default queue sizing is to fill the ring. */ if (!virtblk_queue_depth) {
queue_depth = vblk->vqs[0].vq->num_free; /* ... but without indirect descs, we use 2 descs per req */ if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
queue_depth /= 2;
} else {
queue_depth = virtblk_queue_depth;
}
/* * All steps that follow use the VQs therefore they need to be * placed after the virtio_device_ready() call above.
*/ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
(lim.features & BLK_FEAT_ZONED)) {
err = blk_revalidate_disk_zones(vblk->disk); if (err) goto out_cleanup_disk;
}
err = device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); if (err) goto out_cleanup_disk;
/*
 * NOTE(review): the following German website-disclaimer text leaked into
 * this source file (extraction artifact); it is preserved here as a comment
 * so the file remains valid C.  Translation: "The information on this
 * website was compiled carefully to the best of our knowledge.  However,
 * neither completeness, correctness, nor quality of the provided
 * information is guaranteed.  Note: the colored syntax rendering and the
 * measurement are still experimental."
 *
 * Original text:
 * Die Informationen auf dieser Webseite wurden
 * nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
 * noch Qualität der bereit gestellten Informationen zugesichert.
 * Bemerkung:
 * Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.
 */