// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2025, Christoph Hellwig.
 * Copyright (c) 2025, Western Digital Corporation or its affiliates.
 *
 * Zoned Loop Device driver - exports a zoned block device using one file per
 * zone as backing storage.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
ret = vfs_getattr(&zone->file->f_path, &stat, STATX_SIZE, 0); if (ret < 0) {
pr_err("Failed to get zone %u file stat (err=%d)\n",
zone_no, ret);
set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags); return ret;
}
file_sectors = stat.size >> SECTOR_SHIFT; if (file_sectors > zlo->zone_capacity) {
pr_err("Zone %u file too large (%llu sectors > %llu)\n",
zone_no, file_sectors, zlo->zone_capacity); return -EINVAL;
}
if (file_sectors & ((zlo->block_size >> SECTOR_SHIFT) - 1)) {
pr_err("Zone %u file size not aligned to block size %u\n",
zone_no, zlo->block_size); return -EINVAL;
}
staticint zloop_open_zone(struct zloop_device *zlo, unsignedint zone_no)
{ struct zloop_zone *zone = &zlo->zones[zone_no]; int ret = 0;
if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) return -EIO;
mutex_lock(&zone->lock);
if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
ret = zloop_update_seq_zone(zlo, zone_no); if (ret) goto unlock;
}
switch (zone->cond) { case BLK_ZONE_COND_EXP_OPEN: break; case BLK_ZONE_COND_EMPTY: case BLK_ZONE_COND_CLOSED: case BLK_ZONE_COND_IMP_OPEN:
zone->cond = BLK_ZONE_COND_EXP_OPEN; break; case BLK_ZONE_COND_FULL: default:
ret = -EIO; break;
}
unlock:
mutex_unlock(&zone->lock);
return ret;
}
staticint zloop_close_zone(struct zloop_device *zlo, unsignedint zone_no)
{ struct zloop_zone *zone = &zlo->zones[zone_no]; int ret = 0;
if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) return -EIO;
mutex_lock(&zone->lock);
if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
ret = zloop_update_seq_zone(zlo, zone_no); if (ret) goto unlock;
}
switch (zone->cond) { case BLK_ZONE_COND_CLOSED: break; case BLK_ZONE_COND_IMP_OPEN: case BLK_ZONE_COND_EXP_OPEN: if (zone->wp == zone->start)
zone->cond = BLK_ZONE_COND_EMPTY; else
zone->cond = BLK_ZONE_COND_CLOSED; break; case BLK_ZONE_COND_EMPTY: case BLK_ZONE_COND_FULL: default:
ret = -EIO; break;
}
unlock:
mutex_unlock(&zone->lock);
return ret;
}
staticint zloop_reset_zone(struct zloop_device *zlo, unsignedint zone_no)
{ struct zloop_zone *zone = &zlo->zones[zone_no]; int ret = 0;
if (test_bit(ZLOOP_ZONE_CONV, &zone->flags)) return -EIO;
mutex_lock(&zone->lock);
if (!test_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags) &&
zone->cond == BLK_ZONE_COND_EMPTY) goto unlock;
if (vfs_truncate(&zone->file->f_path, 0)) {
set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags);
ret = -EIO; goto unlock;
}
/* We should never get an I/O beyond the device capacity. */ if (WARN_ON_ONCE(zone_no >= zlo->nr_zones)) {
ret = -EIO; goto out;
}
zone = &zlo->zones[zone_no];
zone_end = zone->start + zlo->zone_capacity;
/* * The block layer should never send requests that are not fully * contained within the zone.
*/ if (WARN_ON_ONCE(sector + nr_sectors > zone->start + zlo->zone_size)) {
ret = -EIO; goto out;
}
if (test_and_clear_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags)) {
mutex_lock(&zone->lock);
ret = zloop_update_seq_zone(zlo, zone_no);
mutex_unlock(&zone->lock); if (ret) goto out;
}
if (!test_bit(ZLOOP_ZONE_CONV, &zone->flags) && is_write) {
mutex_lock(&zone->lock);
if (is_append) {
sector = zone->wp;
cmd->sector = sector;
}
/* * Write operations must be aligned to the write pointer and * fully contained within the zone capacity.
*/ if (sector != zone->wp || zone->wp + nr_sectors > zone_end) {
pr_err("Zone %u: unaligned write: sect %llu, wp %llu\n",
zone_no, sector, zone->wp);
ret = -EIO; goto unlock;
}
/* Implicitly open the target zone. */ if (zone->cond == BLK_ZONE_COND_CLOSED ||
zone->cond == BLK_ZONE_COND_EMPTY)
zone->cond = BLK_ZONE_COND_IMP_OPEN;
	/*
	 * Advance the write pointer of sequential zones. If the write
	 * fails, the wp position will be corrected when the next I/O
	 * completes.
	 */
zone->wp += nr_sectors; if (zone->wp == zone_end)
zone->cond = BLK_ZONE_COND_FULL;
}
rq_for_each_bvec(tmp, rq, rq_iter)
nr_bvec++;
if (rq->bio != rq->biotail) { struct bio_vec *bvec;
cmd->bvec = kmalloc_array(nr_bvec, sizeof(*cmd->bvec), GFP_NOIO); if (!cmd->bvec) {
ret = -EIO; goto unlock;
}
/* * The bios of the request may be started from the middle of * the 'bvec' because of bio splitting, so we can't directly * copy bio->bi_iov_vec to new bvec. The rq_for_each_bvec * API will take care of all details for us.
*/
bvec = cmd->bvec;
rq_for_each_bvec(tmp, rq, rq_iter) {
*bvec = tmp;
bvec++;
}
iov_iter_bvec(&iter, rw, cmd->bvec, nr_bvec, blk_rq_bytes(rq));
} else { /* * Same here, this bio may be started from the middle of the * 'bvec' because of bio splitting, so offset from the bvec * must be passed to iov iterator
*/
iov_iter_bvec(&iter, rw,
__bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter),
nr_bvec, blk_rq_bytes(rq));
iter.iov_offset = rq->bio->bi_iter.bi_bvec_done;
}
if (cmd->ret < 0 && !test_bit(ZLOOP_ZONE_CONV, &zone->flags)) { /* * A write to a sequential zone file failed: mark the * zone as having an error. This will be corrected and * cleared when the next IO is submitted.
*/
set_bit(ZLOOP_ZONE_SEQ_ERROR, &zone->flags); break;
} if (req_op(rq) == REQ_OP_ZONE_APPEND)
rq->__sector = cmd->sector;
break; default: break;
}
if (cmd->ret < 0)
sts = errno_to_blk_status(cmd->ret);
blk_mq_end_request(rq, sts);
}
/* * If the FS block size is lower than or equal to 4K, use that as the * device block size. Otherwise, fallback to the FS direct IO alignment * constraint if that is provided, and to the FS underlying device * physical block size if the direct IO alignment is unknown.
*/ if (file_inode(zone->file)->i_sb->s_blocksize <= SZ_4K)
zlo->block_size = file_inode(zone->file)->i_sb->s_blocksize; elseif (!vfs_getattr(&zone->file->f_path, &st, STATX_DIOALIGN, 0) &&
(st.result_mask & STATX_DIOALIGN))
zlo->block_size = st.dio_offset_align; elseif (sb_bdev)
zlo->block_size = bdev_physical_block_size(sb_bdev); else
zlo->block_size = SECTOR_SIZE;
if (zlo->zone_capacity & ((zlo->block_size >> SECTOR_SHIFT) - 1)) {
pr_err("Zone capacity is not aligned to block size %u\n",
zlo->block_size); return -EINVAL;
}
nr_zones = opts->capacity >> ilog2(opts->zone_size); if (opts->nr_conv_zones >= nr_zones) {
pr_err("Invalid number of conventional zones %u\n",
opts->nr_conv_zones); goto out;
}
zlo = kvzalloc(struct_size(zlo, zones, nr_zones), GFP_KERNEL); if (!zlo) {
ret = -ENOMEM; goto out;
}
zlo->state = Zlo_creating;
ret = mutex_lock_killable(&zloop_ctl_mutex); if (ret) goto out_free_dev;
/* Allocate id, if @opts->id >= 0, we're requesting that specific id */ if (opts->id >= 0) {
ret = idr_alloc(&zloop_index_idr, zlo,
opts->id, opts->id + 1, GFP_KERNEL); if (ret == -ENOSPC)
ret = -EEXIST;
} else {
ret = idr_alloc(&zloop_index_idr, zlo, 0, 0, GFP_KERNEL);
}
mutex_unlock(&zloop_ctl_mutex); if (ret < 0) goto out_free_dev;
zlo->workqueue = alloc_workqueue("zloop%d", WQ_UNBOUND | WQ_FREEZABLE,
opts->nr_queues * opts->queue_depth, zlo->id); if (!zlo->workqueue) {
ret = -ENOMEM; goto out_free_idr;
}
if (opts->base_dir)
zlo->base_dir = kstrdup(opts->base_dir, GFP_KERNEL); else
zlo->base_dir = kstrdup(ZLOOP_DEF_BASE_DIR, GFP_KERNEL); if (!zlo->base_dir) {
ret = -ENOMEM; goto out_destroy_workqueue;
}
zlo->data_dir = zloop_filp_open_fmt(O_RDONLY | O_DIRECTORY, 0, "%s/%u",
zlo->base_dir, zlo->id); if (IS_ERR(zlo->data_dir)) {
ret = PTR_ERR(zlo->data_dir);
pr_warn("Failed to open directory %s/%u (err=%d)\n",
zlo->base_dir, zlo->id, ret); goto out_free_base_dir;
}
/* * If we already have zone files, we are restoring a device created by a * previous add operation. In this case, zloop_init_zone() will check * that the zone files are consistent with the zone configuration given.
*/
restore = zloop_dev_exists(zlo); for (i = 0; i < nr_zones; i++) {
ret = zloop_init_zone(zlo, opts, i, restore); if (ret) goto out_close_files;
}
buf = memdup_user_nul(ubuf, count); if (IS_ERR(buf)) return PTR_ERR(buf);
for (i = 0; i < ARRAY_SIZE(zloop_ctl_ops); i++) {
op = &zloop_ctl_ops[i]; if (!op->name) {
pr_err("Invalid operation\n");
ret = -EINVAL; goto out;
} if (!strncmp(buf, op->name, strlen(op->name))) break;
}
ret = zloop_parse_options(&opts, opts_buf); if (ret) {
pr_err("Failed to parse options\n"); goto out;
}
switch (op->code) { case ZLOOP_CTL_ADD:
ret = zloop_ctl_add(&opts); break; case ZLOOP_CTL_REMOVE:
ret = zloop_ctl_remove(&opts); break; default:
pr_err("Invalid operation\n");
ret = -EINVAL; goto out;
}
out:
kfree(opts.base_dir);
kfree(buf); return ret ? ret : count;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.