/* * Copyright (c) 2006 - 2009 Mellanox Technology Inc. All rights reserved. * Copyright (C) 2008 - 2011 Bart Van Assche <bvanassche@acm.org>. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. *
*/
/* Parse a hexadecimal module-parameter string into the u64 at kp->arg. */
static int srpt_set_u64_x(const char *buffer, const struct kernel_param *kp)
{
	return kstrtou64(buffer, 16, (u64 *)kp->arg);
}

/* Format the u64 at kp->arg as a "0x"-prefixed hexadecimal string. */
static int srpt_get_u64_x(char *buffer, const struct kernel_param *kp)
{
	return sprintf(buffer, "0x%016llx\n", *(u64 *)kp->arg);
}
/*
 * Expose srpt_service_guid as a read-only (0444) module parameter that is
 * parsed and displayed in hexadecimal via the callbacks above.
 */
module_param_call(srpt_service_guid, srpt_set_u64_x, srpt_get_u64_x,
		  &srpt_service_guid, 0444);
MODULE_PARM_DESC(srpt_service_guid, "Using this value for ioc_guid, id_ext, and cm_listen_id instead of using the node_guid of the first HCA.");
static struct ib_client srpt_client;
/* Protects both rdma_cm_port and rdma_cm_id. */
static DEFINE_MUTEX(rdma_cm_mutex);
/* Port number RDMA/CM will bind to. */
static u16 rdma_cm_port;
static struct rdma_cm_id *rdma_cm_id;

/* Forward declarations. */
static void srpt_release_cmd(struct se_cmd *se_cmd);
static void srpt_free_ch(struct kref *kref);
static int srpt_queue_status(struct se_cmd *cmd);
static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc);
static void srpt_process_wait_list(struct srpt_rdma_ch *ch);
/* Type of the entries in srpt_memory_caches. */
struct srpt_memory_cache_entry {
	refcount_t ref;		/* Reference count for this entry. */
	struct kmem_cache *c;	/* Slab cache this entry refers to. */
};
guard(mutex)(&srpt_mc_mutex);
xa_for_each(&srpt_memory_caches, object_size, e) if (e->c == c) break; if (WARN_ON_ONCE(!e)) return; if (!refcount_dec_and_test(&e->ref)) return;
WARN_ON_ONCE(xa_erase(&srpt_memory_caches, object_size) != e);
kmem_cache_destroy(e->c);
kfree(e);
}
/* * The only allowed channel state changes are those that change the channel * state into a state with a higher numerical value. Hence the new > prev test.
*/ staticbool srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new)
{ unsignedlong flags; enum rdma_ch_state prev; bool changed = false;
spin_lock_irqsave(&ch->spinlock, flags);
prev = ch->state; if (new > prev) {
ch->state = new;
changed = true;
}
spin_unlock_irqrestore(&ch->spinlock, flags);
return changed;
}
/** * srpt_event_handler - asynchronous IB event callback function * @handler: IB event handler registered by ib_register_event_handler(). * @event: Description of the event that occurred. * * Callback function called by the InfiniBand core when an asynchronous IB * event occurs. This callback may occur in interrupt context. See also * section 11.5.2, Set Asynchronous Event Handler in the InfiniBand * Architecture Specification.
*/ staticvoid srpt_event_handler(struct ib_event_handler *handler, struct ib_event *event)
{ struct srpt_device *sdev =
container_of(handler, struct srpt_device, event_handler); struct srpt_port *sport;
u8 port_num;
pr_debug("ASYNC event= %d on device= %s\n", event->event,
dev_name(&sdev->device->dev));
switch (event->event) { case IB_EVENT_PORT_ERR:
port_num = event->element.port_num - 1; if (port_num < sdev->device->phys_port_cnt) {
sport = &sdev->port[port_num];
sport->lid = 0;
sport->sm_lid = 0;
} else {
WARN(true, "event %d: port_num %d out of range 1..%d\n",
event->event, port_num + 1,
sdev->device->phys_port_cnt);
} break; case IB_EVENT_PORT_ACTIVE: case IB_EVENT_LID_CHANGE: case IB_EVENT_PKEY_CHANGE: case IB_EVENT_SM_CHANGE: case IB_EVENT_CLIENT_REREGISTER: case IB_EVENT_GID_CHANGE: /* Refresh port data asynchronously. */
port_num = event->element.port_num - 1; if (port_num < sdev->device->phys_port_cnt) {
sport = &sdev->port[port_num]; if (!sport->lid && !sport->sm_lid)
schedule_work(&sport->work);
} else {
WARN(true, "event %d: port_num %d out of range 1..%d\n",
event->event, port_num + 1,
sdev->device->phys_port_cnt);
} break; default:
pr_err("received unrecognized IB event %d\n", event->event); break;
}
}
/**
 * srpt_srq_event - SRQ event callback function
 * @event: Description of the event that occurred.
 * @ctx: Context pointer specified at SRQ creation time.
 *
 * Only logs the event; no further handling is performed.
 */
static void srpt_srq_event(struct ib_event *event, void *ctx)
{
	pr_debug("SRQ event %d\n", event->event);
}
/* Return a human-readable name for an RDMA channel state. */
static const char *get_ch_state_name(enum rdma_ch_state s)
{
	if (s == CH_CONNECTING)
		return "connecting";
	if (s == CH_LIVE)
		return "live";
	if (s == CH_DISCONNECTING)
		return "disconnecting";
	if (s == CH_DRAINING)
		return "draining";
	if (s == CH_DISCONNECTED)
		return "disconnected";
	return "???";
}
/**
 * srpt_qp_event - QP event callback function
 * @event: Description of the event that occurred.
 * @ptr: SRPT RDMA channel.
 */
static void srpt_qp_event(struct ib_event *event, void *ptr)
{
	struct srpt_rdma_ch *ch = ptr;

	if (event->event == IB_EVENT_COMM_EST) {
		/* Forward the event through whichever CM owns this channel. */
		if (ch->using_rdma_cm)
			rdma_notify(ch->rdma_cm.cm_id, event->event);
		else
			ib_cm_notify(ch->ib_cm.cm_id, event->event);
	} else if (event->event == IB_EVENT_QP_LAST_WQE_REACHED) {
		pr_debug("%s-%d, state %s: received Last WQE event.\n",
			 ch->sess_name, ch->qp->qp_num,
			 get_ch_state_name(ch->state));
	} else {
		pr_err("received unrecognized IB QP event %d\n", event->event);
	}
}
/** * srpt_set_ioc - initialize a IOUnitInfo structure * @c_list: controller list. * @slot: one-based slot number. * @value: four-bit value. * * Copies the lowest four bits of value in element slot of the array of four * bit elements called c_list (controller list). The index slot is one-based.
*/ staticvoid srpt_set_ioc(u8 *c_list, u32 slot, u8 value)
{
u16 id;
u8 tmp;
/** * srpt_get_class_port_info - copy ClassPortInfo to a management datagram * @mad: Datagram that will be sent as response to DM_ATTR_CLASS_PORT_INFO. * * See also section 16.3.3.1 ClassPortInfo in the InfiniBand Architecture * Specification.
*/ staticvoid srpt_get_class_port_info(struct ib_dm_mad *mad)
{ struct ib_class_port_info *cif;
/** * srpt_get_iou - write IOUnitInfo to a management datagram * @mad: Datagram that will be sent as response to DM_ATTR_IOU_INFO. * * See also section 16.3.3.3 IOUnitInfo in the InfiniBand Architecture * Specification. See also section B.7, table B.6 in the SRP r16a document.
*/ staticvoid srpt_get_iou(struct ib_dm_mad *mad)
{ struct ib_dm_iou_info *ioui;
u8 slot; int i;
/* set present for slot 1 and empty for the rest */
srpt_set_ioc(ioui->controller_list, 1, 1); for (i = 1, slot = 2; i < 16; i++, slot++)
srpt_set_ioc(ioui->controller_list, slot, 0);
mad->mad_hdr.status = 0;
}
/** * srpt_get_ioc - write IOControllerprofile to a management datagram * @sport: HCA port through which the MAD has been received. * @slot: Slot number specified in DM_ATTR_IOC_PROFILE query. * @mad: Datagram that will be sent as response to DM_ATTR_IOC_PROFILE. * * See also section 16.3.3.4 IOControllerProfile in the InfiniBand * Architecture Specification. See also section B.7, table B.7 in the SRP * r16a document.
*/ staticvoid srpt_get_ioc(struct srpt_port *sport, u32 slot, struct ib_dm_mad *mad)
{ struct srpt_device *sdev = sport->sdev; struct ib_dm_ioc_profile *iocp; int send_queue_depth;
/** * srpt_get_svc_entries - write ServiceEntries to a management datagram * @ioc_guid: I/O controller GUID to use in reply. * @slot: I/O controller number. * @hi: End of the range of service entries to be specified in the reply. * @lo: Start of the range of service entries to be specified in the reply.. * @mad: Datagram that will be sent as response to DM_ATTR_SVC_ENTRIES. * * See also section 16.3.3.5 ServiceEntries in the InfiniBand Architecture * Specification. See also section B.7, table B.8 in the SRP r16a document.
*/ staticvoid srpt_get_svc_entries(u64 ioc_guid,
u16 slot, u8 hi, u8 lo, struct ib_dm_mad *mad)
{ struct ib_dm_svc_entries *svc_entries;
/** * srpt_mgmt_method_get - process a received management datagram * @sp: HCA port through which the MAD has been received. * @rq_mad: received MAD. * @rsp_mad: response MAD.
*/ staticvoid srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad, struct ib_dm_mad *rsp_mad)
{
u16 attr_id;
u32 slot;
u8 hi, lo;
/** * srpt_refresh_port - configure a HCA port * @sport: SRPT HCA port. * * Enable InfiniBand management datagram processing, update the cached sm_lid, * lid and gid values, and register a callback function for processing MADs * on the specified port. * * Note: It is safe to call this function more than once for the same port.
*/ staticint srpt_refresh_port(struct srpt_port *sport)
{ struct ib_mad_agent *mad_agent; struct ib_mad_reg_req reg_req; struct ib_port_modify port_modify; struct ib_port_attr port_attr; int ret;
ret = ib_query_port(sport->sdev->device, sport->port, &port_attr); if (ret) return ret;
ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify); if (ret) {
pr_warn("%s-%d: enabling device management failed (%d). Note: this is expected if SR-IOV is enabled.\n",
dev_name(&sport->sdev->device->dev), sport->port, ret); return 0;
}
mad_agent = ib_register_mad_agent(sport->sdev->device,
sport->port,
IB_QPT_GSI,
®_req, 0,
srpt_mad_send_handler,
srpt_mad_recv_handler,
sport, 0); if (IS_ERR(mad_agent)) {
pr_err("%s-%d: MAD agent registration failed (%ld). Note: this is expected if SR-IOV is enabled.\n",
dev_name(&sport->sdev->device->dev), sport->port,
PTR_ERR(mad_agent));
sport->mad_agent = NULL;
memset(&port_modify, 0, sizeof(port_modify));
port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
ib_modify_port(sport->sdev->device, sport->port, 0,
&port_modify); return 0;
}
sport->mad_agent = mad_agent;
}
return 0;
}
/**
 * srpt_unregister_mad_agent - unregister MAD callback functions
 * @sdev: SRPT HCA pointer.
 * @port_cnt: number of ports with registered MAD
 *
 * Note: It is safe to call this function more than once for the same device.
 */
static void srpt_unregister_mad_agent(struct srpt_device *sdev, int port_cnt)
{
	struct ib_port_modify port_modify = {
		.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP,
	};
	struct srpt_port *sport;
	int port;

	for (port = 0; port < port_cnt; port++) {
		sport = &sdev->port[port];
		WARN_ON(sport->port != port + 1);
		if (!sport->mad_agent)
			continue;
		/* Clear the device-management capability bit for this port. */
		ib_modify_port(sdev->device, port + 1, 0, &port_modify);
		ib_unregister_mad_agent(sport->mad_agent);
		sport->mad_agent = NULL;
	}
}
/** * srpt_alloc_ioctx_ring - allocate a ring of SRPT I/O context structures * @sdev: Device to allocate the I/O context ring for. * @ring_size: Number of elements in the I/O context ring. * @ioctx_size: I/O context size. * @buf_cache: I/O buffer cache. * @alignment_offset: Offset in each ring buffer at which the SRP information * unit starts. * @dir: DMA data direction.
*/ staticstruct srpt_ioctx **srpt_alloc_ioctx_ring(struct srpt_device *sdev, int ring_size, int ioctx_size, struct kmem_cache *buf_cache, int alignment_offset, enum dma_data_direction dir)
{ struct srpt_ioctx **ring; int i;
ring = kvmalloc_array(ring_size, sizeof(ring[0]), GFP_KERNEL); if (!ring) goto out; for (i = 0; i < ring_size; ++i) {
ring[i] = srpt_alloc_ioctx(sdev, ioctx_size, buf_cache, dir); if (!ring[i]) goto err;
ring[i]->index = i;
ring[i]->offset = alignment_offset;
} goto out;
err: while (--i >= 0)
srpt_free_ioctx(sdev, ring[i], buf_cache, dir);
kvfree(ring);
ring = NULL;
out: return ring;
}
/**
 * srpt_free_ioctx_ring - free the ring of SRPT I/O context structures
 * @ioctx_ring: I/O context ring to be freed; may be NULL.
 * @sdev: SRPT HCA pointer.
 * @ring_size: Number of ring elements.
 * @buf_cache: I/O buffer cache.
 * @dir: DMA data direction.
 */
static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring,
		struct srpt_device *sdev, int ring_size,
		struct kmem_cache *buf_cache,
		enum dma_data_direction dir)
{
	int idx;

	if (!ioctx_ring)
		return;

	/* Free every element first, then the ring array itself. */
	for (idx = 0; idx < ring_size; idx++)
		srpt_free_ioctx(sdev, ioctx_ring[idx], buf_cache, dir);

	kvfree(ioctx_ring);
}
/** * srpt_set_cmd_state - set the state of a SCSI command * @ioctx: Send I/O context. * @new: New I/O context state. * * Does not modify the state of aborted commands. Returns the previous command * state.
*/ staticenum srpt_command_state srpt_set_cmd_state(struct srpt_send_ioctx *ioctx, enum srpt_command_state new)
{ enum srpt_command_state previous;
previous = ioctx->state; if (previous != SRPT_STATE_DONE)
ioctx->state = new;
return previous;
}
/** * srpt_test_and_set_cmd_state - test and set the state of a command * @ioctx: Send I/O context. * @old: Current I/O context state. * @new: New I/O context state. * * Returns true if and only if the previous command state was equal to 'old'.
*/ staticbool srpt_test_and_set_cmd_state(struct srpt_send_ioctx *ioctx, enum srpt_command_state old, enum srpt_command_state new)
{ enum srpt_command_state previous;
/** * srpt_zerolength_write - perform a zero-length RDMA write * @ch: SRPT RDMA channel. * * A quote from the InfiniBand specification: C9-88: For an HCA responder * using Reliable Connection service, for each zero-length RDMA READ or WRITE * request, the R_Key shall not be validated, even if the request includes * Immediate data.
*/ staticint srpt_zerolength_write(struct srpt_rdma_ch *ch)
{ struct ib_rdma_wr wr = {
.wr = {
.next = NULL,
{ .wr_cqe = &ch->zw_cqe, },
.opcode = IB_WR_RDMA_WRITE,
.send_flags = IB_SEND_SIGNALED,
}
};
if (ioctx->rw_ctxs != &ioctx->s_rw_ctx)
kfree(ioctx->rw_ctxs);
}
/**
 * srpt_get_desc_buf - locate the data descriptors inside an SRP_CMD IU
 * @srp_cmd: SRP_CMD information unit.
 *
 * Returns a pointer to the first byte past the additional CDB data, i.e.
 * the start of the data buffer descriptors in @srp_cmd.
 */
static inline void *srpt_get_desc_buf(struct srp_cmd *srp_cmd)
{
	/*
	 * The pointer computations below will only be compiled correctly
	 * if srp_cmd::add_data is declared as s8*, u8*, s8[] or u8[], so check
	 * whether srp_cmd::add_data has been declared as a byte pointer.
	 */
	BUILD_BUG_ON(!__same_type(srp_cmd->add_data[0], (s8)0) &&
		     !__same_type(srp_cmd->add_data[0], (u8)0));

	/*
	 * According to the SRP spec, the lower two bits of the 'ADDITIONAL
	 * CDB LENGTH' field are reserved and the size in bytes of this field
	 * is four times the value specified in bits 3..7. Hence the "& ~3".
	 */
	return srp_cmd->add_data + (srp_cmd->add_cdb_len & ~3);
}
/** * srpt_get_desc_tbl - parse the data descriptors of a SRP_CMD request * @recv_ioctx: I/O context associated with the received command @srp_cmd. * @ioctx: I/O context that will be used for responding to the initiator. * @srp_cmd: Pointer to the SRP_CMD request data. * @dir: Pointer to the variable to which the transfer direction will be * written. * @sg: [out] scatterlist for the parsed SRP_CMD. * @sg_cnt: [out] length of @sg. * @data_len: Pointer to the variable to which the total data length of all * descriptors in the SRP_CMD request will be written. * @imm_data_offset: [in] Offset in SRP_CMD requests at which immediate data * starts. * * This function initializes ioctx->nrbuf and ioctx->r_bufs. * * Returns -EINVAL when the SRP_CMD request contains inconsistent descriptors; * -ENOMEM when memory allocation fails and zero upon success.
*/ staticint srpt_get_desc_tbl(struct srpt_recv_ioctx *recv_ioctx, struct srpt_send_ioctx *ioctx, struct srp_cmd *srp_cmd, enum dma_data_direction *dir, struct scatterlist **sg, unsignedint *sg_cnt, u64 *data_len,
u16 imm_data_offset)
{
BUG_ON(!dir);
BUG_ON(!data_len);
/* * The lower four bits of the buffer format field contain the DATA-IN * buffer descriptor format, and the highest four bits contain the * DATA-OUT buffer descriptor format.
*/ if (srp_cmd->buf_fmt & 0xf) /* DATA-IN: transfer data from target to initiator (read). */
*dir = DMA_FROM_DEVICE; elseif (srp_cmd->buf_fmt >> 4) /* DATA-OUT: transfer data from initiator to target (write). */
*dir = DMA_TO_DEVICE; else
*dir = DMA_NONE;
/* initialize data_direction early as srpt_alloc_rw_ctxs needs it */
ioctx->cmd.data_direction = *dir;
/** * srpt_ch_qp_rtr - change the state of a channel to 'ready to receive' (RTR) * @ch: channel of the queue pair. * @qp: queue pair to change the state of. * * Returns zero upon success and a negative value upon failure. * * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system. * If this structure ever becomes larger, it might be necessary to allocate * it dynamically instead of on the stack.
*/ staticint srpt_ch_qp_rtr(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{ struct ib_qp_attr qp_attr; int attr_mask; int ret;
WARN_ON_ONCE(ch->using_rdma_cm);
qp_attr.qp_state = IB_QPS_RTR;
ret = ib_cm_init_qp_attr(ch->ib_cm.cm_id, &qp_attr, &attr_mask); if (ret) goto out;
qp_attr.max_dest_rd_atomic = 4;
ret = ib_modify_qp(qp, &qp_attr, attr_mask);
out: return ret;
}
/** * srpt_ch_qp_rts - change the state of a channel to 'ready to send' (RTS) * @ch: channel of the queue pair. * @qp: queue pair to change the state of. * * Returns zero upon success and a negative value upon failure. * * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system. * If this structure ever becomes larger, it might be necessary to allocate * it dynamically instead of on the stack.
*/ staticint srpt_ch_qp_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{ struct ib_qp_attr qp_attr; int attr_mask; int ret;
qp_attr.qp_state = IB_QPS_RTS;
ret = ib_cm_init_qp_attr(ch->ib_cm.cm_id, &qp_attr, &attr_mask); if (ret) goto out;
qp_attr.max_rd_atomic = 4;
ret = ib_modify_qp(qp, &qp_attr, attr_mask);
out: return ret;
}
/** * srpt_ch_qp_err - set the channel queue pair state to 'error' * @ch: SRPT RDMA channel.
*/ staticint srpt_ch_qp_err(struct srpt_rdma_ch *ch)
{ struct ib_qp_attr qp_attr;
/** * srpt_get_send_ioctx - obtain an I/O context for sending to the initiator * @ch: SRPT RDMA channel.
*/ staticstruct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
{ struct srpt_send_ioctx *ioctx; int tag, cpu;
BUG_ON(!ch);
tag = sbitmap_queue_get(&ch->sess->sess_tag_pool, &cpu); if (tag < 0) return NULL;
ioctx = ch->ioctx_ring[tag];
BUG_ON(ioctx->ch != ch);
ioctx->state = SRPT_STATE_NEW;
WARN_ON_ONCE(ioctx->recv_ioctx);
ioctx->n_rdma = 0;
ioctx->n_rw_ctx = 0;
ioctx->queue_status_only = false; /* * transport_init_se_cmd() does not initialize all fields, so do it * here.
*/
memset(&ioctx->cmd, 0, sizeof(ioctx->cmd));
memset(&ioctx->sense_data, 0, sizeof(ioctx->sense_data));
ioctx->cmd.map_tag = tag;
ioctx->cmd.map_cpu = cpu;
return ioctx;
}
/**
 * srpt_abort_cmd - abort a SCSI command
 * @ioctx: I/O context associated with the SCSI command.
 *
 * Returns the command state as it was before the abort was processed.
 */
static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
{
	enum srpt_command_state state;

	BUG_ON(!ioctx);

	/*
	 * If the command is in a state where the target core is waiting for
	 * the ib_srpt driver, change the state to the next state.
	 */
	state = ioctx->state;
	if (state == SRPT_STATE_NEED_DATA) {
		ioctx->state = SRPT_STATE_DATA_IN;
	} else if (state == SRPT_STATE_CMD_RSP_SENT ||
		   state == SRPT_STATE_MGMT_RSP_SENT) {
		ioctx->state = SRPT_STATE_DONE;
	} else {
		WARN_ONCE(true, "%s: unexpected I/O context state %d\n",
			  __func__, state);
	}

	pr_debug("Aborting cmd with state %d -> %d and tag %lld\n", state,
		 ioctx->state, ioctx->cmd.tag);

	switch (state) {
	case SRPT_STATE_NEW:
	case SRPT_STATE_DATA_IN:
	case SRPT_STATE_MGMT:
	case SRPT_STATE_DONE:
		/*
		 * Do nothing - defer abort processing until
		 * srpt_queue_response() is invoked.
		 */
		break;
	case SRPT_STATE_NEED_DATA:
		pr_debug("tag %#llx: RDMA read error\n", ioctx->cmd.tag);
		transport_generic_request_failure(&ioctx->cmd,
					TCM_CHECK_CONDITION_ABORT_CMD);
		break;
	case SRPT_STATE_CMD_RSP_SENT:
	case SRPT_STATE_MGMT_RSP_SENT:
		/*
		 * SRP_RSP sending failed or the SRP_RSP send completion has
		 * not been received in time.
		 */
		transport_generic_free_cmd(&ioctx->cmd, 0);
		break;
	default:
		WARN(1, "Unexpected command state (%d)", state);
		break;
	}

	return state;
}
/**
 * srpt_rdma_read_done - RDMA read completion callback
 * @cq: Completion queue.
 * @wc: Work completion.
 *
 * XXX: what is now target_execute_cmd used to be asynchronous, and unmapping
 * the data that has been transferred via IB RDMA had to be postponed until the
 * check_stop_free() callback. None of this is necessary anymore and needs to
 * be cleaned up.
 */
static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct srpt_rdma_ch *ch = wc->qp->qp_context;
	struct srpt_send_ioctx *ioctx =
		container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);

	if (wc->status != IB_WC_SUCCESS) {
		pr_info("RDMA_READ for ioctx 0x%p failed with status %d\n",
			ioctx, wc->status);
		srpt_abort_cmd(ioctx);
		return;
	}

	if (!srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
					 SRPT_STATE_DATA_IN)) {
		pr_err("%s[%d]: wrong state = %d\n", __func__,
		       __LINE__, ioctx->state);
		return;
	}

	target_execute_cmd(&ioctx->cmd);
}
/** * srpt_build_cmd_rsp - build a SRP_RSP response * @ch: RDMA channel through which the request has been received. * @ioctx: I/O context associated with the SRP_CMD request. The response will * be built in the buffer ioctx->buf points at and hence this function will * overwrite the request data. * @tag: tag of the request for which this response is being generated. * @status: value for the STATUS field of the SRP_RSP information unit. * * Returns the size in bytes of the SRP_RSP response. * * An SRP_RSP response contains a SCSI status or service response. See also * section 6.9 in the SRP r16a document for the format of an SRP_RSP * response. See also SPC-2 for more information about sense data.
*/ staticint srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, struct srpt_send_ioctx *ioctx, u64 tag, int status)
{ struct se_cmd *cmd = &ioctx->cmd; struct srp_rsp *srp_rsp; const u8 *sense_data; int sense_data_len, max_sense_len;
u32 resid = cmd->residual_count;
/* * The lowest bit of all SAM-3 status codes is zero (see also * paragraph 5.3 in SAM-3).
*/
WARN_ON(status & 1);
if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) { if (cmd->data_direction == DMA_TO_DEVICE) { /* residual data from an underflow write */
srp_rsp->flags = SRP_RSP_FLAG_DOUNDER;
srp_rsp->data_out_res_cnt = cpu_to_be32(resid);
} elseif (cmd->data_direction == DMA_FROM_DEVICE) { /* residual data from an underflow read */
srp_rsp->flags = SRP_RSP_FLAG_DIUNDER;
srp_rsp->data_in_res_cnt = cpu_to_be32(resid);
}
} elseif (cmd->se_cmd_flags & SCF_OVERFLOW_BIT) { if (cmd->data_direction == DMA_TO_DEVICE) { /* residual data from an overflow write */
srp_rsp->flags = SRP_RSP_FLAG_DOOVER;
srp_rsp->data_out_res_cnt = cpu_to_be32(resid);
} elseif (cmd->data_direction == DMA_FROM_DEVICE) { /* residual data from an overflow read */
srp_rsp->flags = SRP_RSP_FLAG_DIOVER;
srp_rsp->data_in_res_cnt = cpu_to_be32(resid);
}
}
if (sense_data_len) {
BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp));
max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp); if (sense_data_len > max_sense_len) {
pr_warn("truncated sense data from %d to %d bytes\n",
sense_data_len, max_sense_len);
sense_data_len = max_sense_len;
}
/** * srpt_build_tskmgmt_rsp - build a task management response * @ch: RDMA channel through which the request has been received. * @ioctx: I/O context in which the SRP_RSP response will be built. * @rsp_code: RSP_CODE that will be stored in the response. * @tag: Tag of the request for which this response is being generated. * * Returns the size in bytes of the SRP_RSP response. * * An SRP_RSP response contains a SCSI status or service response. See also * section 6.9 in the SRP r16a document for the format of an SRP_RSP * response.
*/ staticint srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch, struct srpt_send_ioctx *ioctx,
u8 rsp_code, u64 tag)
{ struct srp_rsp *srp_rsp; int resp_data_len; int resp_len;
/*
 * Map an SRP task management function code onto the corresponding TCM TMR
 * constant. Returns -1 for unknown function codes.
 */
static int srp_tmr_to_tcm(int fn)
{
	if (fn == SRP_TSK_ABORT_TASK)
		return TMR_ABORT_TASK;
	if (fn == SRP_TSK_ABORT_TASK_SET)
		return TMR_ABORT_TASK_SET;
	if (fn == SRP_TSK_CLEAR_TASK_SET)
		return TMR_CLEAR_TASK_SET;
	if (fn == SRP_TSK_LUN_RESET)
		return TMR_LUN_RESET;
	if (fn == SRP_TSK_CLEAR_ACA)
		return TMR_CLEAR_ACA;
	return -1;
}
/** * srpt_handle_tsk_mgmt - process a SRP_TSK_MGMT information unit * @ch: SRPT RDMA channel. * @recv_ioctx: Receive I/O context. * @send_ioctx: Send I/O context. * * Returns 0 if and only if the request will be processed by the target core. * * For more information about SRP_TSK_MGMT information units, see also section * 6.7 in the SRP r16a document.
*/ staticvoid srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch, struct srpt_recv_ioctx *recv_ioctx, struct srpt_send_ioctx *send_ioctx)
{ struct srp_tsk_mgmt *srp_tsk; struct se_cmd *cmd; struct se_session *sess = ch->sess; int tcm_tmr; int rc;
/** * srpt_handle_new_iu - process a newly received information unit * @ch: RDMA channel through which the information unit has been received. * @recv_ioctx: Receive I/O context associated with the information unit.
*/ staticbool
srpt_handle_new_iu(struct srpt_rdma_ch *ch, struct srpt_recv_ioctx *recv_ioctx)
{ struct srpt_send_ioctx *send_ioctx = NULL; struct srp_cmd *srp_cmd; bool res = false;
u8 opcode;
BUG_ON(!ch);
BUG_ON(!recv_ioctx);
if (unlikely(ch->state == CH_CONNECTING)) goto push;
req_lim = atomic_dec_return(&ch->req_lim); if (unlikely(req_lim < 0))
pr_err("req_lim = %d < 0\n", req_lim);
ioctx->byte_len = wc->byte_len;
srpt_handle_new_iu(ch, ioctx);
} else {
pr_info_ratelimited("receiving failed for ioctx %p with status %d\n",
ioctx, wc->status);
}
}
/* * This function must be called from the context in which RDMA completions are * processed because it accesses the wait list without protection against * access from other threads.
*/ staticvoid srpt_process_wait_list(struct srpt_rdma_ch *ch)
{ struct srpt_recv_ioctx *recv_ioctx, *tmp;
/** * srpt_send_done - send completion callback * @cq: Completion queue. * @wc: Work completion. * * Note: Although this has not yet been observed during tests, at least in * theory it is possible that the srpt_get_send_ioctx() call invoked by * srpt_handle_new_iu() fails. This is possible because the req_lim_delta * value in each response is set to one, and it is possible that this response * makes the initiator send a new request before the send completion for that * response has been processed. This could e.g. happen if the call to * srpt_put_send_iotcx() is delayed because of a higher priority interrupt or * if IB retransmission causes generation of the send completion to be * delayed. Incoming information units for which srpt_get_send_ioctx() fails * are queued on cmd_wait_list. The code below processes these delayed * requests one at a time.
*/ staticvoid srpt_send_done(struct ib_cq *cq, struct ib_wc *wc)
{ struct srpt_rdma_ch *ch = wc->qp->qp_context; struct srpt_send_ioctx *ioctx =
container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe); enum srpt_command_state state;
state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
WARN_ON(state != SRPT_STATE_CMD_RSP_SENT &&
state != SRPT_STATE_MGMT_RSP_SENT);
atomic_add(1 + ioctx->n_rdma, &ch->sq_wr_avail);
if (wc->status != IB_WC_SUCCESS)
pr_info("sending response for ioctx 0x%p failed with status %d\n",
ioctx, wc->status);
if (state != SRPT_STATE_DONE) {
transport_generic_free_cmd(&ioctx->cmd, 0);
} else {
pr_err("IB completion has been received too late for wr_id = %u.\n",
ioctx->ioctx.index);
}
qp_init->qp_context = (void *)ch;
qp_init->event_handler = srpt_qp_event;
qp_init->send_cq = ch->cq;
qp_init->recv_cq = ch->cq;
qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
qp_init->qp_type = IB_QPT_RC; /* * We divide up our send queue size into half SEND WRs to send the * completions, and half R/W contexts to actually do the RDMA * READ/WRITE transfers. Note that we need to allocate CQ slots for * both both, as RDMA contexts will also post completions for the * RDMA READ case.
*/
qp_init->cap.max_send_wr = min(sq_size / 2, attrs->max_qp_wr);
qp_init->cap.max_rdma_ctxs = sq_size / 2;
qp_init->cap.max_send_sge = attrs->max_send_sge;
qp_init->cap.max_recv_sge = 1;
qp_init->port_num = ch->sport->port; if (sdev->use_srq)
qp_init->srq = sdev->srq; else
qp_init->cap.max_recv_wr = ch->rq_size;
if (ch->using_rdma_cm) {
ret = rdma_create_qp(ch->rdma_cm.cm_id, sdev->pd, qp_init);
ch->qp = ch->rdma_cm.cm_id->qp;
} else {
ch->qp = ib_create_qp(sdev->pd, qp_init); if (!IS_ERR(ch->qp)) {
ret = srpt_init_ch_qp(ch, ch->qp); if (ret)
ib_destroy_qp(ch->qp);
} else {
ret = PTR_ERR(ch->qp);
}
} if (ret) { bool retry = sq_size > MIN_SRPT_SQ_SIZE;
if (retry) {
pr_debug("failed to create queue pair with sq_size = %d (%d) - retrying\n",
sq_size, ret);
ib_cq_pool_put(ch->cq, ch->cq_size);
sq_size = max(sq_size / 2, MIN_SRPT_SQ_SIZE); goto retry;
} else {
pr_err("failed to create queue pair with sq_size = %d (%d)\n",
sq_size, ret); goto err_destroy_cq;
}
}
/** * srpt_close_ch - close a RDMA channel * @ch: SRPT RDMA channel. * * Make sure all resources associated with the channel will be deallocated at * an appropriate time. * * Returns true if and only if the channel state has been modified into * CH_DRAINING.
*/ staticbool srpt_close_ch(struct srpt_rdma_ch *ch)
{ int ret;
if (!srpt_set_ch_state(ch, CH_DRAINING)) {
pr_debug("%s: already closed\n", ch->sess_name); returnfalse;
}
kref_get(&ch->kref);
ret = srpt_ch_qp_err(ch); if (ret < 0)
pr_err("%s-%d: changing queue pair into error state failed: %d\n",
ch->sess_name, ch->qp->qp_num, ret);
ret = srpt_zerolength_write(ch); if (ret < 0) {
pr_err("%s-%d: queuing zero-length write failed: %d\n",
ch->sess_name, ch->qp->qp_num, ret); if (srpt_set_ch_state(ch, CH_DISCONNECTED))
schedule_work(&ch->release_work); else
WARN_ON_ONCE(true);
}
kref_put(&ch->kref, srpt_free_ch);
returntrue;
}
/*
 * Change the channel state into CH_DISCONNECTING. If a channel has not yet
 * reached the connected state, close it. If a channel is in the connected
 * state, send a DREQ. If a DREQ has been received, send a DREP. Note: it is
 * the responsibility of the caller to ensure that this function is not
 * invoked concurrently with the code that accepts a connection. This means
 * that this function must either be invoked from inside a CM callback
 * function or that it must be invoked with the srpt_port.mutex held.
 */
static int srpt_disconnect_ch(struct srpt_rdma_ch *ch)
{
	int err;

	if (!srpt_set_ch_state(ch, CH_DISCONNECTING))
		return -ENOTCONN;

	if (!ch->using_rdma_cm) {
		/* IB/CM: send a DREQ; fall back to a DREP if that fails. */
		err = ib_send_cm_dreq(ch->ib_cm.cm_id, NULL, 0);
		if (err < 0)
			err = ib_send_cm_drep(ch->ib_cm.cm_id, NULL, 0);
	} else {
		err = rdma_disconnect(ch->rdma_cm.cm_id);
	}

	if (err < 0 && srpt_close_ch(ch))
		err = 0;

	return err;
}
/* Send DREQ and wait for DREP. */ staticvoid srpt_disconnect_ch_sync(struct srpt_rdma_ch *ch)
{
DECLARE_COMPLETION_ONSTACK(closed); struct srpt_port *sport = ch->sport;
pr_debug("ch %s-%d state %d\n", ch->sess_name, ch->qp->qp_num,
ch->state);
/* * Shut down the SCSI target session, tell the connection manager to * disconnect the associated RDMA channel, transition the QP to the error * state and remove the channel from the channel list. This function is * typically called from inside srpt_zerolength_write_done(). Concurrent * srpt_zerolength_write() calls from inside srpt_close_ch() are possible * as long as the channel is on sport->nexus_list.
*/ staticvoid srpt_release_channel_work(struct work_struct *w)
{ struct srpt_rdma_ch *ch; struct srpt_device *sdev; struct srpt_port *sport; struct se_session *se_sess;
/** * srpt_cm_req_recv - process the event IB_CM_REQ_RECEIVED * @sdev: HCA through which the login request was received. * @ib_cm_id: IB/CM connection identifier in case of IB/CM. * @rdma_cm_id: RDMA/CM connection identifier in case of RDMA/CM. * @port_num: Port through which the REQ message was received. * @pkey: P_Key of the incoming connection. * @req: SRP login request. * @src_addr: GID (IB/CM) or IP address (RDMA/CM) of the port that submitted * the login request. * * Ownership of the cm_id is transferred to the target session if this * function returns zero. Otherwise the caller remains the owner of cm_id.
*/ staticint srpt_cm_req_recv(struct srpt_device *const sdev, struct ib_cm_id *ib_cm_id, struct rdma_cm_id *rdma_cm_id,
u8 port_num, __be16 pkey, conststruct srp_login_req *req, constchar *src_addr)
{ struct srpt_port *sport = &sdev->port[port_num - 1]; struct srpt_nexus *nexus; struct srp_login_rsp *rsp = NULL; struct srp_login_rej *rej = NULL; union { struct rdma_conn_param rdma_cm; struct ib_cm_rep_param ib_cm;
} *rep_param = NULL; struct srpt_rdma_ch *ch = NULL; char i_port_id[36];
u32 it_iu_len; int i, tag_num, tag_size, ret; struct srpt_tpg *stpg;
WARN_ON_ONCE(irqs_disabled());
it_iu_len = be32_to_cpu(req->req_it_iu_len);
pr_info("Received SRP_LOGIN_REQ with i_port_id %pI6, t_port_id %pI6 and it_iu_len %d on port %d (guid=%pI6); pkey %#04x\n",
req->initiator_port_id, req->target_port_id, it_iu_len,
port_num, &sport->gid, be16_to_cpu(pkey));
nexus = srpt_get_nexus(sport, req->initiator_port_id,
req->target_port_id); if (IS_ERR(nexus)) {
ret = PTR_ERR(nexus); goto out;
}
ret = -EINVAL; if (it_iu_len > srp_max_req_size || it_iu_len < 64) {
rej->reason = cpu_to_be32(
SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
pr_err("rejected SRP_LOGIN_REQ because its length (%d bytes) is out of range (%d .. %d)\n",
it_iu_len, 64, srp_max_req_size); goto reject;
}
if (!sport->enabled) {
rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
pr_info("rejected SRP_LOGIN_REQ because target port %s_%d has not yet been enabled\n",
dev_name(&sport->sdev->device->dev), port_num); goto reject;
}
if (*(__be64 *)req->target_port_id != cpu_to_be64(srpt_service_guid)
|| *(__be64 *)(req->target_port_id + 8) !=
cpu_to_be64(srpt_service_guid)) {
rej->reason = cpu_to_be32(
SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
pr_err("rejected SRP_LOGIN_REQ because it has an invalid target port identifier.\n"); goto reject;
}
ret = -ENOMEM;
ch = kzalloc(sizeof(*ch), GFP_KERNEL); if (!ch) {
rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
pr_err("rejected SRP_LOGIN_REQ because out of memory.\n"); goto reject;
}
kref_init(&ch->kref);
ch->pkey = be16_to_cpu(pkey);
ch->nexus = nexus;
ch->zw_cqe.done = srpt_zerolength_write_done;
INIT_WORK(&ch->release_work, srpt_release_channel_work);
ch->sport = sport; if (rdma_cm_id) {
ch->using_rdma_cm = true;
ch->rdma_cm.cm_id = rdma_cm_id;
rdma_cm_id->context = ch;
} else {
ch->ib_cm.cm_id = ib_cm_id;
ib_cm_id->context = ch;
} /* * ch->rq_size should be at least as large as the initiator queue * depth to avoid that the initiator driver has to report QUEUE_FULL * to the SCSI mid-layer.
*/
ch->rq_size = min(MAX_SRPT_RQ_SIZE, sdev->device->attrs.max_qp_wr);
spin_lock_init(&ch->spinlock);
ch->state = CH_CONNECTING;
INIT_LIST_HEAD(&ch->cmd_wait_list);
ch->max_rsp_size = ch->sport->port_attrib.srp_max_rsp_size;
ch->rsp_buf_cache = srpt_cache_get(ch->max_rsp_size); if (!ch->rsp_buf_cache) goto free_ch;
ch->ioctx_ring = (struct srpt_send_ioctx **)
srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size, sizeof(*ch->ioctx_ring[0]),
ch->rsp_buf_cache, 0, DMA_TO_DEVICE); if (!ch->ioctx_ring) {
pr_err("rejected SRP_LOGIN_REQ because creating a new QP SQ ring failed.\n");
rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); goto free_rsp_cache;
}
for (i = 0; i < ch->rq_size; i++)
ch->ioctx_ring[i]->ch = ch; if (!sdev->use_srq) {
u16 imm_data_offset = req->req_flags & SRP_IMMED_REQUESTED ?
be16_to_cpu(req->imm_data_offset) : 0;
u16 alignment_offset;
u32 req_sz;
--> --------------------
--> maximum size reached
--> --------------------
Messung V0.5
¤ Dauer der Verarbeitung: 0.15 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.