#include"hfi.h" #include"sdma.h" #include"user_sdma.h" #include"verbs.h"/* for the headers */ #include"common.h"/* for struct hfi1_tid_info */ #include"trace.h"
/*
 * Size of the user SDMA completion ring. Exposed as the "sdma_comp_size"
 * module parameter (type uint, permissions S_IRUGO); the default is 128.
 */
static uint hfi1_sdma_comp_ring_size = 128;
module_param_named(sdma_comp_size, hfi1_sdma_comp_ring_size, uint, S_IRUGO);
MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 128");
write_seqlock(&sde->waitlock);
trace_hfi1_usdma_defer(pq, sde, &pq->busy); if (sdma_progress(sde, seq, txreq)) goto eagain; /* * We are assuming that if the list is enqueued somewhere, it * is to the dmawait list since that is the only place where * it is supposed to be enqueued.
*/
xchg(&pq->state, SDMA_PKT_Q_DEFERRED); if (list_empty(&pq->busy.list)) {
pq->busy.lock = &sde->waitlock;
iowait_get_priority(&pq->busy);
iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
}
write_sequnlock(&sde->waitlock); return -EBUSY;
eagain:
write_sequnlock(&sde->waitlock); return -EAGAIN;
}
spin_lock(&fd->pq_rcu_lock);
pq = srcu_dereference_check(fd->pq, &fd->pq_srcu,
lockdep_is_held(&fd->pq_rcu_lock)); if (pq) {
rcu_assign_pointer(fd->pq, NULL);
spin_unlock(&fd->pq_rcu_lock);
synchronize_srcu(&fd->pq_srcu); /* at this point there can be no more new requests */
iowait_sdma_drain(&pq->busy); /* Wait until all requests have been freed. */
wait_event_interruptible(
pq->wait,
!atomic_read(&pq->n_reqs));
kfree(pq->reqs);
hfi1_free_system_pinning(pq);
bitmap_free(pq->req_in_use);
kmem_cache_destroy(pq->txreq_cache);
flush_pq_iowait(pq);
kfree(pq);
} else {
spin_unlock(&fd->pq_rcu_lock);
} if (fd->cq) {
vfree(fd->cq->comps);
kfree(fd->cq);
fd->cq = NULL;
} return 0;
}
/* * Sanity check the header io vector count. Need at least 1 vector * (header) and cannot be larger than the actual io vector count.
*/ if (req_iovcnt(info.ctrl) < 1 || req_iovcnt(info.ctrl) > dim) {
hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Invalid iov count %d, dim %ld",
dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx,
req_iovcnt(info.ctrl), dim); return -EINVAL;
}
if (!info.fragsize) {
hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Request does not specify fragsize",
dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx); return -EINVAL;
}
/* Try to claim the request. */ if (test_and_set_bit(info.comp_idx, pq->req_in_use)) {
hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in use",
dd->unit, uctxt->ctxt, fd->subctxt,
info.comp_idx); return -EBADSLT;
} /* * All safety checks have been done and this request has been claimed.
*/
trace_hfi1_sdma_user_process_request(dd, uctxt->ctxt, fd->subctxt,
info.comp_idx);
req = pq->reqs + info.comp_idx;
req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */
req->data_len = 0;
req->pq = pq;
req->cq = cq;
req->ahg_idx = -1;
req->iov_idx = 0;
req->sent = 0;
req->seqnum = 0;
req->seqcomp = 0;
req->seqsubmitted = 0;
req->tids = NULL;
req->has_error = 0;
INIT_LIST_HEAD(&req->txps);
memcpy(&req->info, &info, sizeof(info));
/* The request is initialized, count it */
atomic_inc(&pq->n_reqs);
if (req_opcode(info.ctrl) == EXPECTED) { /* expected must have a TID info and at least one data vector */ if (req->data_iovs < 2) {
SDMA_DBG(req, "Not enough vectors for expected request");
ret = -EINVAL; goto free_req;
}
req->data_iovs--;
}
if (!info.npkts || req->data_iovs > MAX_VECTORS_PER_REQ) {
SDMA_DBG(req, "Too many vectors (%u/%u)", req->data_iovs,
MAX_VECTORS_PER_REQ);
ret = -EINVAL; goto free_req;
}
/* Copy the header from the user buffer */
ret = copy_from_user(&req->hdr, iovec[idx].iov_base + sizeof(info), sizeof(req->hdr)); if (ret) {
SDMA_DBG(req, "Failed to copy header template (%d)", ret);
ret = -EFAULT; goto free_req;
}
/* If Static rate control is not enabled, sanitize the header. */ if (!HFI1_CAP_IS_USET(STATIC_RATE_CTRL))
req->hdr.pbc[2] = 0;
/* Validate the opcode. Do not trust packets from user space blindly. */
opcode = (be32_to_cpu(req->hdr.bth[0]) >> 24) & 0xff; if ((opcode & USER_OPCODE_CHECK_MASK) !=
USER_OPCODE_CHECK_VAL) {
SDMA_DBG(req, "Invalid opcode (%d)", opcode);
ret = -EINVAL; goto free_req;
} /* * Validate the vl. Do not trust packets from user space blindly. * VL comes from PBC, SC comes from LRH, and the VL needs to * match the SC look up.
*/
vl = (le16_to_cpu(req->hdr.pbc[0]) >> 12) & 0xF;
sc = (((be16_to_cpu(req->hdr.lrh[0]) >> 12) & 0xF) |
(((le16_to_cpu(req->hdr.pbc[1]) >> 14) & 0x1) << 4)); if (vl >= dd->pport->vls_operational ||
vl != sc_to_vlt(dd, sc)) {
SDMA_DBG(req, "Invalid SC(%u)/VL(%u)", sc, vl);
ret = -EINVAL; goto free_req;
}
/* Checking P_KEY for requests from user-space */
pkey = (u16)be32_to_cpu(req->hdr.bth[0]);
slid = be16_to_cpu(req->hdr.lrh[3]); if (egress_pkey_check(dd->pport, slid, pkey, sc, PKEY_CHECK_INVALID)) {
ret = -EINVAL; goto free_req;
}
/* * Also should check the BTH.lnh. If it says the next header is GRH then * the RXE parsing will be off and will land in the middle of the KDETH * or miss it entirely.
*/ if ((be16_to_cpu(req->hdr.lrh[0]) & 0x3) == HFI1_LRH_GRH) {
SDMA_DBG(req, "User tried to pass in a GRH");
ret = -EINVAL; goto free_req;
}
req->koffset = le32_to_cpu(req->hdr.kdeth.swdata[6]); /* * Calculate the initial TID offset based on the values of * KDETH.OFFSET and KDETH.OM that are passed in.
*/
req->tidoffset = KDETH_GET(req->hdr.kdeth.ver_tid_offset, OFFSET) *
(KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ?
KDETH_OM_LARGE : KDETH_OM_SMALL);
trace_hfi1_sdma_user_initial_tidoffset(dd, uctxt->ctxt, fd->subctxt,
info.comp_idx, req->tidoffset);
idx++;
/* Save all the IO vector structures */ for (i = 0; i < req->data_iovs; i++) {
req->iovs[i].offset = 0;
INIT_LIST_HEAD(&req->iovs[i].list);
memcpy(&req->iovs[i].iov,
iovec + idx++, sizeof(req->iovs[i].iov)); if (req->iovs[i].iov.iov_len == 0) {
ret = -EINVAL; goto free_req;
}
req->data_len += req->iovs[i].iov.iov_len;
}
trace_hfi1_sdma_user_data_length(dd, uctxt->ctxt, fd->subctxt,
info.comp_idx, req->data_len); if (pcount > req->info.npkts)
pcount = req->info.npkts; /* * Copy any TID info * User space will provide the TID info only when the * request type is EXPECTED. This is true even if there is * only one packet in the request and the header is already * setup. The reason for the singular TID case is that the * driver needs to perform safety checks.
*/ if (req_opcode(req->info.ctrl) == EXPECTED) {
u16 ntids = iovec[idx].iov_len / sizeof(*req->tids);
u32 *tmp;
if (!ntids || ntids > MAX_TID_PAIR_ENTRIES) {
ret = -EINVAL; goto free_req;
}
/* * We have to copy all of the tids because they may vary * in size and, therefore, the TID count might not be * equal to the pkt count. However, there is no way to * tell at this point.
*/
tmp = memdup_array_user(iovec[idx].iov_base,
ntids, sizeof(*req->tids)); if (IS_ERR(tmp)) {
ret = PTR_ERR(tmp);
SDMA_DBG(req, "Failed to copy %d TIDs (%d)",
ntids, ret); goto free_req;
}
req->tids = tmp;
req->n_tids = ntids;
req->tididx = 0;
idx++;
}
if (!req->sde || !sdma_running(req->sde)) {
ret = -ECOMM; goto free_req;
}
/* We don't need an AHG entry if the request contains only one packet */ if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG))
req->ahg_idx = sdma_ahg_alloc(req->sde);
/* * This is a somewhat blocking send implementation. * The driver will block the caller until all packets of the * request have been submitted to the SDMA engine. However, it * will not wait for send completions.
*/ while (req->seqsubmitted != req->info.npkts) {
ret = user_sdma_send_pkts(req, pcount); if (ret < 0) { int we_ret;
if (ret != -EBUSY) goto free_req;
we_ret = wait_event_interruptible_timeout(
pq->busy.wait_dma,
pq->state == SDMA_PKT_Q_ACTIVE,
msecs_to_jiffies(
SDMA_IOWAIT_TIMEOUT));
trace_hfi1_usdma_we(pq, we_ret); if (we_ret <= 0)
flush_pq_iowait(pq);
}
}
*count += idx; return 0;
free_req: /* * If the submitted seqsubmitted == npkts, the completion routine * controls the final state. If sequbmitted < npkts, wait for any * outstanding packets to finish before cleaning up.
*/ if (req->seqsubmitted < req->info.npkts) { if (req->seqsubmitted)
wait_event(pq->busy.wait_dma,
(req->seqcomp == req->seqsubmitted - 1));
user_sdma_free_request(req);
pq_update(pq);
set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
} return ret;
}
staticinline u32 compute_data_length(struct user_sdma_request *req, struct user_sdma_txreq *tx)
{ /* * Determine the proper size of the packet data. * The size of the data of the first packet is in the header * template. However, it includes the header and ICRC, which need * to be subtracted. * The minimum representable packet data length in a header is 4 bytes, * therefore, when the data length request is less than 4 bytes, there's * only one packet, and the packet data length is equal to that of the * request data length. * The size of the remaining packets is the minimum of the frag * size (MTU) or remaining data in the request.
*/
u32 len;
if (!req->seqnum) { if (req->data_len < sizeof(u32))
len = req->data_len; else
len = ((be16_to_cpu(req->hdr.lrh[2]) << 2) -
(sizeof(tx->hdr) - 4));
} elseif (req_opcode(req->info.ctrl) == EXPECTED) {
u32 tidlen = EXP_TID_GET(req->tids[req->tididx], LEN) *
PAGE_SIZE; /* * Get the data length based on the remaining space in the * TID pair.
*/
len = min(tidlen - req->tidoffset, (u32)req->info.fragsize); /* If we've filled up the TID pair, move to the next one. */ if (unlikely(!len) && ++req->tididx < req->n_tids &&
req->tids[req->tididx]) {
tidlen = EXP_TID_GET(req->tids[req->tididx],
LEN) * PAGE_SIZE;
req->tidoffset = 0;
len = min_t(u32, tidlen, req->info.fragsize);
} /* * Since the TID pairs map entire pages, make sure that we * are not going to try to send more data that we have * remaining.
*/
len = min(len, req->data_len - req->sent);
} else {
len = min(req->data_len - req->sent, (u32)req->info.fragsize);
}
trace_hfi1_sdma_user_compute_length(req->pq->dd,
req->pq->ctxt,
req->pq->subctxt,
req->info.comp_idx,
len); return len;
}
/* * Copy the request header into the tx header * because the HW needs a cacheline-aligned * address. * This copy can be optimized out if the hdr * member of user_sdma_request were also * cacheline aligned.
*/
memcpy(&tx->hdr, &req->hdr, sizeof(tx->hdr)); if (PBC2LRH(pbclen) != lrhlen) {
pbclen = (pbclen & 0xf000) | LRH2PBC(lrhlen);
tx->hdr.pbc[0] = cpu_to_le16(pbclen);
}
ret = check_header_template(req, &tx->hdr, lrhlen, datalen); if (ret) return ret;
ret = sdma_txinit_ahg(&tx->txreq, SDMA_TXREQ_F_AHG_COPY, sizeof(tx->hdr) + datalen, req->ahg_idx,
0, NULL, 0, user_sdma_txreq_cb); if (ret) return ret;
ret = sdma_txadd_kvaddr(pq->dd, &tx->txreq, &tx->hdr, sizeof(tx->hdr)); if (ret)
sdma_txclean(pq->dd, &tx->txreq); return ret;
}
/* If tx completion has reported an error, we are done. */ if (READ_ONCE(req->has_error)) return -EFAULT;
/* * Check if we might have sent the entire request already
*/ if (unlikely(req->seqnum == req->info.npkts)) { if (!list_empty(&req->txps)) goto dosend; return ret;
}
/* * Check whether any of the completions have come back * with errors. If so, we are not going to process any * more packets from this request.
*/ if (READ_ONCE(req->has_error)) return -EFAULT;
tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL); if (!tx) return -ENOMEM;
/* * For the last packet set the ACK request * and disable header suppression.
*/ if (req->seqnum == req->info.npkts - 1)
tx->flags |= (TXREQ_FLAGS_REQ_ACK |
TXREQ_FLAGS_REQ_DISABLE_SH);
/* * Calculate the payload size - this is min of the fragment * (MTU) size or the remaining bytes in the request but only * if we have payload data.
*/ if (req->data_len) {
iovec = &req->iovs[req->iov_idx]; if (READ_ONCE(iovec->offset) == iovec->iov.iov_len) { if (++req->iov_idx == req->data_iovs) {
ret = -EFAULT; goto free_tx;
}
iovec = &req->iovs[req->iov_idx];
WARN_ON(iovec->offset);
}
datalen = compute_data_length(req, tx);
/* * Disable header suppression for the payload <= 8DWS. * If there is an uncorrectable error in the receive * data FIFO when the received payload size is less than * or equal to 8DWS then the RxDmaDataFifoRdUncErr is * not reported.There is set RHF.EccErr if the header * is not suppressed.
*/ if (!datalen) {
SDMA_DBG(req, "Request has data but pkt len is 0");
ret = -EFAULT; goto free_tx;
} elseif (datalen <= 32) {
tx->flags |= TXREQ_FLAGS_REQ_DISABLE_SH;
}
}
if (req->ahg_idx >= 0) { if (!req->seqnum) {
ret = user_sdma_txadd_ahg(req, tx, datalen); if (ret) goto free_tx;
} else { int changes;
changes = set_txreq_header_ahg(req, tx,
datalen); if (changes < 0) {
ret = changes; goto free_tx;
}
}
} else {
ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) +
datalen, user_sdma_txreq_cb); if (ret) goto free_tx; /* * Modify the header for this packet. This only needs * to be done if we are not going to use AHG. Otherwise, * the HW will do it based on the changes we gave it * during sdma_txinit_ahg().
*/
ret = set_txreq_header(req, tx, datalen); if (ret) goto free_txreq;
}
req->koffset += datalen; if (req_opcode(req->info.ctrl) == EXPECTED)
req->tidoffset += datalen;
req->sent += datalen; while (datalen) {
ret = hfi1_add_pages_to_sdma_packet(req, tx, iovec,
&datalen); if (ret) goto free_txreq;
iovec = &req->iovs[req->iov_idx];
}
list_add_tail(&tx->txreq.list, &req->txps); /* * It is important to increment this here as it is used to * generate the BTH.PSN and, therefore, can't be bulk-updated * outside of the loop.
*/
tx->seqnum = req->seqnum++;
npkts++;
}
dosend:
ret = sdma_send_txlist(req->sde,
iowait_get_ib_work(&pq->busy),
&req->txps, &count);
req->seqsubmitted += count; if (req->seqsubmitted == req->info.npkts) { /* * The txreq has already been submitted to the HW queue * so we can free the AHG entry now. Corruption will not * happen due to the sequential manner in which * descriptors are processed.
*/ if (req->ahg_idx >= 0)
sdma_ahg_free(req->sde, req->ahg_idx);
} return ret;
/**
 * check_header_template() - sanity check a user supplied header template.
 * @req: user SDMA request the header belongs to
 * @hdr: packet header to validate
 * @lrhlen: LRH length computed for the packet
 * @datalen: data length of the packet
 *
 * Perform safety checks for any type of packet:
 *    - transfer size is multiple of 64bytes
 *    - packet length is multiple of 4 bytes
 *    - packet length is not larger than MTU size
 *
 * These checks are only done for the first packet of the
 * transfer since the header is "given" to us by user space.
 * For the remainder of the packets we compute the values.
 *
 * Return: 0 if all checks pass, -EINVAL otherwise.
 */
static int check_header_template(struct user_sdma_request *req,
				 struct hfi1_pkt_header *hdr, u32 lrhlen,
				 u32 datalen)
{
	if (req->info.fragsize % PIO_BLOCK_SIZE || lrhlen & 0x3 ||
	    lrhlen > get_lrh_len(*hdr, req->info.fragsize))
		return -EINVAL;

	if (req_opcode(req->info.ctrl) == EXPECTED) {
		/*
		 * The header is checked only on the first packet. Furthermore,
		 * we ensure that at least one TID entry is copied when the
		 * request is submitted. Therefore, we don't have to verify that
		 * tididx points to something sane.
		 */
		u32 tidval = req->tids[req->tididx];
		u32 tidlen = EXP_TID_GET(tidval, LEN) * PAGE_SIZE;
		u32 tididx = EXP_TID_GET(tidval, IDX);
		u32 tidctrl = EXP_TID_GET(tidval, CTRL);
		u32 tidoff;
		__le32 kval = hdr->kdeth.ver_tid_offset;

		/*
		 * OFFSET is taken from @hdr while the OM bit is read from the
		 * request's own template header (req->hdr).
		 */
		tidoff = KDETH_GET(kval, OFFSET) *
			  (KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ?
			   KDETH_OM_LARGE : KDETH_OM_SMALL);
		/*
		 * Expected receive packets have the following
		 * additional checks:
		 *     - offset is not larger than the TID size
		 *     - TIDCtrl values match between header and TID array
		 *     - TID indexes match between header and TID array
		 */
		if ((tidoff + datalen > tidlen) ||
		    KDETH_GET(kval, TIDCTRL) != tidctrl ||
		    KDETH_GET(kval, TID) != tididx)
			return -EINVAL;
	}
	return 0;
}
/* * Correctly set the BTH.PSN field based on type of * transfer - eager packets can just increment the PSN but * expected packets encode generation and sequence in the * BTH.PSN field so just incrementing will result in errors.
*/ staticinline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags)
{
u32 val = be32_to_cpu(bthpsn),
mask = (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffffull :
0xffffffull),
psn = val & mask; if (expct)
psn = (psn & ~HFI1_KDETH_BTH_SEQ_MASK) |
((psn + frags) & HFI1_KDETH_BTH_SEQ_MASK); else
psn = psn + frags; return psn & mask;
}
/* Copy the header template to the request before modification */
memcpy(hdr, &req->hdr, sizeof(*hdr));
/* * Check if the PBC and LRH length are mismatched. If so * adjust both in the header.
*/
pbclen = le16_to_cpu(hdr->pbc[0]); if (PBC2LRH(pbclen) != lrhlen) {
pbclen = (pbclen & 0xf000) | LRH2PBC(lrhlen);
hdr->pbc[0] = cpu_to_le16(pbclen);
hdr->lrh[2] = cpu_to_be16(lrhlen >> 2); /* * Third packet * This is the first packet in the sequence that has * a "static" size that can be used for the rest of * the packets (besides the last one).
*/ if (unlikely(req->seqnum == 2)) { /* * From this point on the lengths in both the * PBC and LRH are the same until the last * packet. * Adjust the template so we don't have to update * every packet
*/
req->hdr.pbc[0] = hdr->pbc[0];
req->hdr.lrh[2] = hdr->lrh[2];
}
} /* * We only have to modify the header if this is not the * first packet in the request. Otherwise, we use the * header given to us.
*/ if (unlikely(!req->seqnum)) {
ret = check_header_template(req, hdr, lrhlen, datalen); if (ret) return ret; goto done;
}
/* Set ACK request on last packet */ if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
hdr->bth[2] |= cpu_to_be32(1UL << 31);
/* Set the new offset */
hdr->kdeth.swdata[6] = cpu_to_le32(req->koffset); /* Expected packets have to fill in the new TID information */ if (req_opcode(req->info.ctrl) == EXPECTED) {
tidval = req->tids[req->tididx]; /* * If the offset puts us at the end of the current TID, * advance everything.
*/ if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) *
PAGE_SIZE)) {
req->tidoffset = 0; /* * Since we don't copy all the TIDs, all at once, * we have to check again.
*/ if (++req->tididx > req->n_tids - 1 ||
!req->tids[req->tididx]) { return -EINVAL;
}
tidval = req->tids[req->tididx];
}
omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >=
KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE_SHIFT :
KDETH_OM_SMALL_SHIFT; /* Set KDETH.TIDCtrl based on value for this TID. */
KDETH_SET(hdr->kdeth.ver_tid_offset, TIDCTRL,
EXP_TID_GET(tidval, CTRL)); /* Set KDETH.TID based on value for this TID */
KDETH_SET(hdr->kdeth.ver_tid_offset, TID,
EXP_TID_GET(tidval, IDX)); /* Clear KDETH.SH when DISABLE_SH flag is set */ if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH))
KDETH_SET(hdr->kdeth.ver_tid_offset, SH, 0); /* * Set the KDETH.OFFSET and KDETH.OM based on size of * transfer.
*/
trace_hfi1_sdma_user_tid_info(
pq->dd, pq->ctxt, pq->subctxt, req->info.comp_idx,
req->tidoffset, req->tidoffset >> omfactor,
omfactor != KDETH_OM_SMALL_SHIFT);
KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET,
req->tidoffset >> omfactor);
KDETH_SET(hdr->kdeth.ver_tid_offset, OM,
omfactor != KDETH_OM_SMALL_SHIFT);
}
done:
trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt,
req->info.comp_idx, hdr, tidval); return sdma_txadd_kvaddr(pq->dd, &tx->txreq, hdr, sizeof(*hdr));
}
/** * user_sdma_txreq_cb() - SDMA tx request completion callback. * @txreq: valid sdma tx request * @status: success/failure of request * * Called when the SDMA progress state machine gets notification that * the SDMA descriptors for this tx request have been processed by the * DMA engine. Called in interrupt context. * Only do work on completed sequences.
*/ staticvoid user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
{ struct user_sdma_txreq *tx =
container_of(txreq, struct user_sdma_txreq, txreq); struct user_sdma_request *req; struct hfi1_user_sdma_pkt_q *pq; struct hfi1_user_sdma_comp_q *cq; enum hfi1_sdma_comp_state state = COMPLETE;
if (!tx->req) return;
req = tx->req;
pq = req->pq;
cq = req->cq;
if (status != SDMA_TXREQ_S_OK) {
SDMA_DBG(req, "SDMA completion with error %d",
status);
WRITE_ONCE(req->has_error, 1);
state = ERROR;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.