// SPDX-License-Identifier: GPL-2.0
/*
 * IOMMU API for ARM architected SMMUv3 implementations.
 *
 * Copyright (C) 2015 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver is powered by bad coffee and bombay mix.
 */
/* Publish the shadow cons pointer to the hardware CONS register. */
static void queue_sync_cons_out(struct arm_smmu_queue *q)
{
	/*
	 * Ensure that all CPU accesses (reads and writes) to the queue
	 * are complete before we update the cons pointer.
	 */
	__iomb();
	writel_relaxed(q->llq.cons, q->cons_reg);
}
/*
 * Refresh the shadow prod pointer from the hardware PROD register.
 * Returns -EOVERFLOW if the overflow flag has flipped since the last
 * sync (i.e. the hardware wrapped and entries were lost), 0 otherwise.
 */
static int queue_sync_prod_in(struct arm_smmu_queue *q)
{
	u32 prod;
	int ret = 0;

	/*
	 * We can't use the _relaxed() variant here, as we must prevent
	 * speculative reads of the queue before we have determined that
	 * prod has indeed moved.
	 */
	prod = readl(q->prod_reg);

	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
		ret = -EOVERFLOW;

	/* Restored from upstream: the truncated source dropped these lines. */
	q->llq.prod = prod;
	return ret;
}
/* * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI * payload, so the write will zero the entire command on that platform.
*/ if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
q->ent_dwords * 8;
}
arm_smmu_cmdq_build_cmd(cmd, &ent); if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
}
switch (idx) { case CMDQ_ERR_CERROR_ABT_IDX:
dev_err(smmu->dev, "retrying command fetch\n"); return; case CMDQ_ERR_CERROR_NONE_IDX: return; case CMDQ_ERR_CERROR_ATC_INV_IDX: /* * ATC Invalidation Completion timeout. CONS is still pointing * at the CMD_SYNC. Attempt to complete other pending commands * by repeating the CMD_SYNC, though we might well end up back * here since the ATC invalidation may still be pending.
*/ return; case CMDQ_ERR_CERROR_ILL_IDX: default: break;
}
/* * We may have concurrent producers, so we need to be careful * not to touch any of the shadow cmdq state.
*/
queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
dev_err(smmu->dev, "skipping command in error state:\n"); for (i = 0; i < ARRAY_SIZE(cmd); ++i)
dev_err(smmu->dev, "\t0x%016llx\n", (unsignedlonglong)cmd[i]);
/* Convert the erroneous command into a CMD_SYNC */
arm_smmu_cmdq_build_cmd(cmd, &cmd_sync); if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
/*
 * Command queue locking.
 * This is a form of bastardised rwlock with the following major changes:
 *
 * - The only LOCK routines are exclusive_trylock() and shared_lock().
 *   Neither have barrier semantics, and instead provide only a control
 *   dependency.
 *
 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
 *   fails if the caller appears to be the last lock holder (yes, this is
 *   racy). All successful UNLOCK routines have RELEASE semantics.
 */
static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
{
	int val;

	/*
	 * We can try to avoid the cmpxchg() loop by simply incrementing the
	 * lock counter. When held in exclusive state, the lock counter is set
	 * to INT_MIN so these increments won't hurt as the value will remain
	 * negative.
	 */
	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
		return;

	/* Exclusive holder present: wait for a non-negative count, then join. */
	do {
		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
}
/* * Command queue insertion. * This is made fiddly by our attempts to achieve some sort of scalability * since there is one queue shared amongst all of the CPUs in the system. If * you like mixed-size concurrency, dependency ordering and relaxed atomics, * then you'll *love* this monstrosity. * * The basic idea is to split the queue up into ranges of commands that are * owned by a given CPU; the owner may not have written all of the commands * itself, but is responsible for advancing the hardware prod pointer when * the time comes. The algorithm is roughly: * * 1. Allocate some space in the queue. At this point we also discover * whether the head of the queue is currently owned by another CPU, * or whether we are the owner. * * 2. Write our commands into our allocated slots in the queue. * * 3. Mark our slots as valid in arm_smmu_cmdq.valid_map. * * 4. If we are an owner: * a. Wait for the previous owner to finish. * b. Mark the queue head as unowned, which tells us the range * that we are responsible for publishing. * c. Wait for all commands in our owned range to become valid. * d. Advance the hardware prod pointer. * e. Tell the next owner we've finished. * * 5. If we are inserting a CMD_SYNC (we may or may not have been an * owner), then we need to stick around until it has completed: * a. If we have MSIs, the SMMU can write back into the CMD_SYNC * to clear the first 4 bytes. * b. Otherwise, we spin waiting for the hardware cons pointer to * advance past our command. * * The devil is in the details, particularly the use of locking for handling * SYNC completion and freeing up space in the queue before we think that it is * full.
*/ staticvoid __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
u32 sprod, u32 eprod, bool set)
{
u32 swidx, sbidx, ewidx, ebidx; struct arm_smmu_ll_queue llq = {
.max_n_shift = cmdq->q.llq.max_n_shift,
.prod = sprod,
};
/* * The valid bit is the inverse of the wrap bit. This means * that a zero-initialised queue is invalid and, after marking * all entries as valid, they become invalid again when we * wrap.
*/ if (set) {
atomic_long_xor(mask, ptr);
} else { /* Poll */ unsignedlong valid;
/* Mark all entries in the range [sprod, eprod) as valid */
static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
					u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
}
/* Wait for all entries in the range [sprod, eprod) to become valid */
static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
					 u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
}
/*
 * Wait for the command queue to become non-full.
 * Returns 0 on success, or the (negative) error from queue_poll() on timeout.
 */
static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
					     struct arm_smmu_cmdq *cmdq,
					     struct arm_smmu_ll_queue *llq)
{
	unsigned long flags;
	struct arm_smmu_queue_poll qp;
	int ret = 0;

	/*
	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
	 * that fails, spin until somebody else updates it for us.
	 */
	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
		llq->val = READ_ONCE(cmdq->q.llq.val);
		return 0;
	}

	queue_poll_init(smmu, &qp);
	do {
		llq->val = READ_ONCE(cmdq->q.llq.val);
		if (!queue_full(llq))
			break;

		ret = queue_poll(&qp);
	} while (!ret);

	return ret;
}
/*
 * Wait until the SMMU signals a CMD_SYNC completion MSI.
 * Must be called with the cmdq lock held in some capacity.
 */
static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
					  struct arm_smmu_cmdq *cmdq,
					  struct arm_smmu_ll_queue *llq)
{
	int ret = 0;
	struct arm_smmu_queue_poll qp;
	/* The SMMU zeroes the first 32 bits of the CMD_SYNC on completion. */
	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));

	queue_poll_init(smmu, &qp);

	/*
	 * The MSI won't generate an event, since it's being written back
	 * into the command queue.
	 */
	qp.wfe = false;
	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
	return ret;
}
/*
 * Wait until the SMMU cons index passes llq->prod.
 * Must be called with the cmdq lock held in some capacity.
 */
static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
					       struct arm_smmu_cmdq *cmdq,
					       struct arm_smmu_ll_queue *llq)
{
	struct arm_smmu_queue_poll qp;
	u32 prod = llq->prod;
	int ret = 0;

	queue_poll_init(smmu, &qp);
	llq->val = READ_ONCE(cmdq->q.llq.val);
	do {
		if (queue_consumed(llq, prod))
			break;

		ret = queue_poll(&qp);

		/*
		 * This needs to be a readl() so that our subsequent call
		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
		 *
		 * Specifically, we need to ensure that we observe all
		 * shared_lock()s by other CMD_SYNCs that share our owner,
		 * so that a failing call to tryunlock() means that we're
		 * the last one out and therefore we can safely advance
		 * cmdq->q.llq.cons. Roughly speaking:
		 *
		 * CPU 0		CPU1			CPU2 (us)
		 *
		 * if (sync)
		 *	shared_lock();
		 *
		 * dma_wmb();
		 * set_valid_map();
		 *
		 *			if (owner) {
		 *				poll_valid_map();
		 *				<control dependency>
		 *				writel(prod_reg);
		 *
		 *						readl(cons_reg);
		 *						tryunlock();
		 *
		 * Requires us to see CPU 0's shared_lock() acquisition.
		 */
		llq->cons = readl(cmdq->q.cons_reg);
	} while (!ret);

	/* Restored from upstream: the truncated source dropped these lines. */
	return ret;
}
/* * This is the actual insertion function, and provides the following * ordering guarantees to callers: * * - There is a dma_wmb() before publishing any commands to the queue. * This can be relied upon to order prior writes to data structures * in memory (such as a CD or an STE) before the command. * * - On completion of a CMD_SYNC, there is a control dependency. * This can be relied upon to order subsequent writes to memory (e.g. * freeing an IOVA) after completion of the CMD_SYNC. * * - Command insertion is totally ordered, so if two CPUs each race to * insert their own list of commands then all of the commands from one * CPU will appear before any of the commands from the other CPU.
*/ int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, struct arm_smmu_cmdq *cmdq, u64 *cmds, int n, bool sync)
{
u64 cmd_sync[CMDQ_ENT_DWORDS];
u32 prod; unsignedlong flags; bool owner; struct arm_smmu_ll_queue llq, head; int ret = 0;
llq.max_n_shift = cmdq->q.llq.max_n_shift;
/* 1. Allocate some space in the queue */
local_irq_save(flags);
llq.val = READ_ONCE(cmdq->q.llq.val); do {
u64 old;
while (!queue_has_space(&llq, n + sync)) {
local_irq_restore(flags); if (arm_smmu_cmdq_poll_until_not_full(smmu, cmdq, &llq))
dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
local_irq_save(flags);
}
/* * 2. Write our commands into the queue * Dependency ordering from the cmpxchg() loop above.
*/
arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n); if (sync) {
prod = queue_inc_prod_n(&llq, n);
arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, cmdq, prod);
queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
/* * In order to determine completion of our CMD_SYNC, we must * ensure that the queue can't wrap twice without us noticing. * We achieve that by taking the cmdq lock as shared before * marking our slot as valid.
*/
arm_smmu_cmdq_shared_lock(cmdq);
}
/* 3. Mark our slots as valid, ensuring commands are visible first */
dma_wmb();
arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
/* 4. If we are the owner, take control of the SMMU hardware */ if (owner) { /* a. Wait for previous owner to finish */
atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
/* b. Stop gathering work by clearing the owned flag */
prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
&cmdq->q.llq.atomic.prod);
prod &= ~CMDQ_PROD_OWNED_FLAG;
/* * c. Wait for any gathered work to be written to the queue. * Note that we read our own entries so that we have the control * dependency required by (d).
*/
arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
/* * d. Advance the hardware prod pointer * Control dependency ordering from the entries becoming valid.
*/
writel_relaxed(prod, cmdq->q.prod_reg);
/* * e. Tell the next owner we're done * Make sure we've updated the hardware first, so that we don't * race to update prod and potentially move it backwards.
*/
atomic_set_release(&cmdq->owner_prod, prod);
}
/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */ if (sync) {
llq.prod = queue_inc_prod_n(&llq, n);
ret = arm_smmu_cmdq_poll_until_sync(smmu, cmdq, &llq); if (ret) {
dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
llq.prod,
readl_relaxed(cmdq->q.prod_reg),
readl_relaxed(cmdq->q.cons_reg));
}
/* * Try to unlock the cmdq lock. This will fail if we're the last * reader, in which case we can safely update cmdq->q.llq.cons
*/ if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
arm_smmu_cmdq_shared_unlock(cmdq);
}
}
cmd.opcode = CMDQ_OP_RESUME;
cmd.resume.sid = sid;
cmd.resume.stag = resp->grpid; switch (resp->code) { case IOMMU_PAGE_RESP_INVALID: case IOMMU_PAGE_RESP_FAILURE:
cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT; break; case IOMMU_PAGE_RESP_SUCCESS:
cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY; break; default: break;
}
arm_smmu_cmdq_issue_cmd(master->smmu, &cmd); /* * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP. * RESUME consumption guarantees that the stalled transaction will be * terminated... at some point in the future. PRI_RESP is fire and * forget.
*/
}
/* * Based on the value of ent report which bits of the STE the HW will access. It * would be nice if this was complete according to the spec, but minimally it * has to capture the bits this driver uses.
*/
VISIBLE_IF_KUNIT void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
{ unsignedint cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent[0]));
used_bits[0] = cpu_to_le64(STRTAB_STE_0_V); if (!(ent[0] & cpu_to_le64(STRTAB_STE_0_V))) return;
/* * See 13.5 Summary of attribute/permission configuration fields * for the SHCFG behavior.
*/ if (FIELD_GET(STRTAB_STE_1_S1DSS, le64_to_cpu(ent[1])) ==
STRTAB_STE_1_S1DSS_BYPASS)
used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
}
if (cfg == STRTAB_STE_0_CFG_BYPASS)
used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_ste_used);
/*
 * Figure out if we can do a hitless update of entry to become target. Returns a
 * bit mask where 1 indicates that qword needs to be set disruptively.
 * unused_update is an intermediate value of entry that has unused bits set to
 * their new values.
 */
static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer,
				    const __le64 *entry, const __le64 *target,
				    __le64 *unused_update)
{
	__le64 target_used[NUM_ENTRY_QWORDS] = {};
	__le64 cur_used[NUM_ENTRY_QWORDS] = {};
	u8 used_qword_diff = 0;
	unsigned int i;

	/*
	 * Restored from upstream: without these calls both used-bit masks stay
	 * zero and every comparison below is meaningless.
	 */
	writer->ops->get_used(entry, cur_used);
	writer->ops->get_used(target, target_used);

	for (i = 0; i != NUM_ENTRY_QWORDS; i++) {
		/*
		 * Check that masks are up to date, the make functions are not
		 * allowed to set a bit to 1 if the used function doesn't say it
		 * is used.
		 */
		WARN_ON_ONCE(target[i] & ~target_used[i]);

		/* Bits can change because they are not currently being used */
		unused_update[i] = (entry[i] & cur_used[i]) |
				   (target[i] & ~cur_used[i]);
		/*
		 * Each bit indicates that a used bit in a qword needs to be
		 * changed after unused_update is applied.
		 */
		if ((unused_update[i] & target_used[i]) != target[i])
			used_qword_diff |= 1 << i;
	}
	return used_qword_diff;
}
for (i = start; len != 0; len--, i++) { if (entry[i] != target[i]) {
WRITE_ONCE(entry[i], target[i]);
changed = true;
}
}
if (changed)
writer->ops->sync(writer); return changed;
}
/* * Update the STE/CD to the target configuration. The transition from the * current entry to the target entry takes place over multiple steps that * attempts to make the transition hitless if possible. This function takes care * not to create a situation where the HW can perceive a corrupted entry. HW is * only required to have a 64 bit atomicity with stores from the CPU, while * entries are many 64 bit values big. * * The difference between the current value and the target value is analyzed to * determine which of three updates are required - disruptive, hitless or no * change. * * In the most general disruptive case we can make any update in three steps: * - Disrupting the entry (V=0) * - Fill now unused qwords, execpt qword 0 which contains V * - Make qword 0 have the final value and valid (V=1) with a single 64 * bit store * * However this disrupts the HW while it is happening. There are several * interesting cases where a STE/CD can be updated without disturbing the HW * because only a small number of bits are changing (S1DSS, CONFIG, etc) or * because the used bits don't intersect. We can detect this by calculating how * many 64 bit values need update after adjusting the unused bits and skip the * V=0 process. This relies on the IGNORED behavior described in the * specification.
*/
/*
 * Update the STE/CD in place to the target configuration, choosing between a
 * hitless update (one critical qword), a disruptive V=0 update (multiple
 * qwords), or no change. See the block comment above for the full rationale.
 */
VISIBLE_IF_KUNIT
void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *entry,
			  const __le64 *target)
{
	__le64 unused_update[NUM_ENTRY_QWORDS];
	u8 used_qword_diff;

	used_qword_diff =
		arm_smmu_entry_qword_diff(writer, entry, target, unused_update);
	if (hweight8(used_qword_diff) == 1) {
		/*
		 * Only one qword needs its used bits to be changed. This is a
		 * hitless update, update all bits the current STE/CD is
		 * ignoring to their new values, then update a single "critical
		 * qword" to change the STE/CD and finally 0 out any bits that
		 * are now unused in the target configuration.
		 */
		unsigned int critical_qword_index = ffs(used_qword_diff) - 1;
		/*
		 * Skip writing unused bits in the critical qword since we'll be
		 * writing it in the next step anyways. This can save a sync
		 * when the only change is in that qword.
		 */
		unused_update[critical_qword_index] =
			entry[critical_qword_index];
		entry_set(writer, entry, unused_update, 0, NUM_ENTRY_QWORDS);
		entry_set(writer, entry, target, critical_qword_index, 1);
		entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS);
	} else if (used_qword_diff) {
		/*
		 * At least two qwords need their inuse bits to be changed. This
		 * requires a breaking update, zero the V bit, write all qwords
		 * but 0, then set qword 0
		 */
		unused_update[0] = 0;
		entry_set(writer, entry, unused_update, 0, 1);
		entry_set(writer, entry, target, 1, NUM_ENTRY_QWORDS - 1);
		entry_set(writer, entry, target, 0, 1);
	} else {
		/*
		 * No inuse bit changed. Sanity check that all unused bits are 0
		 * in the entry. The target was already sanity checked by
		 * compute_qword_diff().
		 */
		WARN_ON_ONCE(
			entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS));
	}
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_write_entry);
/* * If EPD0 is set by the make function it means * T0SZ/TG0/IR0/OR0/SH0/TTB0 are IGNORED
*/ if (ent[0] & cpu_to_le64(CTXDESC_CD_0_TCR_EPD0)) {
used_bits[0] &= ~cpu_to_le64(
CTXDESC_CD_0_TCR_T0SZ | CTXDESC_CD_0_TCR_TG0 |
CTXDESC_CD_0_TCR_IRGN0 | CTXDESC_CD_0_TCR_ORGN0 |
CTXDESC_CD_0_TCR_SH0);
used_bits[1] &= ~cpu_to_le64(CTXDESC_CD_1_TTB0_MASK);
}
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_cd_used);
/* To enable dirty flag update, set both Access flag and dirty state update */ if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD)
target->data[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_HA |
CTXDESC_CD_0_TCR_HD);
if (smmu->features & ARM_SMMU_FEAT_E2H) { /* * To support BTM the streamworld needs to match the * configuration of the CPU so that the ASID broadcasts are * properly matched. This means either S/NS-EL2-E2H (hypervisor) * or NS-EL1 (guest). Since an SVA domain can be installed in a * PASID this should always use a BTM compatible configuration * if the HW supports it.
*/
target->data[1] |= cpu_to_le64(
FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_EL2));
} else {
target->data[1] |= cpu_to_le64(
FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
/* * VMID 0 is reserved for stage-2 bypass EL1 STEs, see * arm_smmu_domain_alloc_id()
*/
target->data[2] =
cpu_to_le64(FIELD_PREP(STRTAB_STE_2_S2VMID, 0));
}
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_cdtable_ste);
/* * This can safely directly manipulate the STE memory without a sync sequence * because the STE table has not been installed in the SMMU yet.
*/ staticvoid arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab, unsignedint nent)
{ unsignedint i;
for (i = 0; i < nent; ++i) {
arm_smmu_make_abort_ste(strtab);
strtab++;
}
}
switch (event->id) { case EVT_ID_BAD_STE_CONFIG: case EVT_ID_STREAM_DISABLED_FAULT: case EVT_ID_BAD_SUBSTREAMID_CONFIG: case EVT_ID_BAD_CD_CONFIG: case EVT_ID_TRANSLATION_FAULT: case EVT_ID_ADDR_SIZE_FAULT: case EVT_ID_ACCESS_FAULT: case EVT_ID_PERMISSION_FAULT: break; default: return -EOPNOTSUPP;
}
if (event->stall) { if (event->read)
perm |= IOMMU_FAULT_PERM_READ; else
perm |= IOMMU_FAULT_PERM_WRITE;
if (event->instruction)
perm |= IOMMU_FAULT_PERM_EXEC;
if (event->privileged)
perm |= IOMMU_FAULT_PERM_PRIV;
mutex_lock(&smmu->streams_mutex);
master = arm_smmu_find_master(smmu, event->sid); if (!master) {
ret = -EINVAL; goto out_unlock;
}
if (event->stall)
ret = iommu_report_device_fault(master->dev, &fault_evt); elseif (master->vmaster && !event->s2)
ret = arm_vmaster_report_event(master->vmaster, evt); else
ret = -EOPNOTSUPP; /* Unhandled events should be pinned */
out_unlock:
mutex_unlock(&smmu->streams_mutex); return ret;
}
do { while (!queue_remove_raw(q, evt)) {
arm_smmu_decode_event(smmu, evt, &event); if (arm_smmu_handle_event(smmu, evt, &event))
arm_smmu_dump_event(smmu, evt, &event, &rs);
put_device(event.dev);
cond_resched();
}
/* * Not much we can do on overflow, so scream and pretend we're * trying harder.
*/ if (queue_sync_prod_in(q) == -EOVERFLOW)
dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
} while (!queue_empty(llq));
/* Sync our overflow flag, as we believe we're up to speed */
queue_sync_cons_ovf(q); return IRQ_HANDLED;
}
/* * ATS and PASID: * * If substream_valid is clear, the PCIe TLP is sent without a PASID * prefix. In that case all ATC entries within the address range are * invalidated, including those that were requested with a PASID! There * is no way to invalidate only entries without PASID. * * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID * traffic), translation requests without PASID create ATC entries * without PASID, which must be invalidated with substream_valid clear. * This has the unpleasant side-effect of invalidating all PASID-tagged * ATC entries within the address range.
*/
*cmd = (struct arm_smmu_cmdq_ent) {
.opcode = CMDQ_OP_ATC_INV,
.substream_valid = (ssid != IOMMU_NO_PASID),
.atc.ssid = ssid,
};
if (!size) {
cmd->atc.size = ATC_INV_SIZE_ALL; return;
}
/* * In an ATS Invalidate Request, the address must be aligned on the * range size, which must be a power of two number of page sizes. We * thus have to choose between grossly over-invalidating the region, or * splitting the invalidation into multiple commands. For simplicity * we'll go with the first solution, but should refine it in the future * if multiple commands are shown to be more efficient. * * Find the smallest power of two that covers the range. The most * significant differing bit between the start and end addresses, * fls(start ^ end), indicates the required span. For example: * * We want to invalidate pages [8; 11]. This is already the ideal range: * x = 0b1000 ^ 0b1011 = 0b11 * span = 1 << fls(x) = 4 * * To invalidate pages [7; 10], we need to invalidate [0; 15]: * x = 0b0111 ^ 0b1010 = 0b1101 * span = 1 << fls(x) = 16
*/
log2_span = fls_long(page_start ^ page_end);
span_mask = (1ULL << log2_span) - 1;
if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS)) return 0;
/* * Ensure that we've completed prior invalidation of the main TLBs * before we read 'nr_ats_masters' in case of a concurrent call to * arm_smmu_enable_ats(): * * // unmap() // arm_smmu_enable_ats() * TLBI+SYNC atomic_inc(&nr_ats_masters); * smp_mb(); [...] * atomic_read(&nr_ats_masters); pci_enable_ats() // writel() * * Ensures that we always see the incremented 'nr_ats_masters' count if * ATS was enabled at the PCI device before completion of the TLBI.
*/
smp_mb(); if (!atomic_read(&smmu_domain->nr_ats_masters)) return 0;
if (master_domain->nested_ats_flush) { /* * If a S2 used as a nesting parent is changed we have * no option but to completely flush the ATC.
*/
arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
} else {
arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size,
&cmd);
}
/*
 * NOTE(review): the remainder of this file was lost during extraction
 * ("maximum size reached" marker plus viewer boilerplate removed).
 * Restore the missing tail from the upstream driver source.
 */