// SPDX-License-Identifier: GPL-2.0-or-later /* * spu_switch.c * * (C) Copyright IBM Corp. 2005 * * Author: Mark Nutter <mnutter@us.ibm.com> * * Host-side part of SPU context switch sequence outlined in * Synergistic Processor Element, Book IV. * * A fully premptive switch of an SPE is very expensive in terms * of time and system resources. SPE Book IV indicates that SPE * allocation should follow a "serially reusable device" model, * in which the SPE is assigned a task until it completes. When * this is not possible, this sequence may be used to premptively * save, and then later (optionally) restore the context of a * program executing on an SPE.
*/
/* Save, Step 2: * Save, Step 6: * If SPU_Status[E,L,IS] any field is '1', this * SPU is in isolate state and cannot be context * saved at this time.
*/
isolate_state = SPU_STATUS_ISOLATED_STATE |
SPU_STATUS_ISOLATED_LOAD_STATUS | SPU_STATUS_ISOLATED_EXIT_STATUS; return (in_be32(&prob->spu_status_R) & isolate_state) ? 1 : 0;
}
static inline void disable_interrupts(struct spu_state *csa, struct spu *spu)
{
	/* Save, Step 3:
	 * Restore, Step 2:
	 *     Save INT_Mask_class0 in CSA.
	 *     Write INT_MASK_class0 with value of 0.
	 *     Save INT_Mask_class1 in CSA.
	 *     Write INT_MASK_class1 with value of 0.
	 *     Save INT_Mask_class2 in CSA.
	 *     Write INT_MASK_class2 with value of 0.
	 *     Synchronize all three interrupts to be sure
	 *     we no longer execute a handler on another CPU.
	 *
	 * csa may be NULL (no save area): masks are then cleared
	 * without being saved first.
	 */
	spin_lock_irq(&spu->register_lock);
	if (csa) {
		csa->priv1.int_mask_class0_RW = spu_int_mask_get(spu, 0);
		csa->priv1.int_mask_class1_RW = spu_int_mask_get(spu, 1);
		csa->priv1.int_mask_class2_RW = spu_int_mask_get(spu, 2);
	}
	spu_int_mask_set(spu, 0, 0ul);
	spu_int_mask_set(spu, 1, 0ul);
	spu_int_mask_set(spu, 2, 0ul);
	eieio();
	spin_unlock_irq(&spu->register_lock);

	/*
	 * This flag needs to be set before calling synchronize_irq so
	 * that the update will be visible to the relevant handlers
	 * via a simple load.
	 */
	set_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags);
	clear_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags);
	synchronize_irq(spu->irqs[0]);
	synchronize_irq(spu->irqs[1]);
	synchronize_irq(spu->irqs[2]);
}
static inline void set_watchdog_timer(struct spu_state *csa, struct spu *spu)
{
	/* Save, Step 4:
	 * Restore, Step 25.
	 *     Set a software watchdog timer, which specifies the
	 *     maximum allowable time for a context save sequence.
	 *
	 *     For present, this implementation will not set a global
	 *     watchdog timer, as virtualization & variable system load
	 *     may cause unpredictable execution times.
	 */
}
static inline void inhibit_user_access(struct spu_state *csa, struct spu *spu)
{
	/* Save, Step 5:
	 * Restore, Step 3:
	 *     Inhibit user-space access (if provided) to this
	 *     SPU by unmapping the virtual pages assigned to
	 *     the SPU memory-mapped I/O (MMIO) for problem
	 *     state. TBD.
	 */
}
/* Save, Step 9: * Save SPU_Runcntl in the CSA. This value contains * the "Application Desired State".
*/
csa->prob.spu_runcntl_RW = in_be32(&prob->spu_runcntl_RW);
}
static inline void save_mfc_sr1(struct spu_state *csa, struct spu *spu)
{
	/* Save, Step 10:
	 *     Save MFC_SR1 in the CSA.
	 */
	csa->priv1.mfc_sr1_RW = spu_mfc_sr1_get(spu);
}
/* Save, Step 13: * Write MFC_CNTL[Dh] set to a '1' to halt * the decrementer.
*/
out_be64(&priv2->mfc_control_RW,
MFC_CNTL_DECREMENTER_HALTED | MFC_CNTL_SUSPEND_MASK);
eieio();
}
static inline void save_timebase(struct spu_state *csa, struct spu *spu)
{
	/* Save, Step 14:
	 *     Read PPE Timebase High and Timebase low registers
	 *     and save in CSA. TBD.
	 *
	 * Implementation note: the cycle counter is used as the
	 * suspend timestamp so the decrementer can be adjusted
	 * on restore.
	 */
	csa->suspend_time = get_cycles();
}
static inline void remove_other_spu_access(struct spu_state *csa,
					   struct spu *spu)
{
	/* Save, Step 15:
	 *     Remove other SPU access to this SPU by unmapping
	 *     this SPU's pages from their address space. TBD.
	 */
}
static inline void handle_pending_interrupts(struct spu_state *csa,
					     struct spu *spu)
{
	/* Save, Step 18:
	 *     Handle any pending interrupts from this SPU
	 *     here.  This is OS or hypervisor specific.  One
	 *     option is to re-enable interrupts to handle any
	 *     pending interrupts, with the interrupt handlers
	 *     recognizing the software Context Switch Pending
	 *     flag, to ensure the SPU execution or MFC command
	 *     queue is not restarted.  TBD.
	 */
}
/* Save the Prxy_TagStatus register in the CSA. * * It is unnecessary to restore dma_tagstatus_R, however, * dma_tagstatus_R in the CSA is accessed via backing_ops, so * we must save it.
*/
csa->prob.dma_tagstatus_R = in_be32(&prob->dma_tagstatus_R);
}
/* Save, Step 23: * Save the MFC_CSR_CMD1 and MFC_CSR_CMD2 * registers in the CSA.
*/
csa->priv2.spu_cmd_buf1_RW = in_be64(&priv2->spu_cmd_buf1_RW);
csa->priv2.spu_cmd_buf2_RW = in_be64(&priv2->spu_cmd_buf2_RW);
}
/* Save, Step 28: * Poll MFC_CNTL[Ps] until value '11' is read * (purge complete).
*/
POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) &
MFC_CNTL_PURGE_DMA_STATUS_MASK) ==
MFC_CNTL_PURGE_DMA_COMPLETE);
}
static inline void setup_mfc_sr1(struct spu_state *csa, struct spu *spu)
{
	/* Save, Step 30:
	 * Restore, Step 18:
	 *     Write MFC_SR1 with MFC_SR1[D=0,S=1] and
	 *     MFC_SR1[TL,R,Pr,T] set correctly for the
	 *     OS specific environment.
	 *
	 *     Implementation note: The SPU-side code
	 *     for save/restore is privileged, so the
	 *     MFC_SR1[Pr] bit is not set.
	 */
	spu_mfc_sr1_set(spu, (MFC_STATE1_MASTER_RUN_CONTROL_MASK |
			      MFC_STATE1_RELOCATE_MASK |
			      MFC_STATE1_BUS_TLBIE_MASK));
}
/* Save, Step 36: * Save SPU_Cfg in the CSA.
*/
csa->priv2.spu_cfg_RW = in_be64(&priv2->spu_cfg_RW);
}
static inline void save_pm_trace(struct spu_state *csa, struct spu *spu)
{
	/* Save, Step 37:
	 *     Save PM_Trace_Tag_Wait_Mask in the CSA.
	 *     Not performed by this implementation.
	 */
}
static inline void save_mfc_rag(struct spu_state *csa, struct spu *spu)
{
	/* Save, Step 38:
	 *     Save RA_GROUP_ID register and the
	 *     RA_ENABLE register in the CSA.
	 */
	csa->priv1.resource_allocation_groupID_RW =
		spu_resource_allocation_groupID_get(spu);
	csa->priv1.resource_allocation_enable_RW =
		spu_resource_allocation_enable_get(spu);
}
staticinlinevoid setup_mfc_slbs(struct spu_state *csa, struct spu *spu, unsignedint *code, int code_size)
{ /* Save, Step 47: * Restore, Step 30. * If MFC_SR1[R]=1, write 0 to SLB_Invalidate_All * register, then initialize SLB_VSID and SLB_ESID * to provide access to SPU context save code and * LSCSA. * * This implementation places both the context * switch code and LSCSA in kernel address space. * * Further this implementation assumes that the * MFC_SR1[R]=1 (in other words, assume that * translation is desired by OS environment).
*/
spu_invalidate_slbs(spu);
spu_setup_kernel_slbs(spu, csa->lscsa, code, code_size);
}
static inline void set_switch_active(struct spu_state *csa, struct spu *spu)
{
	/* Save, Step 48:
	 * Restore, Step 23.
	 *     Change the software context switch pending flag
	 *     to context switch active.  This implementation does
	 *     not use a switch active flag.
	 *
	 * Now that we have saved the mfc in the csa, we can add in the
	 * restart command if an exception occurred.
	 */
	if (test_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags))
		csa->priv2.mfc_control_RW |= MFC_CNTL_RESTART_DMA_COMMAND;
	clear_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags);
	/* Order the flag update before any subsequent loads/stores. */
	mb();
}
/* Save, Step 50: * Issue a DMA command to copy the first 16K bytes * of local storage to the CSA.
*/
send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd);
}
/* Save, Step 51: * Restore, Step 31. * Write SPU_NPC[IE]=0 and SPU_NPC[LSA] to entry * point address of context save code in local * storage. * * This implementation uses SPU-side save/restore * programs with entry points at LSA of 0.
*/
out_be32(&prob->spu_npc_RW, 0);
eieio();
}
/* Save, Step 54: * Issue a DMA command to copy context save code * to local storage and start SPU.
*/
send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd);
}
/* Save, Step 56: * Restore, Step 39. * Restore, Step 46. * Poll PPU_TagStatus[gn] until 01 (Tag group 0 complete) * or write PPU_QueryType[TS]=01 and wait for Tag Group * Complete Interrupt. Write INT_Stat_Class0 or * INT_Stat_Class2 with value of 'handled'.
*/
POLL_WHILE_FALSE(in_be32(&prob->dma_tagstatus_R) & mask);
/* Save, Step 57: * Restore, Step 40. * Poll until SPU_Status[R]=0 or wait for SPU Class 0 * or SPU Class 2 interrupt. Write INT_Stat_class0 * or INT_Stat_class2 with value of handled.
*/
POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING);
/* Save, Step 54: * If SPU_Status[P]=1 and SPU_Status[SC] = "success", * context save succeeded, otherwise context save * failed.
*/
complete = ((SPU_SAVE_COMPLETE << SPU_STOP_STATUS_SHIFT) |
SPU_STATUS_STOPPED_BY_STOP); return (in_be32(&prob->spu_status_R) != complete) ? 1 : 0;
}
static inline void terminate_spu_app(struct spu_state *csa, struct spu *spu)
{
	/* Restore, Step 4:
	 *     If required, notify the "using application" that
	 *     the SPU task has been terminated.  TBD.
	 */
}
/* Restore, Step 7: * Write MFC_Cntl[Dh,Sc,Sm]='1','1','0' to suspend * the queue and halt the decrementer.
*/
out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE |
MFC_CNTL_DECREMENTER_HALTED);
eieio();
}
/* Restore, Step 27: * If the CSA.SPU_Status[I,S,H,P]=1 then add the correct * instruction sequence to the end of the SPU based restore * code (after the "context restored" stop and signal) to * restore the correct SPU status. * * NOTE: Rather than modifying the SPU executable, we * instead add a new 'stopped_status' field to the * LSCSA. The SPU-side restore reads this field and * takes the appropriate action when exiting.
*/
/* SPU_Status[P,I]=1 - Illegal Instruction followed * by Stop and Signal instruction, followed by 'br -4'. *
*/
csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P_I;
csa->lscsa->stopped_status.slot[1] = status_code;
/* SPU_Status[P,H]=1 - Halt Conditional, followed * by Stop and Signal instruction, followed by * 'br -4'.
*/
csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P_H;
csa->lscsa->stopped_status.slot[1] = status_code;
/* SPU_Status[S,P]=1 - Stop and Signal instruction * followed by 'br -4'.
*/
csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_S_P;
csa->lscsa->stopped_status.slot[1] = status_code;
/* SPU_Status[P]=1 - Stop and Signal instruction * followed by 'br -4'.
*/
csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P;
csa->lscsa->stopped_status.slot[1] = status_code;
/* Restore, Step 28: * If the CSA.SPU_Status[I,S,H,P,R]=0 then * add a 'br *' instruction to the end of * the SPU based restore code. * * NOTE: Rather than modifying the SPU executable, we * instead add a new 'stopped_status' field to the * LSCSA. The SPU-side restore reads this field and * takes the appropriate action when exiting.
*/
mask = SPU_STATUS_INVALID_INSTR |
SPU_STATUS_SINGLE_STEP |
SPU_STATUS_STOPPED_BY_HALT |
SPU_STATUS_STOPPED_BY_STOP | SPU_STATUS_RUNNING; if (!(csa->prob.spu_status_R & mask)) {
csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_R;
}
}
static inline void restore_mfc_rag(struct spu_state *csa, struct spu *spu)
{
	/* Restore, Step 29:
	 *     Restore RA_GROUP_ID register and the
	 *     RA_ENABLE register from the CSA.
	 */
	spu_resource_allocation_groupID_set(spu,
			csa->priv1.resource_allocation_groupID_RW);
	spu_resource_allocation_enable_set(spu,
			csa->priv1.resource_allocation_enable_RW);
}
/* Restore, Step 37: * Issue MFC DMA command to copy context * restore code to local storage.
*/
send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd);
}
staticinlinevoid setup_decr(struct spu_state *csa, struct spu *spu)
{ /* Restore, Step 34: * If CSA.MFC_CNTL[Ds]=1 (decrementer was * running) then adjust decrementer, set * decrementer running status in LSCSA, * and set decrementer "wrapped" status * in LSCSA.
*/ if (csa->priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING) {
cycles_t resume_time = get_cycles();
cycles_t delta_time = resume_time - csa->suspend_time;
/* Restore, Step 44: * Issue a DMA command to restore the first * 16kb of local storage from CSA.
*/
send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd);
}
/* Restore, Step 47. * Write MFC_Cntl[Sc,Sm]='1','0' to suspend * the queue.
*/
out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE);
eieio();
}
static inline void clear_interrupts(struct spu_state *csa, struct spu *spu)
{
	/* Restore, Step 49:
	 *     Write INT_MASK_class0 with value of 0.
	 *     Write INT_MASK_class1 with value of 0.
	 *     Write INT_MASK_class2 with value of 0.
	 *     Write INT_STAT_class0 with value of -1.
	 *     Write INT_STAT_class1 with value of -1.
	 *     Write INT_STAT_class2 with value of -1.
	 *
	 * Implementation note: the status writes use the per-class
	 * CLASS*_INTR_MASK constants rather than a literal -1, so
	 * only defined interrupt bits are acknowledged.
	 */
	spin_lock_irq(&spu->register_lock);
	spu_int_mask_set(spu, 0, 0ul);
	spu_int_mask_set(spu, 1, 0ul);
	spu_int_mask_set(spu, 2, 0ul);
	spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK);
	spu_int_stat_clear(spu, 1, CLASS1_INTR_MASK);
	spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK);
	spin_unlock_irq(&spu->register_lock);
}
/* Restore, Step 57: * Set the Lock Line Reservation Lost Event by: * 1. OR CSA.SPU_Event_Status with bit 21 (Lr) set to 1. * 2. If CSA.SPU_Channel_0_Count=0 and * CSA.SPU_Wr_Event_Mask[Lr]=1 and * CSA.SPU_Event_Status[Lr]=0 then set * CSA.SPU_Event_Status_Count=1.
*/
ch0_cnt = csa->spu_chnlcnt_RW[0];
ch0_data = csa->spu_chnldata_RW[0];
ch1_data = csa->spu_chnldata_RW[1];
csa->spu_chnldata_RW[0] |= MFC_LLR_LOST_EVENT; if ((ch0_cnt == 0) && !(ch0_data & MFC_LLR_LOST_EVENT) &&
(ch1_data & MFC_LLR_LOST_EVENT)) {
csa->spu_chnlcnt_RW[0] = 1;
}
}
staticinlinevoid restore_decr_wrapped(struct spu_state *csa, struct spu *spu)
{ /* Restore, Step 58: * If the status of the CSA software decrementer * "wrapped" flag is set, OR in a '1' to * CSA.SPU_Event_Status[Tm].
*/ if (!(csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED)) return;
/* Restore, Step 62: * Restore the SPU_Cfg register from CSA.
*/
out_be64(&priv2->spu_cfg_RW, csa->priv2.spu_cfg_RW);
eieio();
}
static inline void restore_pm_trace(struct spu_state *csa, struct spu *spu)
{
	/* Restore, Step 63:
	 *     Restore PM_Trace_Tag_Wait_Mask from CSA.
	 *     Not performed by this implementation.
	 */
}
/* Restore, Step 72: * Restore the MFC_CNTL register for the CSA.
*/
out_be64(&priv2->mfc_control_RW, csa->priv2.mfc_control_RW);
eieio();
/* * The queue is put back into the same state that was evident prior to * the context switch. The suspend flag is added to the saved state in * the csa, if the operational state was suspending or suspended. In * this case, the code that suspended the mfc is responsible for * continuing it. Note that SPE faults do not change the operational * state of the spu.
*/
}
static inline void enable_user_access(struct spu_state *csa, struct spu *spu)
{
	/* Restore, Step 73:
	 *     Enable user-space access (if provided) to this
	 *     SPU by mapping the virtual pages assigned to
	 *     the SPU memory-mapped I/O (MMIO) for problem
	 *     state. TBD.
	 */
}
static inline void reset_switch_active(struct spu_state *csa, struct spu *spu)
{
	/* Restore, Step 74:
	 *     Reset the "context switch active" flag.
	 *     Not performed by this implementation.
	 */
}
staticvoid save_csa(struct spu_state *prev, struct spu *spu)
{ /* * Combine steps 19-44 of SPU context save sequence, which * save regions of the privileged & problem state areas.
*/
staticvoid save_lscsa(struct spu_state *prev, struct spu *spu)
{ /* * Perform steps 46-57 of SPU context save sequence, * which save regions of the local store and register * file.
*/
/**
 * stop_spu_isolate
 *	Check SPU run-control state and force isolated
 *	exit function as necessary.
 */
static void stop_spu_isolate(struct spu *spu)
{
	struct spu_problem __iomem *prob = spu->problem;

	if (in_be32(&prob->spu_status_R) & SPU_STATUS_ISOLATED_STATE) {
		/* The SPU is in isolated state; the only way
		 * to get it out is to perform an isolated
		 * exit (clean) operation.
		 */
		force_spu_isolate_exit(spu);
	}
}
staticvoid harvest(struct spu_state *prev, struct spu *spu)
{ /* * Perform steps 2-25 of SPU context restore sequence, * which resets an SPU either after a failed save, or * when using SPU for first time.
*/
staticvoid restore_lscsa(struct spu_state *next, struct spu *spu)
{ /* * Perform steps 26-40 of SPU context restore sequence, * which restores regions of the local store and register * file.
*/
staticvoid restore_csa(struct spu_state *next, struct spu *spu)
{ /* * Combine steps 41-76 of SPU context restore sequence, which * restore regions of the privileged & problem state areas.
*/
staticint __do_spu_save(struct spu_state *prev, struct spu *spu)
{ int rc;
/* * SPU context save can be broken into three phases: * * (a) quiesce [steps 2-16]. * (b) save of CSA, performed by PPE [steps 17-42] * (c) save of LSCSA, mostly performed by SPU [steps 43-52]. * * Returns 0 on success. * 2,6 if failed to quiece SPU * 53 if SPU-side of save failed.
*/
static int __do_spu_restore(struct spu_state *next, struct spu *spu)
{
	int rc;

	/*
	 * SPU context restore can be broken into three phases:
	 *
	 *     (a) harvest (or reset) SPU [steps 2-24].
	 *     (b) restore LSCSA [steps 25-40], mostly performed by SPU.
	 *     (c) restore CSA [steps 41-76], performed by PPE.
	 *
	 * The 'harvest' step is not performed here, but rather
	 * as needed below.
	 *
	 * Returns 0 on success, or the non-zero status from
	 * check_restore_status() if the SPU-side restore failed.
	 */
	restore_lscsa(next, spu);		/* Steps 24-39. */
	rc = check_restore_status(next, spu);	/* Step 40.     */
	switch (rc) {
	default:
		/* Failed. Return now. */
		return rc;
	case 0:
		/* Fall through to next step. */
		break;
	}
	restore_csa(next, spu);
	return 0;
}
/** * spu_save - SPU context save, with locking. * @prev: pointer to SPU context save area, to be saved. * @spu: pointer to SPU iomem structure. * * Acquire locks, perform the save operation then return.
*/ int spu_save(struct spu_state *prev, struct spu *spu)
{ int rc;
/** * spu_restore - SPU context restore, with harvest and locking. * @new: pointer to SPU context save area, to be restored. * @spu: pointer to SPU iomem structure. * * Perform harvest + restore, as we may not be coming * from a previous successful save operation, and the * hardware state is unknown.
*/ int spu_restore(struct spu_state *new, struct spu *spu)
{ int rc;
/** * spu_alloc_csa - allocate and initialize an SPU context save area. * * Allocate and initialize the contents of an SPU context save area. * This includes enabling address translation, interrupt masks, etc., * as appropriate for the given OS environment. * * Note that storage for the 'lscsa' is allocated separately, * as it is by far the largest of the context save regions, * and may need to be pinned or otherwise specially aligned.
*/ int spu_init_csa(struct spu_state *csa)
{ int rc;
if (!csa) return -EINVAL;
memset(csa, 0, sizeof(struct spu_state));
/*
 * NOTE(review): the German disclaimer text below is extraneous
 * web-extraction residue (a generic website liability notice), not
 * part of the original source file.  It is preserved inside this
 * comment so the file remains valid C, pending removal:
 *
 * "Die Informationen auf dieser Webseite wurden nach bestem Wissen
 *  sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit,
 *  noch Richtigkeit, noch Qualität der bereit gestellten Informationen
 *  zugesichert.
 *  Bemerkung: Die farbliche Syntaxdarstellung und die Messung sind
 *  noch experimentell."
 */