aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc/kernel/tm.S
diff options
context:
space:
mode:
authorCyril Bur <cyrilbur@gmail.com>2017-11-02 14:09:05 +1100
committerMichael Ellerman <mpe@ellerman.id.au>2017-11-06 20:39:33 +1100
commiteb5c3f1c86470fc1a57ab28cce15c12e4d6cdf8b (patch)
tree0d865f26543e4cc4ec7cca183c0aaddbfbf426a4 /arch/powerpc/kernel/tm.S
parent91381b9cb1c3afc162f830ebf698721402c7d577 (diff)
powerpc: Always save/restore checkpointed regs during treclaim/trecheckpoint
Lazy save and restore of FP/Altivec means that a userspace process can be sent to userspace with FP or Altivec disabled and loaded only as required (by way of an FP/Altivec unavailable exception). Transactional Memory complicates this situation as a transaction could be started without FP/Altivec being loaded up. This causes the hardware to checkpoint incorrect registers. Handling FP/Altivec unavailable exceptions while a thread is transactional requires a reclaim and recheckpoint to ensure the CPU has correct state for both sets of registers. tm_reclaim() has optimisations to not always save the FP/Altivec registers to the checkpointed save area. This was originally done because the caller might have information that the checkpointed registers aren't valid due to lazy save and restore. We've also been a little vague as to how tm_reclaim() leaves the FP/Altivec state since it doesn't necessarily always save it to the thread struct. This has lead to an (incorrect) assumption that it leaves the checkpointed state on the CPU. tm_recheckpoint() has similar optimisations in reverse. It may not always reload the checkpointed FP/Altivec registers from the thread struct before the trecheckpoint. It is therefore quite unclear where it expects to get the state from. This didn't help with the assumption made about tm_reclaim(). These optimisations sit in what is by definition a slow path. If a process has to go through a reclaim/recheckpoint then its transaction will be doomed on returning to userspace. This mean that the process will be unable to complete its transaction and be forced to its failure handler. This is already an out if line case for userspace. Furthermore, the cost of copying 64 times 128 bits from registers isn't very long[0] (at all) on modern processors. As such it appears these optimisations have only served to increase code complexity and are unlikely to have had a measurable performance impact. Our transactional memory handling has been riddled with bugs. A cause of this has been difficulty in following the code flow, code complexity has not been our friend here. It makes sense to remove these optimisations in favour of a (hopefully) more stable implementation. This patch does mean that some times the assembly will needlessly save 'junk' registers which will subsequently get overwritten with the correct value by the C code which calls the assembly function. This small inefficiency is far outweighed by the reduction in complexity for general TM code, context switching paths, and transactional facility unavailable exception handler. 0: I tried to measure it once for other work and found that it was hiding in the noise of everything else I was working with. I find it exceedingly likely this will be the case here. Signed-off-by: Cyril Bur <cyrilbur@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch/powerpc/kernel/tm.S')
-rw-r--r--arch/powerpc/kernel/tm.S59
1 files changed, 17 insertions, 42 deletions
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index c4ba37822ba0..d89fb0e6f9ed 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -79,15 +79,12 @@ _GLOBAL(tm_abort)
blr
/* void tm_reclaim(struct thread_struct *thread,
- * unsigned long orig_msr,
* uint8_t cause)
*
* - Performs a full reclaim. This destroys outstanding
* transactions and updates thread->regs.tm_ckpt_* with the
* original checkpointed state. Note that thread->regs is
* unchanged.
- * - FP regs are written back to thread->transact_fpr before
- * reclaiming. These are the transactional (current) versions.
*
* Purpose is to both abort transactions of, and preserve the state of,
* a transactions at a context switch. We preserve/restore both sets of process
@@ -98,9 +95,9 @@ _GLOBAL(tm_abort)
* Call with IRQs off, stacks get all out of sync for some periods in here!
*/
_GLOBAL(tm_reclaim)
- mfcr r6
+ mfcr r5
mflr r0
- stw r6, 8(r1)
+ stw r5, 8(r1)
std r0, 16(r1)
std r2, STK_GOT(r1)
stdu r1, -TM_FRAME_SIZE(r1)
@@ -108,7 +105,6 @@ _GLOBAL(tm_reclaim)
/* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. */
std r3, STK_PARAM(R3)(r1)
- std r4, STK_PARAM(R4)(r1)
SAVE_NVGPRS(r1)
/* We need to setup MSR for VSX register save instructions. */
@@ -138,8 +134,8 @@ _GLOBAL(tm_reclaim)
std r1, PACAR1(r13)
/* Clear MSR RI since we are about to change r1, EE is already off. */
- li r4, 0
- mtmsrd r4, 1
+ li r5, 0
+ mtmsrd r5, 1
/*
* BE CAREFUL HERE:
@@ -151,7 +147,7 @@ _GLOBAL(tm_reclaim)
* to user register state. (FPRs, CCR etc. also!)
* Use an sprg and a tm_scratch in the PACA to shuffle.
*/
- TRECLAIM(R5) /* Cause in r5 */
+ TRECLAIM(R4) /* Cause in r4 */
/* ******************** GPRs ******************** */
/* Stash the checkpointed r13 away in the scratch SPR and get the real
@@ -242,40 +238,30 @@ _GLOBAL(tm_reclaim)
/* ******************** FPR/VR/VSRs ************
- * After reclaiming, capture the checkpointed FPRs/VRs /if used/.
- *
- * (If VSX used, FP and VMX are implied. Or, we don't need to look
- * at MSR.VSX as copying FP regs if .FP, vector regs if .VMX covers it.)
- *
- * We're passed the thread's MSR as the second parameter
+ * After reclaiming, capture the checkpointed FPRs/VRs.
*
* We enabled VEC/FP/VSX in the msr above, so we can execute these
* instructions!
*/
- ld r4, STK_PARAM(R4)(r1) /* Second parameter, MSR * */
mr r3, r12
- andis. r0, r4, MSR_VEC@h
- beq dont_backup_vec
+ /* Altivec (VEC/VMX/VR)*/
addi r7, r3, THREAD_CKVRSTATE
SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 transact vr state */
mfvscr v0
li r6, VRSTATE_VSCR
stvx v0, r7, r6
-dont_backup_vec:
+
+ /* VRSAVE */
mfspr r0, SPRN_VRSAVE
std r0, THREAD_CKVRSAVE(r3)
- andi. r0, r4, MSR_FP
- beq dont_backup_fp
-
+ /* Floating Point (FP) */
addi r7, r3, THREAD_CKFPSTATE
SAVE_32FPRS_VSRS(0, R6, R7) /* r6 scratch, r7 transact fp state */
-
mffs fr0
stfd fr0,FPSTATE_FPSCR(r7)
-dont_backup_fp:
/* TM regs, incl TEXASR -- these live in thread_struct. Note they've
* been updated by the treclaim, to explain to userland the failure
@@ -343,22 +329,19 @@ _GLOBAL(__tm_recheckpoint)
*/
subi r7, r7, STACK_FRAME_OVERHEAD
+ /* We need to setup MSR for FP/VMX/VSX register save instructions. */
mfmsr r6
- /* R4 = original MSR to indicate whether thread used FP/Vector etc. */
-
- /* Enable FP/vec in MSR if necessary! */
- lis r5, MSR_VEC@h
+ mr r5, r6
ori r5, r5, MSR_FP
- and. r5, r4, r5
- beq restore_gprs /* if neither, skip both */
-
+#ifdef CONFIG_ALTIVEC
+ oris r5, r5, MSR_VEC@h
+#endif
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
- oris r5, r5, MSR_VSX@h
+ oris r5,r5, MSR_VSX@h
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
- or r5, r6, r5 /* Set MSR.FP+.VSX/.VEC */
- mtmsr r5
+ mtmsrd r5
#ifdef CONFIG_ALTIVEC
/*
@@ -367,28 +350,20 @@ _GLOBAL(__tm_recheckpoint)
* thread.fp_state[] version holds the 'live' (transactional)
* and will be loaded subsequently by any FPUnavailable trap.
*/
- andis. r0, r4, MSR_VEC@h
- beq dont_restore_vec
-
addi r8, r3, THREAD_CKVRSTATE
li r5, VRSTATE_VSCR
lvx v0, r8, r5
mtvscr v0
REST_32VRS(0, r5, r8) /* r5 scratch, r8 ptr */
-dont_restore_vec:
ld r5, THREAD_CKVRSAVE(r3)
mtspr SPRN_VRSAVE, r5
#endif
- andi. r0, r4, MSR_FP
- beq dont_restore_fp
-
addi r8, r3, THREAD_CKFPSTATE
lfd fr0, FPSTATE_FPSCR(r8)
MTFSF_L(fr0)
REST_32FPRS_VSRS(0, R4, R8)
-dont_restore_fp:
mtmsr r6 /* FP/Vec off again! */
restore_gprs: