34 files changed, 2294 insertions, 100 deletions
diff --git a/Documentation/powerpc/transactional_memory.txt b/Documentation/powerpc/transactional_memory.txt new file mode 100644 index 000000000000..c907be41d60f --- /dev/null +++ b/Documentation/powerpc/transactional_memory.txt @@ -0,0 +1,175 @@ +Transactional Memory support +============================ + +POWER kernel support for this feature is currently limited to supporting +its use by user programs. It is not currently used by the kernel itself. + +This file aims to sum up how it is supported by Linux and what behaviour you +can expect from your user programs. + + +Basic overview +============== + +Hardware Transactional Memory is supported on POWER8 processors, and is a +feature that enables a different form of atomic memory access. Several new +instructions are presented to delimit transactions; transactions are +guaranteed to either complete atomically or roll back and undo any partial +changes. + +A simple transaction looks like this: + +begin_move_money: + tbegin + beq abort_handler + + ld r4, SAVINGS_ACCT(r3) + ld r5, CURRENT_ACCT(r3) + subi r5, r5, 1 + addi r4, r4, 1 + std r4, SAVINGS_ACCT(r3) + std r5, CURRENT_ACCT(r3) + + tend + + b continue + +abort_handler: + ... test for odd failures ... + + /* Retry the transaction if it failed because it conflicted with + * someone else: */ + b begin_move_money + + +The 'tbegin' instruction denotes the start point, and 'tend' the end point. +Between these points the processor is in 'Transactional' state; any memory +references will complete in one go if there are no conflicts with other +transactional or non-transactional accesses within the system. In this +example, the transaction completes as though it were normal straight-line code +IF no other processor has touched SAVINGS_ACCT(r3) or CURRENT_ACCT(r3); an +atomic move of money from the current account to the savings account has been +performed. Even though the normal ld/std instructions are used (note no +lwarx/stwcx), either *both* SAVINGS_ACCT(r3) and CURRENT_ACCT(r3) will be +updated, or neither will be updated. + +If, in the meantime, there is a conflict with the locations accessed by the +transaction, the transaction will be aborted by the CPU. Register and memory +state will roll back to that at the 'tbegin', and control will continue from +'tbegin+4'. The branch to abort_handler will be taken this second time; the +abort handler can check the cause of the failure, and retry. + +Checkpointed registers include all GPRs, FPRs, VRs/VSRs, LR, CCR/CR, CTR, FPCSR +and a few other status/flag regs; see the ISA for details. + +Causes of transaction aborts +============================ + +- Conflicts with cache lines used by other processors +- Signals +- Context switches +- See the ISA for full documentation of everything that will abort transactions. + + +Syscalls +======== + +Performing syscalls from within transaction is not recommended, and can lead +to unpredictable results. + +Syscalls do not by design abort transactions, but beware: The kernel code will +not be running in transactional state. The effect of syscalls will always +remain visible, but depending on the call they may abort your transaction as a +side-effect, read soon-to-be-aborted transactional data that should not remain +invisible, etc. If you constantly retry a transaction that constantly aborts +itself by calling a syscall, you'll have a livelock & make no progress. + +Simple syscalls (e.g. sigprocmask()) "could" be OK. 
Even things like write() +from, say, printf() should be OK as long as the kernel does not access any +memory that was accessed transactionally. + +Consider any syscalls that happen to work as debug-only -- not recommended for +production use. Best to queue them up till after the transaction is over. + + +Signals +======= + +Delivery of signals (both sync and async) during transactions provides a second +thread state (ucontext/mcontext) to represent the second transactional register +state. Signal delivery 'treclaim's to capture both register states, so signals +abort transactions. The usual ucontext_t passed to the signal handler +represents the checkpointed/original register state; the signal appears to have +arisen at 'tbegin+4'. + +If the sighandler ucontext has uc_link set, a second ucontext has been +delivered. For future compatibility the MSR.TS field should be checked to +determine the transactional state -- if so, the second ucontext in uc->uc_link +represents the active transactional registers at the point of the signal. + +For 64-bit processes, uc->uc_mcontext.regs->msr is a full 64-bit MSR and its TS +field shows the transactional mode. + +For 32-bit processes, the mcontext's MSR register is only 32 bits; the top 32 +bits are stored in the MSR of the second ucontext, i.e. in +uc->uc_link->uc_mcontext.regs->msr. The top word contains the transactional +state TS. + +However, basic signal handlers don't need to be aware of transactions +and simply returning from the handler will deal with things correctly. + +Transaction-aware signal handlers can read the transactional register state +from the second ucontext. This will be necessary for crash handlers to +determine, for example, the address of the instruction causing the SIGSEGV. + +Example signal handler: + + void crash_handler(int sig, siginfo_t *si, void *uc) + { + ucontext_t *ucp = uc; + ucontext_t *transactional_ucp = ucp->uc_link; + + if (ucp->uc_link) { + u64 msr = ucp->uc_mcontext.regs->msr; + /* May have transactional ucontext! */ +#ifndef __powerpc64__ + msr |= ((u64)transactional_ucp->uc_mcontext.regs->msr) << 32; +#endif + if (MSR_TM_ACTIVE(msr)) { + /* Yes, we crashed during a transaction. Oops. */ + fprintf(stderr, "Transaction to be restarted at 0x%llx, but " + "crashy instruction was at 0x%llx\n", + ucp->uc_mcontext.regs->nip, + transactional_ucp->uc_mcontext.regs->nip); + } + } + + fix_the_problem(ucp->uc_mcontext.regs->dar); + } + + +Failure cause codes used by kernel +================================== + +These are defined in <asm/reg.h>, and distinguish different reasons why the +kernel aborted a transaction: + + TM_CAUSE_RESCHED Thread was rescheduled. + TM_CAUSE_FAC_UNAV FP/VEC/VSX unavailable trap. + TM_CAUSE_SYSCALL Currently unused; future syscalls that must abort + transactions for consistency will use this. + TM_CAUSE_SIGNAL Signal delivered. + TM_CAUSE_MISC Currently unused. + +These can be checked by the user program's abort handler as TEXASR[0:7]. + + +GDB +=== + +GDB and ptrace are not currently TM-aware. If one stops during a transaction, +it looks like the transaction has just started (the checkpointed state is +presented). The transaction cannot then be continued and will take the failure +handler route. Furthermore, the transactional 2nd register state will be +inaccessible. GDB can currently be used on programs using TM, but not sensibly +in parts within transactions.
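Since the failure cause byte lands in TEXASR[0:7], an abort handler written in C can decode why a transaction was doomed and decide whether retrying is worthwhile. The sketch below is illustrative only and not part of this patch set: tm_failure_is_transient() is a hypothetical helper, the SPRN_TEXASR value (0x82) is taken from the <asm/reg.h> hunk above, and it assumes a TM-capable CPU with the TM facility enabled (so reading TEXASR from problem state does not trap) and an assembler that accepts the mfspr below.

  /*
   * Illustrative sketch only.  Assumptions: TM facility available and
   * enabled, TEXASR readable from userspace, GCC-style inline asm.
   */
  #include <stdint.h>

  #define SPRN_TEXASR 0x82      /* Transaction EXception & Summary, per <asm/reg.h> */

  /* Hypothetical helper for the abort_handler path shown earlier:
   * returns 1 if the transaction looks worth retrying, 0 otherwise. */
  static int tm_failure_is_transient(void)
  {
          uint64_t texasr;
          uint8_t cause;

          /* Read TEXASR (SPR 0x82). */
          asm volatile("mfspr %0,%1" : "=r" (texasr) : "i" (SPRN_TEXASR));

          /* TEXASR[0:7] (the most significant byte) holds the failure code;
           * for kernel-caused aborts it is one of the TM_CAUSE_* values
           * listed above.  The low bit of that byte is the "failure
           * persistent" flag. */
          cause = texasr >> 56;

          if (cause & 1)        /* persistent, e.g. TM_CAUSE_SYSCALL (0xf9) */
                  return 0;
          return 1;             /* transient, e.g. TM_CAUSE_RESCHED (0xfe),
                                   TM_CAUSE_SIGNAL (0xf4), or a plain
                                   memory conflict: retry */
  }

A real handler would also bound the number of retries and fall back to a non-transactional path, since a transaction that keeps aborting itself makes no forward progress (the livelock case noted above).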
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 4b27edbdb799..85ff3a080360 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -313,6 +313,14 @@ config MATH_EMULATION unit, which will allow programs that use floating-point instructions to run. +config PPC_TRANSACTIONAL_MEM + bool "Transactional Memory support for POWERPC" + depends on PPC_BOOK3S_64 + depends on SMP + default n + ---help--- + Support user-mode Transactional Memory on POWERPC. + config 8XX_MINIMAL_FPEMU bool "Minimal math emulation for 8xx" depends on 8xx && !MATH_EMULATION diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 6cbdeb4290e2..aef3f71de5ad 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -49,6 +49,8 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_PMAC64=y CONFIG_HZ_100=y CONFIG_BINFMT_MISC=m +CONFIG_PPC_TRANSACTIONAL_MEM=y +CONFIG_HOTPLUG_CPU=y CONFIG_KEXEC=y CONFIG_IRQ_ALL_CPUS=y CONFIG_MEMORY_HOTREMOVE=y diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index c2f4b4a86ece..7a5c15fcc7cf 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -6,6 +6,7 @@ CONFIG_NR_CPUS=2 CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_EMBEDDED=y @@ -24,12 +25,13 @@ CONFIG_PS3_DISK=y CONFIG_PS3_ROM=y CONFIG_PS3_FLASH=y CONFIG_PS3_VRAM=m +CONFIG_PS3_LPM=m # CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set -CONFIG_HIGH_RES_TIMERS=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_BINFMT_MISC=y CONFIG_KEXEC=y # CONFIG_SPARSEMEM_VMEMMAP is not set +# CONFIG_COMPACTION is not set CONFIG_SCHED_SMT=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="" @@ -59,6 +61,7 @@ CONFIG_BT_BNEP_PROTO_FILTER=y CONFIG_BT_HIDP=m CONFIG_BT_HCIBTUSB=m CONFIG_CFG80211=m +CONFIG_CFG80211_WEXT=y CONFIG_MAC80211=m CONFIG_MAC80211_RC_PID=y # CONFIG_MAC80211_RC_MINSTREL is not set @@ -78,7 +81,6 @@ CONFIG_MD=y CONFIG_BLK_DEV_DM=m CONFIG_NETDEVICES=y # CONFIG_NET_VENDOR_BROADCOM is not set -# CONFIG_NET_VENDOR_CHELSIO is not set # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set @@ -119,21 +121,21 @@ CONFIG_SND=m # CONFIG_SND_DRIVERS is not set CONFIG_SND_USB_AUDIO=m CONFIG_HIDRAW=y -CONFIG_USB_HIDDEV=y CONFIG_HID_APPLE=m CONFIG_HID_BELKIN=m CONFIG_HID_CHERRY=m CONFIG_HID_EZKEY=m CONFIG_HID_TWINHAN=m CONFIG_HID_LOGITECH=m +CONFIG_HID_LOGITECH_DJ=m CONFIG_HID_MICROSOFT=m +CONFIG_HID_PS3REMOTE=m CONFIG_HID_SONY=m CONFIG_HID_SUNPLUS=m CONFIG_HID_SMARTJOYPLUS=m +CONFIG_USB_HIDDEV=y CONFIG_USB=m CONFIG_USB_ANNOUNCE_NEW_DEVICES=y -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_SUSPEND=y CONFIG_USB_MON=m CONFIG_USB_EHCI_HCD=m @@ -158,8 +160,8 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y +CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_CIFS=m CONFIG_NLS=y @@ -176,6 +178,7 @@ CONFIG_DEBUG_INFO=y CONFIG_DEBUG_WRITECOUNT=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEBUG_LIST=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 # CONFIG_FTRACE is not set CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_CRYPTO_CCM=m diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 04fa7f2e295f..c4dfbaf8b192 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -43,6 +43,8 @@ CONFIG_RTAS_FLASH=m CONFIG_IBMEBUS=y 
CONFIG_HZ_100=y CONFIG_BINFMT_MISC=m +CONFIG_PPC_TRANSACTIONAL_MEM=y +CONFIG_HOTPLUG_CPU=y CONFIG_KEXEC=y CONFIG_IRQ_ALL_CPUS=y CONFIG_MEMORY_HOTPLUG=y diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 5f1938fcce40..fb3245e928ea 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -220,6 +220,13 @@ extern const char *powerpc_base_platform; #define PPC_FEATURE_HAS_EFP_DOUBLE_COMP 0 #endif +/* We only set the TM feature if the kernel was compiled with TM supprt */ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +#define CPU_FTR_TM_COMP CPU_FTR_TM +#else +#define CPU_FTR_TM_COMP 0 +#endif + /* We need to mark all pages as being coherent if we're SMP or we have a * 74[45]x and an MPC107 host bridge. Also 83xx and PowerQUICC II * require it for PCI "streaming/prefetch" to work properly. @@ -414,7 +421,8 @@ extern const char *powerpc_base_platform; CPU_FTR_DSCR | CPU_FTR_SAO | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ - CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | CPU_FTR_BCTAR) + CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | CPU_FTR_BCTAR | \ + CPU_FTR_TM_COMP) #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 370298a0bca3..05e6d2ee1db9 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -50,7 +50,7 @@ #define EX_PPR 88 /* SMT thread status register (priority) */ #ifdef CONFIG_RELOCATABLE -#define EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ +#define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ ld r12,PACAKBASE(r13); /* get high part of &label */ \ mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ LOAD_HANDLER(r12,label); \ @@ -61,13 +61,15 @@ blr; #else /* If not relocatable, we can jump directly -- and save messing with LR */ -#define EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ +#define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ li r10,MSR_RI; \ mtmsrd r10,1; /* Set RI (EE=0) */ \ b label; #endif +#define EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ + __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ /* * As EXCEPTION_PROLOG_PSERIES(), except we've already got relocation on @@ -75,6 +77,7 @@ * case EXCEPTION_RELON_PROLOG_PSERIES_1 will be using lr. */ #define EXCEPTION_RELON_PROLOG_PSERIES(area, label, h, extra, vec) \ + EXCEPTION_PROLOG_0(area); \ EXCEPTION_PROLOG_1(area, extra, vec); \ EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) @@ -135,25 +138,32 @@ BEGIN_FTR_SECTION_NESTED(942) \ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,0,942) /*non P7*/ /* - * Save PPR in paca whenever some register is available to use. - * Then increase the priority. 
+ * Get an SPR into a register if the CPU has the given feature */ -#define HMT_MEDIUM_PPR_SAVE(area, ra) \ +#define OPT_GET_SPR(ra, spr, ftr) \ BEGIN_FTR_SECTION_NESTED(943) \ - mfspr ra,SPRN_PPR; \ - std ra,area+EX_PPR(r13); \ - HMT_MEDIUM; \ -END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,943) + mfspr ra,spr; \ +END_FTR_SECTION_NESTED(ftr,ftr,943) -#define __EXCEPTION_PROLOG_1(area, extra, vec) \ +/* + * Save a register to the PACA if the CPU has the given feature + */ +#define OPT_SAVE_REG_TO_PACA(offset, ra, ftr) \ +BEGIN_FTR_SECTION_NESTED(943) \ + std ra,offset(r13); \ +END_FTR_SECTION_NESTED(ftr,ftr,943) + +#define EXCEPTION_PROLOG_0(area) \ GET_PACA(r13); \ std r9,area+EX_R9(r13); /* save r9 */ \ - HMT_MEDIUM_PPR_SAVE(area, r9); \ + OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); \ + HMT_MEDIUM; \ std r10,area+EX_R10(r13); /* save r10 - r12 */ \ - BEGIN_FTR_SECTION_NESTED(66); \ - mfspr r10,SPRN_CFAR; \ - std r10,area+EX_CFAR(r13); \ - END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \ + OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR) + +#define __EXCEPTION_PROLOG_1(area, extra, vec) \ + OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \ + OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \ SAVE_LR(r10, area); \ mfcr r9; \ extra(vec); \ @@ -178,6 +188,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,943) __EXCEPTION_PROLOG_PSERIES_1(label, h) #define EXCEPTION_PROLOG_PSERIES(area, label, h, extra, vec) \ + EXCEPTION_PROLOG_0(area); \ EXCEPTION_PROLOG_1(area, extra, vec); \ EXCEPTION_PROLOG_PSERIES_1(label, h); @@ -188,10 +199,14 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,943) #define __KVM_HANDLER(area, h, n) \ do_kvm_##n: \ + BEGIN_FTR_SECTION_NESTED(947) \ + ld r10,area+EX_CFAR(r13); \ + std r10,HSTATE_CFAR(r13); \ + END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947); \ ld r10,area+EX_R10(r13); \ - stw r9,HSTATE_SCRATCH1(r13); \ + stw r9,HSTATE_SCRATCH1(r13); \ ld r9,area+EX_R9(r13); \ - std r12,HSTATE_SCRATCH0(r13); \ + std r12,HSTATE_SCRATCH0(r13); \ li r12,n; \ b kvmppc_interrupt @@ -312,6 +327,13 @@ label##_pSeries: \ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ EXC_STD, KVMTEST_PR, vec) +/* Version of above for when we have to branch out-of-line */ +#define STD_EXCEPTION_PSERIES_OOL(vec, label) \ + .globl label##_pSeries; \ +label##_pSeries: \ + EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, vec); \ + EXCEPTION_PROLOG_PSERIES_1(label##_common, EXC_STD) + #define STD_EXCEPTION_HV(loc, vec, label) \ . = loc; \ .globl label##_hv; \ @@ -321,6 +343,13 @@ label##_hv: \ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ EXC_HV, KVMTEST, vec) +/* Version of above for when we have to branch out-of-line */ +#define STD_EXCEPTION_HV_OOL(vec, label) \ + .globl label##_hv; \ +label##_hv: \ + EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST, vec); \ + EXCEPTION_PROLOG_PSERIES_1(label##_common, EXC_HV) + #define STD_RELON_EXCEPTION_PSERIES(loc, vec, label) \ . = loc; \ .globl label##_relon_pSeries; \ @@ -331,6 +360,12 @@ label##_relon_pSeries: \ EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ EXC_STD, KVMTEST_PR, vec) +#define STD_RELON_EXCEPTION_PSERIES_OOL(vec, label) \ + .globl label##_relon_pSeries; \ +label##_relon_pSeries: \ + EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, vec); \ + EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, EXC_STD) + #define STD_RELON_EXCEPTION_HV(loc, vec, label) \ . 
= loc; \ .globl label##_relon_hv; \ @@ -341,6 +376,12 @@ label##_relon_hv: \ EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ EXC_HV, KVMTEST, vec) +#define STD_RELON_EXCEPTION_HV_OOL(vec, label) \ + .globl label##_relon_hv; \ +label##_relon_hv: \ + EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST, vec); \ + EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, EXC_HV) + /* This associate vector numbers with bits in paca->irq_happened */ #define SOFTEN_VALUE_0x500 PACA_IRQ_EE #define SOFTEN_VALUE_0x502 PACA_IRQ_EE @@ -375,8 +416,10 @@ label##_relon_hv: \ #define __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \ HMT_MEDIUM_PPR_DISCARD; \ SET_SCRATCH0(r13); /* save r13 */ \ - __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \ + EXCEPTION_PROLOG_0(PACA_EXGEN); \ + __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \ EXCEPTION_PROLOG_PSERIES_1(label##_common, h); + #define _MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \ __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) @@ -394,9 +437,16 @@ label##_hv: \ _MASKABLE_EXCEPTION_PSERIES(vec, label, \ EXC_HV, SOFTEN_TEST_HV) +#define MASKABLE_EXCEPTION_HV_OOL(vec, label) \ + .globl label##_hv; \ +label##_hv: \ + EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec); \ + EXCEPTION_PROLOG_PSERIES_1(label##_common, EXC_HV); + #define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \ HMT_MEDIUM_PPR_DISCARD; \ SET_SCRATCH0(r13); /* save r13 */ \ + EXCEPTION_PROLOG_0(PACA_EXGEN); \ __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \ EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, h); #define _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \ @@ -416,6 +466,12 @@ label##_relon_hv: \ _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \ EXC_HV, SOFTEN_NOTEST_HV) +#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label) \ + .globl label##_relon_hv; \ +label##_relon_hv: \ + EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec); \ + EXCEPTION_PROLOG_PSERIES_1(label##_common, EXC_HV); + /* * Our exception common code can be passed various "additions" * to specify the behaviour of interrupts, whether to kick the diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 88609b23b775..cdc3d2717cc6 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -93,6 +93,9 @@ struct kvmppc_host_state { u64 host_dscr; u64 dec_expires; #endif +#ifdef CONFIG_PPC_BOOK3S_64 + u64 cfar; +#endif }; struct kvmppc_book3s_shadow_vcpu { diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index ca9bf459db6a..03d7beae89a0 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -440,6 +440,7 @@ struct kvm_vcpu_arch { ulong uamor; u32 ctrl; ulong dabr; + ulong cfar; #endif u32 vrsave; /* also USPRG0 */ u32 mmucr; diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index c47d687ab01b..77c91e74b612 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -137,6 +137,9 @@ struct paca_struct { u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ u8 nap_state_lost; /* NV GPR values lost in power7_idle */ u64 sprg3; /* Saved user-visible sprg */ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + u64 tm_scratch; /* TM scratch area for reclaim */ +#endif #ifdef CONFIG_PPC_POWERNV /* Pointer to OPAL machine check event structure set by the @@ -167,7 +170,6 @@ struct paca_struct { }; extern struct paca_struct *paca; -extern __initdata struct paca_struct boot_paca; extern void 
initialise_paca(struct paca_struct *new_paca, int cpu); extern void setup_paca(struct paca_struct *new_paca); extern void allocate_pacas(void); diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 0fd1928efb93..8752bc8e34a3 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -129,6 +129,9 @@ #define PPC_INST_TLBSRX_DOT 0x7c0006a5 #define PPC_INST_XXLOR 0xf0000510 #define PPC_INST_XVCPSGNDP 0xf0000780 +#define PPC_INST_TRECHKPT 0x7c0007dd +#define PPC_INST_TRECLAIM 0x7c00075d +#define PPC_INST_TABORT 0x7c00071d #define PPC_INST_NAP 0x4c000364 #define PPC_INST_SLEEP 0x4c0003a4 @@ -294,4 +297,11 @@ #define PPC_NAP stringify_in_c(.long PPC_INST_NAP) #define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP) +/* Transactional memory instructions */ +#define TRECHKPT stringify_in_c(.long PPC_INST_TRECHKPT) +#define TRECLAIM(r) stringify_in_c(.long PPC_INST_TRECLAIM \ + | __PPC_RA(r)) +#define TABORT(r) stringify_in_c(.long PPC_INST_TABORT \ + | __PPC_RA(r)) + #endif /* _ASM_POWERPC_PPC_OPCODE_H */ diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index c2d0e58aba31..54219cea9e88 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -123,6 +123,89 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) #define REST_16VRS(n,b,base) REST_8VRS(n,b,base); REST_8VRS(n+8,b,base) #define REST_32VRS(n,b,base) REST_16VRS(n,b,base); REST_16VRS(n+16,b,base) +/* Save/restore FPRs, VRs and VSRs from their checkpointed backups in + * thread_struct: + */ +#define SAVE_FPR_TRANSACT(n, base) stfd n,THREAD_TRANSACT_FPR0+ \ + 8*TS_FPRWIDTH*(n)(base) +#define SAVE_2FPRS_TRANSACT(n, base) SAVE_FPR_TRANSACT(n, base); \ + SAVE_FPR_TRANSACT(n+1, base) +#define SAVE_4FPRS_TRANSACT(n, base) SAVE_2FPRS_TRANSACT(n, base); \ + SAVE_2FPRS_TRANSACT(n+2, base) +#define SAVE_8FPRS_TRANSACT(n, base) SAVE_4FPRS_TRANSACT(n, base); \ + SAVE_4FPRS_TRANSACT(n+4, base) +#define SAVE_16FPRS_TRANSACT(n, base) SAVE_8FPRS_TRANSACT(n, base); \ + SAVE_8FPRS_TRANSACT(n+8, base) +#define SAVE_32FPRS_TRANSACT(n, base) SAVE_16FPRS_TRANSACT(n, base); \ + SAVE_16FPRS_TRANSACT(n+16, base) + +#define REST_FPR_TRANSACT(n, base) lfd n,THREAD_TRANSACT_FPR0+ \ + 8*TS_FPRWIDTH*(n)(base) +#define REST_2FPRS_TRANSACT(n, base) REST_FPR_TRANSACT(n, base); \ + REST_FPR_TRANSACT(n+1, base) +#define REST_4FPRS_TRANSACT(n, base) REST_2FPRS_TRANSACT(n, base); \ + REST_2FPRS_TRANSACT(n+2, base) +#define REST_8FPRS_TRANSACT(n, base) REST_4FPRS_TRANSACT(n, base); \ + REST_4FPRS_TRANSACT(n+4, base) +#define REST_16FPRS_TRANSACT(n, base) REST_8FPRS_TRANSACT(n, base); \ + REST_8FPRS_TRANSACT(n+8, base) +#define REST_32FPRS_TRANSACT(n, base) REST_16FPRS_TRANSACT(n, base); \ + REST_16FPRS_TRANSACT(n+16, base) + + +#define SAVE_VR_TRANSACT(n,b,base) li b,THREAD_TRANSACT_VR0+(16*(n)); \ + stvx n,b,base +#define SAVE_2VRS_TRANSACT(n,b,base) SAVE_VR_TRANSACT(n,b,base); \ + SAVE_VR_TRANSACT(n+1,b,base) +#define SAVE_4VRS_TRANSACT(n,b,base) SAVE_2VRS_TRANSACT(n,b,base); \ + SAVE_2VRS_TRANSACT(n+2,b,base) +#define SAVE_8VRS_TRANSACT(n,b,base) SAVE_4VRS_TRANSACT(n,b,base); \ + SAVE_4VRS_TRANSACT(n+4,b,base) +#define SAVE_16VRS_TRANSACT(n,b,base) SAVE_8VRS_TRANSACT(n,b,base); \ + SAVE_8VRS_TRANSACT(n+8,b,base) +#define SAVE_32VRS_TRANSACT(n,b,base) SAVE_16VRS_TRANSACT(n,b,base); \ + SAVE_16VRS_TRANSACT(n+16,b,base) + +#define REST_VR_TRANSACT(n,b,base) li b,THREAD_TRANSACT_VR0+(16*(n)); \ + lvx n,b,base +#define 
REST_2VRS_TRANSACT(n,b,base) REST_VR_TRANSACT(n,b,base); \ + REST_VR_TRANSACT(n+1,b,base) +#define REST_4VRS_TRANSACT(n,b,base) REST_2VRS_TRANSACT(n,b,base); \ + REST_2VRS_TRANSACT(n+2,b,base) +#define REST_8VRS_TRANSACT(n,b,base) REST_4VRS_TRANSACT(n,b,base); \ + REST_4VRS_TRANSACT(n+4,b,base) +#define REST_16VRS_TRANSACT(n,b,base) REST_8VRS_TRANSACT(n,b,base); \ + REST_8VRS_TRANSACT(n+8,b,base) +#define REST_32VRS_TRANSACT(n,b,base) REST_16VRS_TRANSACT(n,b,base); \ + REST_16VRS_TRANSACT(n+16,b,base) + + +#define SAVE_VSR_TRANSACT(n,b,base) li b,THREAD_TRANSACT_VSR0+(16*(n)); \ + STXVD2X(n,R##base,R##b) +#define SAVE_2VSRS_TRANSACT(n,b,base) SAVE_VSR_TRANSACT(n,b,base); \ + SAVE_VSR_TRANSACT(n+1,b,base) +#define SAVE_4VSRS_TRANSACT(n,b,base) SAVE_2VSRS_TRANSACT(n,b,base); \ + SAVE_2VSRS_TRANSACT(n+2,b,base) +#define SAVE_8VSRS_TRANSACT(n,b,base) SAVE_4VSRS_TRANSACT(n,b,base); \ + SAVE_4VSRS_TRANSACT(n+4,b,base) +#define SAVE_16VSRS_TRANSACT(n,b,base) SAVE_8VSRS_TRANSACT(n,b,base); \ + SAVE_8VSRS_TRANSACT(n+8,b,base) +#define SAVE_32VSRS_TRANSACT(n,b,base) SAVE_16VSRS_TRANSACT(n,b,base); \ + SAVE_16VSRS_TRANSACT(n+16,b,base) + +#define REST_VSR_TRANSACT(n,b,base) li b,THREAD_TRANSACT_VSR0+(16*(n)); \ + LXVD2X(n,R##base,R##b) +#define REST_2VSRS_TRANSACT(n,b,base) REST_VSR_TRANSACT(n,b,base); \ + REST_VSR_TRANSACT(n+1,b,base) +#define REST_4VSRS_TRANSACT(n,b,base) REST_2VSRS_TRANSACT(n,b,base); \ + REST_2VSRS_TRANSACT(n+2,b,base) +#define REST_8VSRS_TRANSACT(n,b,base) REST_4VSRS_TRANSACT(n,b,base); \ + REST_4VSRS_TRANSACT(n+4,b,base) +#define REST_16VSRS_TRANSACT(n,b,base) REST_8VSRS_TRANSACT(n,b,base); \ + REST_8VSRS_TRANSACT(n+8,b,base) +#define REST_32VSRS_TRANSACT(n,b,base) REST_16VSRS_TRANSACT(n,b,base); \ + REST_16VSRS_TRANSACT(n+16,b,base) + /* Save the lower 32 VSRs in the thread VSR region */ #define SAVE_VSR(n,b,base) li b,THREAD_VSR0+(16*(n)); STXVD2X(n,R##base,R##b) #define SAVE_2VSRS(n,b,base) SAVE_VSR(n,b,base); SAVE_VSR(n+1,b,base) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 42ac53cebacd..7ff9eaa3ea6c 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -152,6 +152,7 @@ typedef struct { #define TS_FPROFFSET 0 #define TS_VSRLOWOFFSET 1 #define TS_FPR(i) fpr[i][TS_FPROFFSET] +#define TS_TRANS_FPR(i) transact_fpr[i][TS_FPROFFSET] struct thread_struct { unsigned long ksp; /* Kernel stack pointer */ @@ -246,6 +247,34 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + u64 tm_tfhar; /* Transaction fail handler addr */ + u64 tm_texasr; /* Transaction exception & summary */ + u64 tm_tfiar; /* Transaction fail instr address reg */ + unsigned long tm_orig_msr; /* Thread's MSR on ctx switch */ + struct pt_regs ckpt_regs; /* Checkpointed registers */ + + /* + * Transactional FP and VSX 0-31 register set. + * NOTE: the sense of these is the opposite of the integer ckpt_regs! + * + * When a transaction is active/signalled/scheduled etc., *regs is the + * most recent set of/speculated GPRs with ckpt_regs being the older + * checkpointed regs to which we roll back if transaction aborts. + * + * However, fpr[] is the checkpointed 'base state' of FP regs, and + * transact_fpr[] is the new set of transactional values. + * VRs work the same way. 
+ */ + double transact_fpr[32][TS_FPRWIDTH]; + struct { + unsigned int pad; + unsigned int val; /* Floating point status */ + } transact_fpscr; + vector128 transact_vr[32] __attribute__((aligned(16))); + vector128 transact_vscr __attribute__((aligned(16))); + unsigned long transact_vrsave; +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ #ifdef CONFIG_KVM_BOOK3S_32_HANDLER void* kvm_shadow_vcpu; /* KVM internal data */ #endif /* CONFIG_KVM_BOOK3S_32_HANDLER */ diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h index 0e15db4d703b..678a7c1d9cb8 100644 --- a/arch/powerpc/include/asm/ps3.h +++ b/arch/powerpc/include/asm/ps3.h @@ -245,7 +245,7 @@ enum lv1_result { static inline const char* ps3_result(int result) { -#if defined(DEBUG) +#if defined(DEBUG) || defined(PS3_VERBOSE_RESULT) switch (result) { case LV1_SUCCESS: return "LV1_SUCCESS (0)"; diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index e09ac51961d7..7035e608f3fa 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -29,6 +29,10 @@ #define MSR_SF_LG 63 /* Enable 64 bit mode */ #define MSR_ISF_LG 61 /* Interrupt 64b mode valid on 630 */ #define MSR_HV_LG 60 /* Hypervisor state */ +#define MSR_TS_T_LG 34 /* Trans Mem state: Transactional */ +#define MSR_TS_S_LG 33 /* Trans Mem state: Suspended */ +#define MSR_TS_LG 33 /* Trans Mem state (2 bits) */ +#define MSR_TM_LG 32 /* Trans Mem Available */ #define MSR_VEC_LG 25 /* Enable AltiVec */ #define MSR_VSX_LG 23 /* Enable VSX */ #define MSR_POW_LG 18 /* Enable Power Management */ @@ -98,6 +102,26 @@ #define MSR_RI __MASK(MSR_RI_LG) /* Recoverable Exception */ #define MSR_LE __MASK(MSR_LE_LG) /* Little Endian */ +#define MSR_TM __MASK(MSR_TM_LG) /* Transactional Mem Available */ +#define MSR_TS_N 0 /* Non-transactional */ +#define MSR_TS_S __MASK(MSR_TS_S_LG) /* Transaction Suspended */ +#define MSR_TS_T __MASK(MSR_TS_T_LG) /* Transaction Transactional */ +#define MSR_TS_MASK (MSR_TS_T | MSR_TS_S) /* Transaction State bits */ +#define MSR_TM_ACTIVE(x) (((x) & MSR_TS_MASK) != 0) /* Transaction active? */ +#define MSR_TM_TRANSACTIONAL(x) (((x) & MSR_TS_MASK) == MSR_TS_T) +#define MSR_TM_SUSPENDED(x) (((x) & MSR_TS_MASK) == MSR_TS_S) + +/* Reason codes describing kernel causes for transaction aborts. By + convention, bit0 is copied to TEXASR[56] (IBM bit 7) which is set if + the failure is persistent. 
+*/ +#define TM_CAUSE_RESCHED 0xfe +#define TM_CAUSE_TLBI 0xfc +#define TM_CAUSE_FAC_UNAV 0xfa +#define TM_CAUSE_SYSCALL 0xf9 /* Persistent */ +#define TM_CAUSE_MISC 0xf6 +#define TM_CAUSE_SIGNAL 0xf4 + #if defined(CONFIG_PPC_BOOK3S_64) #define MSR_64BIT MSR_SF @@ -193,6 +217,10 @@ #define SPRN_UAMOR 0x9d /* User Authority Mask Override Register */ #define SPRN_AMOR 0x15d /* Authority Mask Override Register */ #define SPRN_ACOP 0x1F /* Available Coprocessor Register */ +#define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */ +#define SPRN_TEXASR 0x82 /* Transaction EXception & Summary */ +#define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */ +#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */ #define SPRN_CTRLF 0x088 #define SPRN_CTRLT 0x098 #define CTRL_CT 0xc0000000 /* current thread */ @@ -770,7 +798,7 @@ * HV mode in which case it is HSPRG0 * * 64-bit server: - * - SPRG0 unused (reserved for HV on Power4) + * - SPRG0 scratch for TM recheckpoint/reclaim (reserved for HV on Power4) * - SPRG2 scratch for exception vectors * - SPRG3 CPU and NUMA node for VDSO getcpu (user visible) * - HSPRG0 stores PACA in HV mode diff --git a/arch/powerpc/include/asm/tm.h b/arch/powerpc/include/asm/tm.h new file mode 100644 index 000000000000..4b4449abf3f8 --- /dev/null +++ b/arch/powerpc/include/asm/tm.h @@ -0,0 +1,20 @@ +/* + * Transactional memory support routines to reclaim and recheckpoint + * transactional process state. + * + * Copyright 2012 Matt Evans & Michael Neuling, IBM Corporation. + */ + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +extern void do_load_up_transact_fpu(struct thread_struct *thread); +extern void do_load_up_transact_altivec(struct thread_struct *thread); +#endif + +extern void tm_enable(void); +extern void tm_reclaim(struct thread_struct *thread, + unsigned long orig_msr, uint8_t cause); +extern void tm_recheckpoint(struct thread_struct *thread, + unsigned long orig_msr); +extern void tm_abort(uint8_t cause); +extern void tm_save_sprs(struct thread_struct *thread); +extern void tm_restore_sprs(struct thread_struct *thread); diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index b4f0e360e414..f960a7944553 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -121,6 +121,8 @@ ifneq ($(CONFIG_PPC_INDIRECT_IO),y) obj-y += iomap.o endif +obj64-$(CONFIG_PPC_TRANSACTIONAL_MEM) += tm.o + obj-$(CONFIG_PPC64) += $(obj64-y) obj-$(CONFIG_PPC32) += $(obj32-y) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index beddba432518..781190367292 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -125,6 +125,30 @@ int main(void) #ifdef CONFIG_PPC_BOOK3S_64 DEFINE(THREAD_TAR, offsetof(struct thread_struct, tar)); #endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + DEFINE(PACATMSCRATCH, offsetof(struct paca_struct, tm_scratch)); + DEFINE(THREAD_TM_TFHAR, offsetof(struct thread_struct, tm_tfhar)); + DEFINE(THREAD_TM_TEXASR, offsetof(struct thread_struct, tm_texasr)); + DEFINE(THREAD_TM_TFIAR, offsetof(struct thread_struct, tm_tfiar)); + DEFINE(PT_CKPT_REGS, offsetof(struct thread_struct, ckpt_regs)); + DEFINE(THREAD_TRANSACT_VR0, offsetof(struct thread_struct, + transact_vr[0])); + DEFINE(THREAD_TRANSACT_VSCR, offsetof(struct thread_struct, + transact_vscr)); + DEFINE(THREAD_TRANSACT_VRSAVE, offsetof(struct thread_struct, + transact_vrsave)); + DEFINE(THREAD_TRANSACT_FPR0, offsetof(struct thread_struct, + transact_fpr[0])); + DEFINE(THREAD_TRANSACT_FPSCR, offsetof(struct 
thread_struct, + transact_fpscr)); +#ifdef CONFIG_VSX + DEFINE(THREAD_TRANSACT_VSR0, offsetof(struct thread_struct, + transact_fpr[0])); +#endif + /* Local pt_regs on stack for Transactional Memory funcs. */ + DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD + + sizeof(struct pt_regs) + 16); +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); @@ -479,6 +503,7 @@ int main(void) DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid)); + DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar)); DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count)); DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count)); DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); @@ -558,6 +583,10 @@ int main(void) DEFINE(IPI_PRIORITY, IPI_PRIORITY); #endif /* CONFIG_KVM_BOOK3S_64_HV */ +#ifdef CONFIG_PPC_BOOK3S_64 + HSTATE_FIELD(HSTATE_CFAR, cfar); +#endif /* CONFIG_PPC_BOOK3S_64 */ + #else /* CONFIG_PPC_BOOK3S */ DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 9ae8451bbc83..612ea130c5d2 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -785,6 +785,10 @@ fast_exception_return: andc r4,r4,r0 /* r0 contains MSR_RI here */ mtmsrd r4,1 +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* TM debug */ + std r3, PACATMSCRATCH(r13) /* Stash returned-to MSR */ +#endif /* * r13 is our per cpu area, only restore it if we are returning to * userspace the value stored in the stack frame may belong to diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 7a1c87cbb890..a8a5361fb70c 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -153,7 +153,10 @@ machine_check_pSeries_1: * some code path might still want to branch into the original * vector */ - b machine_check_pSeries + HMT_MEDIUM_PPR_DISCARD + SET_SCRATCH0(r13) /* save r13 */ + EXCEPTION_PROLOG_0(PACA_EXMC) + b machine_check_pSeries_0 . = 0x300 .globl data_access_pSeries @@ -172,6 +175,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) data_access_slb_pSeries: HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXSLB) EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x380) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_DAR @@ -203,6 +207,7 @@ data_access_slb_pSeries: instruction_access_slb_pSeries: HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXSLB) EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ @@ -284,16 +289,28 @@ system_call_pSeries: */ . = 0xe00 hv_exception_trampoline: + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b h_data_storage_hv + . = 0xe20 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b h_instr_storage_hv + . = 0xe40 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b emulation_assist_hv - . = 0xe50 - b hmi_exception_hv + . = 0xe60 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b hmi_exception_hv + . = 0xe80 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b h_doorbell_hv /* We need to deal with the Altivec unavailable exception @@ -303,16 +320,27 @@ hv_exception_trampoline: */ performance_monitor_pSeries_1: . 
= 0xf00 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b performance_monitor_pSeries altivec_unavailable_pSeries_1: . = 0xf20 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b altivec_unavailable_pSeries vsx_unavailable_pSeries_1: . = 0xf40 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b vsx_unavailable_pSeries + . = 0xf60 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) + b tm_unavailable_pSeries + #ifdef CONFIG_CBE_RAS STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error) KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1202) @@ -326,10 +354,7 @@ vsx_unavailable_pSeries_1: denorm_exception_hv: HMT_MEDIUM_PPR_DISCARD mtspr SPRN_SPRG_HSCRATCH0,r13 - mfspr r13,SPRN_SPRG_HPACA - std r9,PACA_EXGEN+EX_R9(r13) - HMT_MEDIUM_PPR_SAVE(PACA_EXGEN, r9) - std r10,PACA_EXGEN+EX_R10(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) std r11,PACA_EXGEN+EX_R11(r13) std r12,PACA_EXGEN+EX_R12(r13) mfspr r9,SPRN_SPRG_HSCRATCH0 @@ -372,8 +397,10 @@ machine_check_pSeries: machine_check_fwnmi: HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) /* save r13 */ - EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, - EXC_STD, KVMTEST, 0x200) + EXCEPTION_PROLOG_0(PACA_EXMC) +machine_check_pSeries_0: + EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST, 0x200) + EXCEPTION_PROLOG_PSERIES_1(machine_check_common, EXC_STD) KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200) /* moved from 0x300 */ @@ -510,24 +537,26 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206) .align 7 /* moved from 0xe00 */ - STD_EXCEPTION_HV(., 0xe02, h_data_storage) + STD_EXCEPTION_HV_OOL(0xe02, h_data_storage) KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0xe02) - STD_EXCEPTION_HV(., 0xe22, h_instr_storage) + STD_EXCEPTION_HV_OOL(0xe22, h_instr_storage) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe22) - STD_EXCEPTION_HV(., 0xe42, emulation_assist) + STD_EXCEPTION_HV_OOL(0xe42, emulation_assist) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe42) - STD_EXCEPTION_HV(., 0xe62, hmi_exception) /* need to flush cache ? */ + STD_EXCEPTION_HV_OOL(0xe62, hmi_exception) /* need to flush cache ? */ KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe62) - MASKABLE_EXCEPTION_HV(., 0xe82, h_doorbell) + MASKABLE_EXCEPTION_HV_OOL(0xe82, h_doorbell) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe82) /* moved from 0xf00 */ - STD_EXCEPTION_PSERIES(., 0xf00, performance_monitor) + STD_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf00) - STD_EXCEPTION_PSERIES(., 0xf20, altivec_unavailable) + STD_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf20) - STD_EXCEPTION_PSERIES(., 0xf40, vsx_unavailable) + STD_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40) + STD_EXCEPTION_PSERIES_OOL(0xf60, tm_unavailable) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf60) /* * An interrupt came in while soft-disabled. We set paca->irq_happened, then: @@ -718,8 +747,8 @@ machine_check_common: . = 0x4380 .globl data_access_slb_relon_pSeries data_access_slb_relon_pSeries: - HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXSLB) EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_DAR @@ -743,8 +772,8 @@ data_access_slb_relon_pSeries: . 
= 0x4480 .globl instruction_access_slb_relon_pSeries instruction_access_slb_relon_pSeries: - HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXSLB) EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ @@ -788,55 +817,64 @@ system_call_relon_pSeries: STD_RELON_EXCEPTION_PSERIES(0x4d00, 0xd00, single_step) . = 0x4e00 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b h_data_storage_relon_hv . = 0x4e20 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b h_instr_storage_relon_hv . = 0x4e40 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b emulation_assist_relon_hv - . = 0x4e50 - b hmi_exception_relon_hv - . = 0x4e60 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b hmi_exception_relon_hv . = 0x4e80 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b h_doorbell_relon_hv performance_monitor_relon_pSeries_1: . = 0x4f00 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b performance_monitor_relon_pSeries altivec_unavailable_relon_pSeries_1: . = 0x4f20 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b altivec_unavailable_relon_pSeries vsx_unavailable_relon_pSeries_1: . = 0x4f40 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) b vsx_unavailable_relon_pSeries -#ifdef CONFIG_CBE_RAS - STD_RELON_EXCEPTION_HV(0x5200, 0x1202, cbe_system_error) -#endif /* CONFIG_CBE_RAS */ +tm_unavailable_relon_pSeries_1: + . = 0x4f60 + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) + b tm_unavailable_relon_pSeries + STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint) #ifdef CONFIG_PPC_DENORMALISATION . = 0x5500 b denorm_exception_hv #endif -#ifdef CONFIG_CBE_RAS - STD_RELON_EXCEPTION_HV(0x5600, 0x1602, cbe_maintenance) -#else #ifdef CONFIG_HVC_SCOM STD_RELON_EXCEPTION_HV(0x5600, 0x1600, maintence_interrupt) KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1600) #endif /* CONFIG_HVC_SCOM */ -#endif /* CONFIG_CBE_RAS */ STD_RELON_EXCEPTION_PSERIES(0x5700, 0x1700, altivec_assist) -#ifdef CONFIG_CBE_RAS - STD_RELON_EXCEPTION_HV(0x5800, 0x1802, cbe_thermal) -#endif /* CONFIG_CBE_RAS */ /* Other future vectors */ .align 7 @@ -1138,9 +1176,26 @@ fp_unavailable_common: addi r3,r1,STACK_FRAME_OVERHEAD bl .kernel_fp_unavailable_exception BUG_OPCODE -1: bl .load_up_fpu +1: +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +BEGIN_FTR_SECTION + /* Test if 2 TM state bits are zero. If non-zero (ie. userspace was in + * transaction), go do TM stuff + */ + rldicl. r0, r12, (64-MSR_TS_LG), (64-2) + bne- 2f +END_FTR_SECTION_IFSET(CPU_FTR_TM) +#endif + bl .load_up_fpu b fast_exception_return - +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +2: /* User process was in a transaction */ + bl .save_nvgprs + DISABLE_INTS + addi r3,r1,STACK_FRAME_OVERHEAD + bl .fp_unavailable_tm + b .ret_from_except +#endif .align 7 .globl altivec_unavailable_common altivec_unavailable_common: @@ -1148,8 +1203,25 @@ altivec_unavailable_common: #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION beq 1f +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + BEGIN_FTR_SECTION_NESTED(69) + /* Test if 2 TM state bits are zero. If non-zero (ie. userspace was in + * transaction), go do TM stuff + */ + rldicl. 
r0, r12, (64-MSR_TS_LG), (64-2) + bne- 2f + END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69) +#endif bl .load_up_altivec b fast_exception_return +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +2: /* User process was in a transaction */ + bl .save_nvgprs + DISABLE_INTS + addi r3,r1,STACK_FRAME_OVERHEAD + bl .altivec_unavailable_tm + b .ret_from_except +#endif 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif @@ -1166,7 +1238,24 @@ vsx_unavailable_common: #ifdef CONFIG_VSX BEGIN_FTR_SECTION beq 1f +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + BEGIN_FTR_SECTION_NESTED(69) + /* Test if 2 TM state bits are zero. If non-zero (ie. userspace was in + * transaction), go do TM stuff + */ + rldicl. r0, r12, (64-MSR_TS_LG), (64-2) + bne- 2f + END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69) +#endif b .load_up_vsx +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +2: /* User process was in a transaction */ + bl .save_nvgprs + DISABLE_INTS + addi r3,r1,STACK_FRAME_OVERHEAD + bl .vsx_unavailable_tm + b .ret_from_except +#endif 1: END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif @@ -1177,24 +1266,35 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) b .ret_from_except .align 7 + .globl tm_unavailable_common +tm_unavailable_common: + EXCEPTION_PROLOG_COMMON(0xf60, PACA_EXGEN) + bl .save_nvgprs + DISABLE_INTS + addi r3,r1,STACK_FRAME_OVERHEAD + bl .tm_unavailable_exception + b .ret_from_except + + .align 7 .globl __end_handlers __end_handlers: /* Equivalents to the above handlers for relocation-on interrupt vectors */ - STD_RELON_EXCEPTION_HV(., 0xe00, h_data_storage) + STD_RELON_EXCEPTION_HV_OOL(0xe00, h_data_storage) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe00) - STD_RELON_EXCEPTION_HV(., 0xe20, h_instr_storage) + STD_RELON_EXCEPTION_HV_OOL(0xe20, h_instr_storage) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe20) - STD_RELON_EXCEPTION_HV(., 0xe40, emulation_assist) + STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe40) - STD_RELON_EXCEPTION_HV(., 0xe60, hmi_exception) + STD_RELON_EXCEPTION_HV_OOL(0xe60, hmi_exception) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe60) - MASKABLE_RELON_EXCEPTION_HV(., 0xe80, h_doorbell) + MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe80) - STD_RELON_EXCEPTION_PSERIES(., 0xf00, performance_monitor) - STD_RELON_EXCEPTION_PSERIES(., 0xf20, altivec_unavailable) - STD_RELON_EXCEPTION_PSERIES(., 0xf40, vsx_unavailable) + STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) + STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) + STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) + STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, tm_unavailable) #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index e0ada05f2df3..caeaabf11a2f 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -35,6 +35,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ 2: REST_32VSRS(n,c,base); \ 3: +#define __REST_32FPVSRS_TRANSACT(n,c,base) \ +BEGIN_FTR_SECTION \ + b 2f; \ +END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ + REST_32FPRS_TRANSACT(n,base); \ + b 3f; \ +2: REST_32VSRS_TRANSACT(n,c,base); \ +3: + #define __SAVE_32FPVSRS(n,c,base) \ BEGIN_FTR_SECTION \ b 2f; \ @@ -45,11 +54,68 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ 3: #else #define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base) +#define __REST_32FPVSRS_TRANSACT(n,b,base) REST_32FPRS(n, base) #define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base) #endif #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base) +#define 
REST_32FPVSRS_TRANSACT(n,c,base) \ + __REST_32FPVSRS_TRANSACT(n,__REG_##c,__REG_##base) #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Wrapper to call load_up_fpu from C. + * void do_load_up_fpu(struct pt_regs *regs); + */ +_GLOBAL(do_load_up_fpu) + mflr r0 + std r0, 16(r1) + stdu r1, -112(r1) + + subi r6, r3, STACK_FRAME_OVERHEAD + /* load_up_fpu expects r12=MSR, r13=PACA, and returns + * with r12 = new MSR. + */ + ld r12,_MSR(r6) + GET_PACA(r13) + + bl load_up_fpu + std r12,_MSR(r6) + + ld r0, 112+16(r1) + addi r1, r1, 112 + mtlr r0 + blr + + +/* void do_load_up_transact_fpu(struct thread_struct *thread) + * + * This is similar to load_up_fpu but for the transactional version of the FP + * register set. It doesn't mess with the task MSR or valid flags. + * Furthermore, we don't do lazy FP with TM currently. + */ +_GLOBAL(do_load_up_transact_fpu) + mfmsr r6 + ori r5,r6,MSR_FP +#ifdef CONFIG_VSX +BEGIN_FTR_SECTION + oris r5,r5,MSR_VSX@h +END_FTR_SECTION_IFSET(CPU_FTR_VSX) +#endif + SYNC + MTMSRD(r5) + + lfd fr0,THREAD_TRANSACT_FPSCR(r3) + MTFSF_L(fr0) + REST_32FPVSRS_TRANSACT(0, R4, R3) + + /* FP/VSX off again */ + MTMSRD(r6) + SYNC + + blr +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + /* * This task wants to use the FPU now. * On UP, disable FP for the task which had the FPU previously, diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index cd6da855090c..f8f24685f10a 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -120,8 +120,6 @@ struct slb_shadow slb_shadow[] __cacheline_aligned = { struct paca_struct *paca; EXPORT_SYMBOL(paca); -struct paca_struct boot_paca; - void __init initialise_paca(struct paca_struct *new_paca, int cpu) { /* The TOC register (GPR2) points 32kB into the TOC, so that 64kB diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 96e31de89b43..59dd545fdde1 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -50,6 +50,7 @@ #include <asm/runlatch.h> #include <asm/syscalls.h> #include <asm/switch_to.h> +#include <asm/tm.h> #include <asm/debug.h> #ifdef CONFIG_PPC64 #include <asm/firmware.h> @@ -57,6 +58,13 @@ #include <linux/kprobes.h> #include <linux/kdebug.h> +/* Transactional Memory debug */ +#ifdef TM_DEBUG_SW +#define TM_DEBUG(x...) printk(KERN_INFO x) +#else +#define TM_DEBUG(x...) do { } while(0) +#endif + extern unsigned long _get_SP(void); #ifndef CONFIG_SMP @@ -460,6 +468,117 @@ static inline bool hw_brk_match(struct arch_hw_breakpoint *a, return false; return true; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static inline void tm_reclaim_task(struct task_struct *tsk) +{ + /* We have to work out if we're switching from/to a task that's in the + * middle of a transaction. + * + * In switching we need to maintain a 2nd register state as + * oldtask->thread.ckpt_regs. We tm_reclaim(oldproc); this saves the + * checkpointed (tbegin) state in ckpt_regs and saves the transactional + * (current) FPRs into oldtask->thread.transact_fpr[]. + * + * We also context switch (save) TFHAR/TEXASR/TFIAR in here. + */ + struct thread_struct *thr = &tsk->thread; + + if (!thr->regs) + return; + + if (!MSR_TM_ACTIVE(thr->regs->msr)) + goto out_and_saveregs; + + /* Stash the original thread MSR, as giveup_fpu et al will + * modify it. We hold onto it to see whether the task used + * FP & vector regs. 
+ */ + thr->tm_orig_msr = thr->regs->msr; + + TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, " + "ccr=%lx, msr=%lx, trap=%lx)\n", + tsk->pid, thr->regs->nip, + thr->regs->ccr, thr->regs->msr, + thr->regs->trap); + + tm_reclaim(thr, thr->regs->msr, TM_CAUSE_RESCHED); + + TM_DEBUG("--- tm_reclaim on pid %d complete\n", + tsk->pid); + +out_and_saveregs: + /* Always save the regs here, even if a transaction's not active. + * This context-switches a thread's TM info SPRs. We do it here to + * be consistent with the restore path (in recheckpoint) which + * cannot happen later in _switch(). + */ + tm_save_sprs(thr); +} + +static inline void tm_recheckpoint_new_task(struct task_struct *new) +{ + unsigned long msr; + + if (!cpu_has_feature(CPU_FTR_TM)) + return; + + /* Recheckpoint the registers of the thread we're about to switch to. + * + * If the task was using FP, we non-lazily reload both the original and + * the speculative FP register states. This is because the kernel + * doesn't see if/when a TM rollback occurs, so if we take an FP + * unavoidable later, we are unable to determine which set of FP regs + * need to be restored. + */ + if (!new->thread.regs) + return; + + /* The TM SPRs are restored here, so that TEXASR.FS can be set + * before the trecheckpoint and no explosion occurs. + */ + tm_restore_sprs(&new->thread); + + if (!MSR_TM_ACTIVE(new->thread.regs->msr)) + return; + msr = new->thread.tm_orig_msr; + /* Recheckpoint to restore original checkpointed register state. */ + TM_DEBUG("*** tm_recheckpoint of pid %d " + "(new->msr 0x%lx, new->origmsr 0x%lx)\n", + new->pid, new->thread.regs->msr, msr); + + /* This loads the checkpointed FP/VEC state, if used */ + tm_recheckpoint(&new->thread, msr); + + /* This loads the speculative FP/VEC state, if used */ + if (msr & MSR_FP) { + do_load_up_transact_fpu(&new->thread); + new->thread.regs->msr |= + (MSR_FP | new->thread.fpexc_mode); + } + if (msr & MSR_VEC) { + do_load_up_transact_altivec(&new->thread); + new->thread.regs->msr |= MSR_VEC; + } + /* We may as well turn on VSX too since all the state is restored now */ + if (msr & MSR_VSX) + new->thread.regs->msr |= MSR_VSX; + + TM_DEBUG("*** tm_recheckpoint of pid %d complete " + "(kernel msr 0x%lx)\n", + new->pid, mfmsr()); +} + +static inline void __switch_to_tm(struct task_struct *prev) +{ + if (cpu_has_feature(CPU_FTR_TM)) { + tm_enable(); + tm_reclaim_task(prev); + } +} +#else +#define tm_recheckpoint_new_task(new) +#define __switch_to_tm(prev) +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *new) @@ -471,6 +590,8 @@ struct task_struct *__switch_to(struct task_struct *prev, struct ppc64_tlb_batch *batch; #endif + __switch_to_tm(prev); + #ifdef CONFIG_SMP /* avoid complexity of lazy save/restore of fpu * by just saving it every time we switch out if @@ -586,6 +707,9 @@ struct task_struct *__switch_to(struct task_struct *prev, * of sync. Hard disable here. 
*/ hard_irq_disable(); + + tm_recheckpoint_new_task(new); + last = _switch(old_thread, new_thread); #ifdef CONFIG_PPC_BOOK3S_64 @@ -747,6 +871,9 @@ void show_regs(struct pt_regs * regs) printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip); printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); #endif +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + printk("PACATMSCRATCH [%llx]\n", get_paca()->tm_scratch); +#endif show_stack(current, (unsigned long *) regs->gpr[1]); if (!user_mode(regs)) show_instructions(regs); @@ -958,7 +1085,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) regs->msr = MSR_USER32; } #endif - discard_lazy_cpu_state(); #ifdef CONFIG_VSX current->thread.used_vsr = 0; @@ -978,6 +1104,13 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.spefscr = 0; current->thread.used_spe = 0; #endif /* CONFIG_SPE */ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (cpu_has_feature(CPU_FTR_TM)) + regs->msr |= MSR_TM; + current->thread.tm_tfhar = 0; + current->thread.tm_texasr = 0; + current->thread.tm_tfiar = 0; +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ } #define PR_FP_ALL_EXCEPT (PR_FP_EXC_DIV | PR_FP_EXC_OVF | PR_FP_EXC_UND \ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 6da881b35dac..75fbaceb5c87 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -156,6 +156,15 @@ early_param("smt-enabled", early_smt_enabled); #define check_smt_enabled() #endif /* CONFIG_SMP */ +/** Fix up paca fields required for the boot cpu */ +static void fixup_boot_paca(void) +{ + /* The boot cpu is started */ + get_paca()->cpu_start = 1; + /* Allow percpu accesses to work until we setup percpu data */ + get_paca()->data_offset = 0; +} + /* * Early initialization entry point. This is called by head.S * with MMU translation disabled. We rely on the "feature" of @@ -177,6 +186,8 @@ early_param("smt-enabled", early_smt_enabled); void __init early_setup(unsigned long dt_ptr) { + static __initdata struct paca_struct boot_paca; + /* -------- printk is _NOT_ safe to use here ! ------- */ /* Identify CPU type */ @@ -185,6 +196,7 @@ void __init early_setup(unsigned long dt_ptr) /* Assume we're on cpu 0 for now. Don't write to the paca yet! */ initialise_paca(&boot_paca, 0); setup_paca(&boot_paca); + fixup_boot_paca(); /* Initialize lockdep early or else spinlocks will blow */ lockdep_init(); @@ -205,11 +217,7 @@ void __init early_setup(unsigned long dt_ptr) /* Now we know the logical id of our boot cpu, setup the paca. 
*/ setup_paca(&paca[boot_cpuid]); - - /* Fix up paca fields required for the boot cpu */ - get_paca()->cpu_start = 1; - /* Allow percpu accesses to "work" until we setup percpu data */ - get_paca()->data_offset = 0; + fixup_boot_paca(); /* Probe the machine type */ probe_machine(); diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index e00acb413934..ec84c901ceab 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -25,13 +25,21 @@ extern int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, extern unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task); +extern unsigned long copy_transact_fpr_to_user(void __user *to, + struct task_struct *task); extern unsigned long copy_fpr_from_user(struct task_struct *task, void __user *from); +extern unsigned long copy_transact_fpr_from_user(struct task_struct *task, + void __user *from); #ifdef CONFIG_VSX extern unsigned long copy_vsx_to_user(void __user *to, struct task_struct *task); +extern unsigned long copy_transact_vsx_to_user(void __user *to, + struct task_struct *task); extern unsigned long copy_vsx_from_user(struct task_struct *task, void __user *from); +extern unsigned long copy_transact_vsx_from_user(struct task_struct *task, + void __user *from); #endif #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 804e323c139d..e4a88d340de6 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -43,6 +43,7 @@ #include <asm/sigcontext.h> #include <asm/vdso.h> #include <asm/switch_to.h> +#include <asm/tm.h> #ifdef CONFIG_PPC64 #include "ppc32.h" #include <asm/unistd.h> @@ -293,6 +294,10 @@ long sys_sigaction(int sig, struct old_sigaction __user *act, struct sigframe { struct sigcontext sctx; /* the sigcontext */ struct mcontext mctx; /* all the register values */ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + struct sigcontext sctx_transact; + struct mcontext mctx_transact; +#endif /* * Programs using the rs6000/xcoff abi can save up to 19 gp * regs and 18 fp regs below sp before decrementing it. @@ -321,6 +326,9 @@ struct rt_sigframe { struct siginfo info; #endif struct ucontext uc; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + struct ucontext uc_transact; +#endif /* * Programs using the rs6000/xcoff abi can save up to 19 gp * regs and 18 fp regs below sp before decrementing it. 
@@ -381,6 +389,61 @@ unsigned long copy_vsx_from_user(struct task_struct *task, task->thread.fpr[i][TS_VSRLOWOFFSET] = buf[i]; return 0; } + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +unsigned long copy_transact_fpr_to_user(void __user *to, + struct task_struct *task) +{ + double buf[ELF_NFPREG]; + int i; + + /* save FPR copy to local buffer then write to the thread_struct */ + for (i = 0; i < (ELF_NFPREG - 1) ; i++) + buf[i] = task->thread.TS_TRANS_FPR(i); + memcpy(&buf[i], &task->thread.transact_fpscr, sizeof(double)); + return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double)); +} + +unsigned long copy_transact_fpr_from_user(struct task_struct *task, + void __user *from) +{ + double buf[ELF_NFPREG]; + int i; + + if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double))) + return 1; + for (i = 0; i < (ELF_NFPREG - 1) ; i++) + task->thread.TS_TRANS_FPR(i) = buf[i]; + memcpy(&task->thread.transact_fpscr, &buf[i], sizeof(double)); + + return 0; +} + +unsigned long copy_transact_vsx_to_user(void __user *to, + struct task_struct *task) +{ + double buf[ELF_NVSRHALFREG]; + int i; + + /* save FPR copy to local buffer then write to the thread_struct */ + for (i = 0; i < ELF_NVSRHALFREG; i++) + buf[i] = task->thread.transact_fpr[i][TS_VSRLOWOFFSET]; + return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double)); +} + +unsigned long copy_transact_vsx_from_user(struct task_struct *task, + void __user *from) +{ + double buf[ELF_NVSRHALFREG]; + int i; + + if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double))) + return 1; + for (i = 0; i < ELF_NVSRHALFREG ; i++) + task->thread.transact_fpr[i][TS_VSRLOWOFFSET] = buf[i]; + return 0; +} +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ #else inline unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task) @@ -395,6 +458,22 @@ inline unsigned long copy_fpr_from_user(struct task_struct *task, return __copy_from_user(task->thread.fpr, from, ELF_NFPREG * sizeof(double)); } + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +inline unsigned long copy_transact_fpr_to_user(void __user *to, + struct task_struct *task) +{ + return __copy_to_user(to, task->thread.transact_fpr, + ELF_NFPREG * sizeof(double)); +} + +inline unsigned long copy_transact_fpr_from_user(struct task_struct *task, + void __user *from) +{ + return __copy_from_user(task->thread.transact_fpr, from, + ELF_NFPREG * sizeof(double)); +} +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ #endif /* @@ -483,6 +562,156 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, return 0; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Save the current user registers on the user stack. + * We only save the altivec/spe registers if the process has used + * altivec/spe instructions at some point. + * We also save the transactional registers to a second ucontext in the + * frame. + * + * See save_user_regs() and signal_64.c:setup_tm_sigcontexts(). + */ +static int save_tm_user_regs(struct pt_regs *regs, + struct mcontext __user *frame, + struct mcontext __user *tm_frame, int sigret) +{ + unsigned long msr = regs->msr; + + /* tm_reclaim rolls back all reg states, updating thread.ckpt_regs, + * thread.transact_fpr[], thread.transact_vr[], etc. 
+ */ + tm_enable(); + tm_reclaim(¤t->thread, msr, TM_CAUSE_SIGNAL); + + /* Make sure floating point registers are stored in regs */ + flush_fp_to_thread(current); + + /* Save both sets of general registers */ + if (save_general_regs(¤t->thread.ckpt_regs, frame) + || save_general_regs(regs, tm_frame)) + return 1; + + /* Stash the top half of the 64bit MSR into the 32bit MSR word + * of the transactional mcontext. This way we have a backward-compatible + * MSR in the 'normal' (checkpointed) mcontext and additionally one can + * also look at what type of transaction (T or S) was active at the + * time of the signal. + */ + if (__put_user((msr >> 32), &tm_frame->mc_gregs[PT_MSR])) + return 1; + +#ifdef CONFIG_ALTIVEC + /* save altivec registers */ + if (current->thread.used_vr) { + flush_altivec_to_thread(current); + if (__copy_to_user(&frame->mc_vregs, current->thread.vr, + ELF_NVRREG * sizeof(vector128))) + return 1; + if (msr & MSR_VEC) { + if (__copy_to_user(&tm_frame->mc_vregs, + current->thread.transact_vr, + ELF_NVRREG * sizeof(vector128))) + return 1; + } else { + if (__copy_to_user(&tm_frame->mc_vregs, + current->thread.vr, + ELF_NVRREG * sizeof(vector128))) + return 1; + } + + /* set MSR_VEC in the saved MSR value to indicate that + * frame->mc_vregs contains valid data + */ + msr |= MSR_VEC; + } + + /* We always copy to/from vrsave, it's 0 if we don't have or don't + * use altivec. Since VSCR only contains 32 bits saved in the least + * significant bits of a vector, we "cheat" and stuff VRSAVE in the + * most significant bits of that same vector. --BenH + */ + if (__put_user(current->thread.vrsave, + (u32 __user *)&frame->mc_vregs[32])) + return 1; + if (msr & MSR_VEC) { + if (__put_user(current->thread.transact_vrsave, + (u32 __user *)&tm_frame->mc_vregs[32])) + return 1; + } else { + if (__put_user(current->thread.vrsave, + (u32 __user *)&tm_frame->mc_vregs[32])) + return 1; + } +#endif /* CONFIG_ALTIVEC */ + + if (copy_fpr_to_user(&frame->mc_fregs, current)) + return 1; + if (msr & MSR_FP) { + if (copy_transact_fpr_to_user(&tm_frame->mc_fregs, current)) + return 1; + } else { + if (copy_fpr_to_user(&tm_frame->mc_fregs, current)) + return 1; + } + +#ifdef CONFIG_VSX + /* + * Copy VSR 0-31 upper half from thread_struct to local + * buffer, then write that to userspace. Also set MSR_VSX in + * the saved MSR value to indicate that frame->mc_vregs + * contains valid data + */ + if (current->thread.used_vsr) { + __giveup_vsx(current); + if (copy_vsx_to_user(&frame->mc_vsregs, current)) + return 1; + if (msr & MSR_VSX) { + if (copy_transact_vsx_to_user(&tm_frame->mc_vsregs, + current)) + return 1; + } else { + if (copy_vsx_to_user(&tm_frame->mc_vsregs, current)) + return 1; + } + + msr |= MSR_VSX; + } +#endif /* CONFIG_VSX */ +#ifdef CONFIG_SPE + /* SPE regs are not checkpointed with TM, so this section is + * simply the same as in save_user_regs(). 
+ */ + if (current->thread.used_spe) { + flush_spe_to_thread(current); + if (__copy_to_user(&frame->mc_vregs, current->thread.evr, + ELF_NEVRREG * sizeof(u32))) + return 1; + /* set MSR_SPE in the saved MSR value to indicate that + * frame->mc_vregs contains valid data */ + msr |= MSR_SPE; + } + + /* We always copy to/from spefscr */ + if (__put_user(current->thread.spefscr, (u32 __user *)&frame->mc_vregs + ELF_NEVRREG)) + return 1; +#endif /* CONFIG_SPE */ + + if (__put_user(msr, &frame->mc_gregs[PT_MSR])) + return 1; + if (sigret) { + /* Set up the sigreturn trampoline: li r0,sigret; sc */ + if (__put_user(0x38000000UL + sigret, &frame->tramp[0]) + || __put_user(0x44000002UL, &frame->tramp[1])) + return 1; + flush_icache_range((unsigned long) &frame->tramp[0], + (unsigned long) &frame->tramp[2]); + } + + return 0; +} +#endif + /* * Restore the current user register values from the user stack, * (except for MSR). @@ -588,6 +817,139 @@ static long restore_user_regs(struct pt_regs *regs, return 0; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Restore the current user register values from the user stack, except for + * MSR, and recheckpoint the original checkpointed register state for processes + * in transactions. + */ +static long restore_tm_user_regs(struct pt_regs *regs, + struct mcontext __user *sr, + struct mcontext __user *tm_sr) +{ + long err; + unsigned long msr; +#ifdef CONFIG_VSX + int i; +#endif + + /* + * restore general registers but not including MSR or SOFTE. Also + * take care of keeping r2 (TLS) intact if not a signal. + * See comment in signal_64.c:restore_tm_sigcontexts(); + * TFHAR is restored from the checkpointed NIP; TEXASR and TFIAR + * were set by the signal delivery. + */ + err = restore_general_regs(regs, tm_sr); + err |= restore_general_regs(¤t->thread.ckpt_regs, sr); + + err |= __get_user(current->thread.tm_tfhar, &sr->mc_gregs[PT_NIP]); + + err |= __get_user(msr, &sr->mc_gregs[PT_MSR]); + if (err) + return 1; + + /* Restore the previous little-endian mode */ + regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); + + /* + * Do this before updating the thread state in + * current->thread.fpr/vr/evr. That way, if we get preempted + * and another task grabs the FPU/Altivec/SPE, it won't be + * tempted to save the current CPU state into the thread_struct + * and corrupt what we are writing there. 
+ */ + discard_lazy_cpu_state(); + +#ifdef CONFIG_ALTIVEC + regs->msr &= ~MSR_VEC; + if (msr & MSR_VEC) { + /* restore altivec registers from the stack */ + if (__copy_from_user(current->thread.vr, &sr->mc_vregs, + sizeof(sr->mc_vregs)) || + __copy_from_user(current->thread.transact_vr, + &tm_sr->mc_vregs, + sizeof(sr->mc_vregs))) + return 1; + } else if (current->thread.used_vr) { + memset(current->thread.vr, 0, ELF_NVRREG * sizeof(vector128)); + memset(current->thread.transact_vr, 0, + ELF_NVRREG * sizeof(vector128)); + } + + /* Always get VRSAVE back */ + if (__get_user(current->thread.vrsave, + (u32 __user *)&sr->mc_vregs[32]) || + __get_user(current->thread.transact_vrsave, + (u32 __user *)&tm_sr->mc_vregs[32])) + return 1; +#endif /* CONFIG_ALTIVEC */ + + regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1); + + if (copy_fpr_from_user(current, &sr->mc_fregs) || + copy_transact_fpr_from_user(current, &tm_sr->mc_fregs)) + return 1; + +#ifdef CONFIG_VSX + regs->msr &= ~MSR_VSX; + if (msr & MSR_VSX) { + /* + * Restore altivec registers from the stack to a local + * buffer, then write this out to the thread_struct + */ + if (copy_vsx_from_user(current, &sr->mc_vsregs) || + copy_transact_vsx_from_user(current, &tm_sr->mc_vsregs)) + return 1; + } else if (current->thread.used_vsr) + for (i = 0; i < 32 ; i++) { + current->thread.fpr[i][TS_VSRLOWOFFSET] = 0; + current->thread.transact_fpr[i][TS_VSRLOWOFFSET] = 0; + } +#endif /* CONFIG_VSX */ + +#ifdef CONFIG_SPE + /* SPE regs are not checkpointed with TM, so this section is + * simply the same as in restore_user_regs(). + */ + regs->msr &= ~MSR_SPE; + if (msr & MSR_SPE) { + if (__copy_from_user(current->thread.evr, &sr->mc_vregs, + ELF_NEVRREG * sizeof(u32))) + return 1; + } else if (current->thread.used_spe) + memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32)); + + /* Always get SPEFSCR back */ + if (__get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + + ELF_NEVRREG)) + return 1; +#endif /* CONFIG_SPE */ + + /* Now, recheckpoint. This loads up all of the checkpointed (older) + * registers, including FP and V[S]Rs. After recheckpointing, the + * transactional versions should be loaded. 
+ */ + tm_enable(); + /* This loads the checkpointed FP/VEC state, if used */ + tm_recheckpoint(&current->thread, msr); + /* The task has moved into TM state S, so ensure MSR reflects this */ + regs->msr = (regs->msr & ~MSR_TS_MASK) | MSR_TS_S; + + /* This loads the speculative FP/VEC state, if used */ + if (msr & MSR_FP) { + do_load_up_transact_fpu(&current->thread); + regs->msr |= (MSR_FP | current->thread.fpexc_mode); + } + if (msr & MSR_VEC) { + do_load_up_transact_altivec(&current->thread); + regs->msr |= MSR_VEC; + } + + return 0; +} +#endif + #ifdef CONFIG_PPC64 long compat_sys_rt_sigaction(int sig, const struct sigaction32 __user *act, struct sigaction32 __user *oact, size_t sigsetsize) @@ -827,6 +1189,8 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, struct mcontext __user *frame; void __user *addr; unsigned long newsp = 0; + int sigret; + unsigned long tramp; /* Set up Signal Frame */ /* Put a Real Time Context onto stack */ @@ -838,7 +1202,6 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, /* Put the siginfo & fill in most of the ucontext */ if (copy_siginfo_to_user(&rt_sf->info, info) || __put_user(0, &rt_sf->uc.uc_flags) - || __put_user(0, &rt_sf->uc.uc_link) || __put_user(current->sas_ss_sp, &rt_sf->uc.uc_stack.ss_sp) || __put_user(sas_ss_flags(regs->gpr[1]), &rt_sf->uc.uc_stack.ss_flags) @@ -852,14 +1215,37 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, frame = &rt_sf->uc.uc_mcontext; addr = frame; if (vdso32_rt_sigtramp && current->mm->context.vdso_base) { - if (save_user_regs(regs, frame, 0, 1)) - goto badframe; - regs->link = current->mm->context.vdso_base + vdso32_rt_sigtramp; + sigret = 0; + tramp = current->mm->context.vdso_base + vdso32_rt_sigtramp; } else { - if (save_user_regs(regs, frame, __NR_rt_sigreturn, 1)) + sigret = __NR_rt_sigreturn; + tramp = (unsigned long) frame->tramp; + } + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (MSR_TM_ACTIVE(regs->msr)) { + if (save_tm_user_regs(regs, &rt_sf->uc.uc_mcontext, + &rt_sf->uc_transact.uc_mcontext, sigret)) goto badframe; - regs->link = (unsigned long) frame->tramp; } + else +#endif + if (save_user_regs(regs, frame, sigret, 1)) + goto badframe; + regs->link = tramp; + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (MSR_TM_ACTIVE(regs->msr)) { + if (__put_user((unsigned long)&rt_sf->uc_transact, + &rt_sf->uc.uc_link) + || __put_user(to_user_ptr(&rt_sf->uc_transact.uc_mcontext), + &rt_sf->uc_transact.uc_regs)) + goto badframe; + } + else +#endif + if (__put_user(0, &rt_sf->uc.uc_link)) + goto badframe; current->thread.fpscr.val = 0; /* turn off all fp exceptions */ @@ -878,6 +1264,13 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, regs->nip = (unsigned long) ka->sa.sa_handler; /* enter the signal handler in big-endian mode */ regs->msr &= ~MSR_LE; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Remove TM bits from thread's MSR.
The MSR in the sigcontext + * just indicates to userland that we were doing a transaction, but we + * don't want to return in transactional state: + */ + regs->msr &= ~MSR_TS_MASK; +#endif return 1; badframe: @@ -925,6 +1318,35 @@ static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int return 0; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static int do_setcontext_tm(struct ucontext __user *ucp, + struct ucontext __user *tm_ucp, + struct pt_regs *regs) +{ + sigset_t set; + struct mcontext __user *mcp; + struct mcontext __user *tm_mcp; + u32 cmcp; + u32 tm_cmcp; + + if (get_sigset_t(&set, &ucp->uc_sigmask)) + return -EFAULT; + + if (__get_user(cmcp, &ucp->uc_regs) || + __get_user(tm_cmcp, &tm_ucp->uc_regs)) + return -EFAULT; + mcp = (struct mcontext __user *)(u64)cmcp; + tm_mcp = (struct mcontext __user *)(u64)tm_cmcp; + /* no need to check access_ok(mcp), since mcp < 4GB */ + + set_current_blocked(&set); + if (restore_tm_user_regs(regs, mcp, tm_mcp)) + return -EFAULT; + + return 0; +} +#endif + long sys_swapcontext(struct ucontext __user *old_ctx, struct ucontext __user *new_ctx, int ctx_size, int r6, int r7, int r8, struct pt_regs *regs) @@ -1020,7 +1442,12 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, struct pt_regs *regs) { struct rt_sigframe __user *rt_sf; - +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + struct ucontext __user *uc_transact; + unsigned long msr_hi; + unsigned long tmp; + int tm_restore = 0; +#endif /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; @@ -1028,6 +1455,34 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, (regs->gpr[1] + __SIGNAL_FRAMESIZE + 16); if (!access_ok(VERIFY_READ, rt_sf, sizeof(*rt_sf))) goto bad; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (__get_user(tmp, &rt_sf->uc.uc_link)) + goto bad; + uc_transact = (struct ucontext __user *)(uintptr_t)tmp; + if (uc_transact) { + u32 cmcp; + struct mcontext __user *mcp; + + if (__get_user(cmcp, &uc_transact->uc_regs)) + return -EFAULT; + mcp = (struct mcontext __user *)(u64)cmcp; + /* The top 32 bits of the MSR are stashed in the transactional + * ucontext. */ + if (__get_user(msr_hi, &mcp->mc_gregs[PT_MSR])) + goto bad; + + if (MSR_TM_SUSPENDED(msr_hi<<32)) { + /* We only recheckpoint on return if we're + * transaction. 
+ */ + tm_restore = 1; + if (do_setcontext_tm(&rt_sf->uc, uc_transact, regs)) + goto bad; + } + } + if (!tm_restore) + /* Fall through, for non-TM restore */ +#endif if (do_setcontext(&rt_sf->uc, regs, 1)) goto bad; @@ -1179,6 +1634,8 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, struct sigcontext __user *sc; struct sigframe __user *frame; unsigned long newsp = 0; + int sigret; + unsigned long tramp; /* Set up Signal Frame */ frame = get_sigframe(ka, regs, sizeof(*frame), 1); @@ -1201,14 +1658,25 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, goto badframe; if (vdso32_sigtramp && current->mm->context.vdso_base) { - if (save_user_regs(regs, &frame->mctx, 0, 1)) - goto badframe; - regs->link = current->mm->context.vdso_base + vdso32_sigtramp; + sigret = 0; + tramp = current->mm->context.vdso_base + vdso32_sigtramp; } else { - if (save_user_regs(regs, &frame->mctx, __NR_sigreturn, 1)) + sigret = __NR_sigreturn; + tramp = (unsigned long) frame->mctx.tramp; + } + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (MSR_TM_ACTIVE(regs->msr)) { + if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact, + sigret)) goto badframe; - regs->link = (unsigned long) frame->mctx.tramp; } + else +#endif + if (save_user_regs(regs, &frame->mctx, sigret, 1)) + goto badframe; + + regs->link = tramp; current->thread.fpscr.val = 0; /* turn off all fp exceptions */ @@ -1223,7 +1691,13 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, regs->nip = (unsigned long) ka->sa.sa_handler; /* enter the signal handler in big-endian mode */ regs->msr &= ~MSR_LE; - +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Remove TM bits from thread's MSR. The MSR in the sigcontext + * just indicates to userland that we were doing a transaction, but we + * don't want to return in transactional state: + */ + regs->msr &= ~MSR_TS_MASK; +#endif return 1; badframe: diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 1ca045d44324..7a76ee48a952 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -34,6 +34,7 @@ #include <asm/syscalls.h> #include <asm/vdso.h> #include <asm/switch_to.h> +#include <asm/tm.h> #include "signal.h" @@ -56,6 +57,9 @@ struct rt_sigframe { /* sys_rt_sigreturn requires the ucontext be the first field */ struct ucontext uc; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + struct ucontext uc_transact; +#endif unsigned long _unused[2]; unsigned int tramp[TRAMP_SIZE]; struct siginfo __user *pinfo; @@ -145,6 +149,145 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, return err; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * As above, but Transactional Memory is in use, so deliver sigcontexts + * containing checkpointed and transactional register states. + * + * To do this, we treclaim to gather both sets of registers and set up the + * 'normal' sigcontext registers with rolled-back register values such that a + * simple signal handler sees a correct checkpointed register state. + * If interested, a TM-aware sighandler can examine the transactional registers + * in the 2nd sigcontext to determine the real origin of the signal. + */ +static long setup_tm_sigcontexts(struct sigcontext __user *sc, + struct sigcontext __user *tm_sc, + struct pt_regs *regs, + int signr, sigset_t *set, unsigned long handler) +{ + /* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the + * process never used altivec yet (MSR_VEC is zero in pt_regs of + * the context). 
This is very important because we must ensure we + * don't lose the VRSAVE content that may have been set prior to + * the process doing its first vector operation + * Userland shall check AT_HWCAP to know wether it can rely on the + * v_regs pointer or not. + */ +#ifdef CONFIG_ALTIVEC + elf_vrreg_t __user *v_regs = (elf_vrreg_t __user *) + (((unsigned long)sc->vmx_reserve + 15) & ~0xful); + elf_vrreg_t __user *tm_v_regs = (elf_vrreg_t __user *) + (((unsigned long)tm_sc->vmx_reserve + 15) & ~0xful); +#endif + unsigned long msr = regs->msr; + long err = 0; + + BUG_ON(!MSR_TM_ACTIVE(regs->msr)); + + /* tm_reclaim rolls back all reg states, saving checkpointed (older) + * GPRs to thread.ckpt_regs and (if used) FPRs to (newer) + * thread.transact_fp and/or VRs to (newer) thread.transact_vr. + * THEN we save out FP/VRs, if necessary, to the checkpointed (older) + * thread.fr[]/vr[]s. The transactional (newer) GPRs are on the + * stack, in *regs. + */ + tm_enable(); + tm_reclaim(¤t->thread, msr, TM_CAUSE_SIGNAL); + + flush_fp_to_thread(current); + +#ifdef CONFIG_ALTIVEC + err |= __put_user(v_regs, &sc->v_regs); + err |= __put_user(tm_v_regs, &tm_sc->v_regs); + + /* save altivec registers */ + if (current->thread.used_vr) { + flush_altivec_to_thread(current); + /* Copy 33 vec registers (vr0..31 and vscr) to the stack */ + err |= __copy_to_user(v_regs, current->thread.vr, + 33 * sizeof(vector128)); + /* If VEC was enabled there are transactional VRs valid too, + * else they're a copy of the checkpointed VRs. + */ + if (msr & MSR_VEC) + err |= __copy_to_user(tm_v_regs, + current->thread.transact_vr, + 33 * sizeof(vector128)); + else + err |= __copy_to_user(tm_v_regs, + current->thread.vr, + 33 * sizeof(vector128)); + + /* set MSR_VEC in the MSR value in the frame to indicate + * that sc->v_reg contains valid data. + */ + msr |= MSR_VEC; + } + /* We always copy to/from vrsave, it's 0 if we don't have or don't + * use altivec. + */ + err |= __put_user(current->thread.vrsave, (u32 __user *)&v_regs[33]); + if (msr & MSR_VEC) + err |= __put_user(current->thread.transact_vrsave, + (u32 __user *)&tm_v_regs[33]); + else + err |= __put_user(current->thread.vrsave, + (u32 __user *)&tm_v_regs[33]); + +#else /* CONFIG_ALTIVEC */ + err |= __put_user(0, &sc->v_regs); + err |= __put_user(0, &tm_sc->v_regs); +#endif /* CONFIG_ALTIVEC */ + + /* copy fpr regs and fpscr */ + err |= copy_fpr_to_user(&sc->fp_regs, current); + if (msr & MSR_FP) + err |= copy_transact_fpr_to_user(&tm_sc->fp_regs, current); + else + err |= copy_fpr_to_user(&tm_sc->fp_regs, current); + +#ifdef CONFIG_VSX + /* + * Copy VSX low doubleword to local buffer for formatting, + * then out to userspace. Update v_regs to point after the + * VMX data. + */ + if (current->thread.used_vsr) { + __giveup_vsx(current); + v_regs += ELF_NVRREG; + tm_v_regs += ELF_NVRREG; + + err |= copy_vsx_to_user(v_regs, current); + + if (msr & MSR_VSX) + err |= copy_transact_vsx_to_user(tm_v_regs, current); + else + err |= copy_vsx_to_user(tm_v_regs, current); + + /* set MSR_VSX in the MSR value in the frame to + * indicate that sc->vs_reg) contains valid data. 
+ */ + msr |= MSR_VSX; + } +#endif /* CONFIG_VSX */ + + err |= __put_user(&sc->gp_regs, &sc->regs); + err |= __put_user(&tm_sc->gp_regs, &tm_sc->regs); + WARN_ON(!FULL_REGS(regs)); + err |= __copy_to_user(&tm_sc->gp_regs, regs, GP_REGS_SIZE); + err |= __copy_to_user(&sc->gp_regs, + ¤t->thread.ckpt_regs, GP_REGS_SIZE); + err |= __put_user(msr, &tm_sc->gp_regs[PT_MSR]); + err |= __put_user(msr, &sc->gp_regs[PT_MSR]); + err |= __put_user(signr, &sc->signal); + err |= __put_user(handler, &sc->handler); + if (set != NULL) + err |= __put_user(set->sig[0], &sc->oldmask); + + return err; +} +#endif + /* * Restore the sigcontext from the signal frame. */ @@ -241,6 +384,153 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, return err; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Restore the two sigcontexts from the frame of a transactional processes. + */ + +static long restore_tm_sigcontexts(struct pt_regs *regs, + struct sigcontext __user *sc, + struct sigcontext __user *tm_sc) +{ +#ifdef CONFIG_ALTIVEC + elf_vrreg_t __user *v_regs, *tm_v_regs; +#endif + unsigned long err = 0; + unsigned long msr; +#ifdef CONFIG_VSX + int i; +#endif + /* copy the GPRs */ + err |= __copy_from_user(regs->gpr, tm_sc->gp_regs, sizeof(regs->gpr)); + err |= __copy_from_user(¤t->thread.ckpt_regs, sc->gp_regs, + sizeof(regs->gpr)); + + /* + * TFHAR is restored from the checkpointed 'wound-back' ucontext's NIP. + * TEXASR was set by the signal delivery reclaim, as was TFIAR. + * Users doing anything abhorrent like thread-switching w/ signals for + * TM-Suspended code will have to back TEXASR/TFIAR up themselves. + * For the case of getting a signal and simply returning from it, + * we don't need to re-copy them here. + */ + err |= __get_user(regs->nip, &tm_sc->gp_regs[PT_NIP]); + err |= __get_user(current->thread.tm_tfhar, &sc->gp_regs[PT_NIP]); + + /* get MSR separately, transfer the LE bit if doing signal return */ + err |= __get_user(msr, &sc->gp_regs[PT_MSR]); + regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); + + /* The following non-GPR non-FPR non-VR state is also checkpointed: */ + err |= __get_user(regs->ctr, &tm_sc->gp_regs[PT_CTR]); + err |= __get_user(regs->link, &tm_sc->gp_regs[PT_LNK]); + err |= __get_user(regs->xer, &tm_sc->gp_regs[PT_XER]); + err |= __get_user(regs->ccr, &tm_sc->gp_regs[PT_CCR]); + err |= __get_user(current->thread.ckpt_regs.ctr, + &sc->gp_regs[PT_CTR]); + err |= __get_user(current->thread.ckpt_regs.link, + &sc->gp_regs[PT_LNK]); + err |= __get_user(current->thread.ckpt_regs.xer, + &sc->gp_regs[PT_XER]); + err |= __get_user(current->thread.ckpt_regs.ccr, + &sc->gp_regs[PT_CCR]); + + /* These regs are not checkpointed; they can go in 'regs'. */ + err |= __get_user(regs->trap, &sc->gp_regs[PT_TRAP]); + err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]); + err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]); + err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]); + + /* + * Do this before updating the thread state in + * current->thread.fpr/vr. That way, if we get preempted + * and another task grabs the FPU/Altivec, it won't be + * tempted to save the current CPU state into the thread_struct + * and corrupt what we are writing there. + */ + discard_lazy_cpu_state(); + + /* + * Force reload of FP/VEC. + * This has to be done before copying stuff into current->thread.fpr/vr + * for the reasons explained in the previous comment. 
+ */ + regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX); + +#ifdef CONFIG_ALTIVEC + err |= __get_user(v_regs, &sc->v_regs); + err |= __get_user(tm_v_regs, &tm_sc->v_regs); + if (err) + return err; + if (v_regs && !access_ok(VERIFY_READ, v_regs, 34 * sizeof(vector128))) + return -EFAULT; + if (tm_v_regs && !access_ok(VERIFY_READ, + tm_v_regs, 34 * sizeof(vector128))) + return -EFAULT; + /* Copy 33 vec registers (vr0..31 and vscr) from the stack */ + if (v_regs != 0 && tm_v_regs != 0 && (msr & MSR_VEC) != 0) { + err |= __copy_from_user(current->thread.vr, v_regs, + 33 * sizeof(vector128)); + err |= __copy_from_user(current->thread.transact_vr, tm_v_regs, + 33 * sizeof(vector128)); + } + else if (current->thread.used_vr) { + memset(current->thread.vr, 0, 33 * sizeof(vector128)); + memset(current->thread.transact_vr, 0, 33 * sizeof(vector128)); + } + /* Always get VRSAVE back */ + if (v_regs != 0 && tm_v_regs != 0) { + err |= __get_user(current->thread.vrsave, + (u32 __user *)&v_regs[33]); + err |= __get_user(current->thread.transact_vrsave, + (u32 __user *)&tm_v_regs[33]); + } + else { + current->thread.vrsave = 0; + current->thread.transact_vrsave = 0; + } +#endif /* CONFIG_ALTIVEC */ + /* restore floating point */ + err |= copy_fpr_from_user(current, &sc->fp_regs); + err |= copy_transact_fpr_from_user(current, &tm_sc->fp_regs); +#ifdef CONFIG_VSX + /* + * Get additional VSX data. Update v_regs to point after the + * VMX data. Copy VSX low doubleword from userspace to local + * buffer for formatting, then into the taskstruct. + */ + if (v_regs && ((msr & MSR_VSX) != 0)) { + v_regs += ELF_NVRREG; + tm_v_regs += ELF_NVRREG; + err |= copy_vsx_from_user(current, v_regs); + err |= copy_transact_vsx_from_user(current, tm_v_regs); + } else { + for (i = 0; i < 32 ; i++) { + current->thread.fpr[i][TS_VSRLOWOFFSET] = 0; + current->thread.transact_fpr[i][TS_VSRLOWOFFSET] = 0; + } + } +#endif + tm_enable(); + /* This loads the checkpointed FP/VEC state, if used */ + tm_recheckpoint(¤t->thread, msr); + /* The task has moved into TM state S, so ensure MSR reflects this: */ + regs->msr = (regs->msr & ~MSR_TS_MASK) | __MASK(33); + + /* This loads the speculative FP/VEC state, if used */ + if (msr & MSR_FP) { + do_load_up_transact_fpu(¤t->thread); + regs->msr |= (MSR_FP | current->thread.fpexc_mode); + } + if (msr & MSR_VEC) { + do_load_up_transact_altivec(¤t->thread); + regs->msr |= MSR_VEC; + } + + return err; +} +#endif + /* * Setup the trampoline code on the stack */ @@ -355,6 +645,9 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, { struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1]; sigset_t set; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + unsigned long msr; +#endif /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; @@ -365,6 +658,21 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set))) goto badframe; set_current_blocked(&set); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) + goto badframe; + if (MSR_TM_SUSPENDED(msr)) { + /* We recheckpoint on return. 
*/ + struct ucontext __user *uc_transact; + if (__get_user(uc_transact, &uc->uc_link)) + goto badframe; + if (restore_tm_sigcontexts(regs, &uc->uc_mcontext, + &uc_transact->uc_mcontext)) + goto badframe; + } + else + /* Fall through, for non-TM restore */ +#endif if (restore_sigcontext(regs, NULL, 1, &uc->uc_mcontext)) goto badframe; @@ -415,19 +723,42 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, /* Create the ucontext. */ err |= __put_user(0, &frame->uc.uc_flags); - err |= __put_user(0, &frame->uc.uc_link); err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); err |= __put_user(sas_ss_flags(regs->gpr[1]), &frame->uc.uc_stack.ss_flags); err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, signr, NULL, - (unsigned long)ka->sa.sa_handler, 1); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (MSR_TM_ACTIVE(regs->msr)) { + /* The ucontext_t passed to userland points to the second + * ucontext_t (for transactional state) with its uc_link ptr. + */ + err |= __put_user(&frame->uc_transact, &frame->uc.uc_link); + err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext, + &frame->uc_transact.uc_mcontext, + regs, signr, + NULL, + (unsigned long)ka->sa.sa_handler); + } else +#endif + { + err |= __put_user(0, &frame->uc.uc_link); + err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, signr, + NULL, (unsigned long)ka->sa.sa_handler, + 1); + } err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) goto badframe; /* Make sure signal handler doesn't get spurious FP exceptions */ current->thread.fpscr.val = 0; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Remove TM bits from thread's MSR. The MSR in the sigcontext + * just indicates to userland that we were doing a transaction, but we + * don't want to return in transactional state: + */ + regs->msr &= ~MSR_TS_MASK; +#endif /* Set up to return from userspace. */ if (vdso64_rt_sigtramp && current->mm->context.vdso_base) { diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S new file mode 100644 index 000000000000..84dbace657ce --- /dev/null +++ b/arch/powerpc/kernel/tm.S @@ -0,0 +1,388 @@ +/* + * Transactional memory support routines to reclaim and recheckpoint + * transactional process state. + * + * Copyright 2012 Matt Evans & Michael Neuling, IBM Corporation. + */ + +#include <asm/asm-offsets.h> +#include <asm/ppc_asm.h> +#include <asm/ppc-opcode.h> +#include <asm/ptrace.h> +#include <asm/reg.h> + +#ifdef CONFIG_VSX +/* See fpu.S, this is very similar but to save/restore checkpointed FPRs/VSRs */ +#define __SAVE_32FPRS_VSRS_TRANSACT(n,c,base) \ +BEGIN_FTR_SECTION \ + b 2f; \ +END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ + SAVE_32FPRS_TRANSACT(n,base); \ + b 3f; \ +2: SAVE_32VSRS_TRANSACT(n,c,base); \ +3: +/* ...and this is just plain borrowed from there. */ +#define __REST_32FPRS_VSRS(n,c,base) \ +BEGIN_FTR_SECTION \ + b 2f; \ +END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ + REST_32FPRS(n,base); \ + b 3f; \ +2: REST_32VSRS(n,c,base); \ +3: +#else +#define __SAVE_32FPRS_VSRS_TRANSACT(n,c,base) SAVE_32FPRS_TRANSACT(n, base) +#define __REST_32FPRS_VSRS(n,c,base) REST_32FPRS(n, base) +#endif +#define SAVE_32FPRS_VSRS_TRANSACT(n,c,base) \ + __SAVE_32FPRS_VSRS_TRANSACT(n,__REG_##c,__REG_##base) +#define REST_32FPRS_VSRS(n,c,base) \ + __REST_32FPRS_VSRS(n,__REG_##c,__REG_##base) + +/* Stack frame offsets for local variables. 
*/ +#define TM_FRAME_L0 TM_FRAME_SIZE-16 +#define TM_FRAME_L1 TM_FRAME_SIZE-8 +#define STACK_PARAM(x) (48+((x)*8)) + + +/* In order to access the TM SPRs, TM must be enabled. So, do so: */ +_GLOBAL(tm_enable) + mfmsr r4 + li r3, MSR_TM >> 32 + sldi r3, r3, 32 + and. r0, r4, r3 + bne 1f + or r4, r4, r3 + mtmsrd r4 +1: blr + +_GLOBAL(tm_save_sprs) + mfspr r0, SPRN_TFHAR + std r0, THREAD_TM_TFHAR(r3) + mfspr r0, SPRN_TEXASR + std r0, THREAD_TM_TEXASR(r3) + mfspr r0, SPRN_TFIAR + std r0, THREAD_TM_TFIAR(r3) + blr + +_GLOBAL(tm_restore_sprs) + ld r0, THREAD_TM_TFHAR(r3) + mtspr SPRN_TFHAR, r0 + ld r0, THREAD_TM_TEXASR(r3) + mtspr SPRN_TEXASR, r0 + ld r0, THREAD_TM_TFIAR(r3) + mtspr SPRN_TFIAR, r0 + blr + + /* Passed an 8-bit failure cause as first argument. */ +_GLOBAL(tm_abort) + TABORT(R3) + blr + + +/* void tm_reclaim(struct thread_struct *thread, + * unsigned long orig_msr, + * uint8_t cause) + * + * - Performs a full reclaim. This destroys outstanding + * transactions and updates thread->regs.tm_ckpt_* with the + * original checkpointed state. Note that thread->regs is + * unchanged. + * - FP regs are written back to thread->transact_fpr before + * reclaiming. These are the transactional (current) versions. + * + * Purpose is to both abort transactions of, and preserve the state of, + * a transactions at a context switch. We preserve/restore both sets of process + * state to restore them when the thread's scheduled again. We continue in + * userland as though nothing happened, but when the transaction is resumed + * they will abort back to the checkpointed state we save out here. + * + * Call with IRQs off, stacks get all out of sync for some periods in here! + */ +_GLOBAL(tm_reclaim) + mfcr r6 + mflr r0 + std r6, 8(r1) + std r0, 16(r1) + std r2, 40(r1) + stdu r1, -TM_FRAME_SIZE(r1) + + /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. */ + + std r3, STACK_PARAM(0)(r1) + SAVE_NVGPRS(r1) + + mfmsr r14 + mr r15, r14 + ori r15, r15, MSR_FP + oris r15, r15, MSR_VEC@h +#ifdef CONFIG_VSX + BEGIN_FTR_SECTION + oris r15,r15, MSR_VSX@h + END_FTR_SECTION_IFSET(CPU_FTR_VSX) +#endif + mtmsrd r15 + std r14, TM_FRAME_L0(r1) + + /* Stash the stack pointer away for use after reclaim */ + std r1, PACAR1(r13) + + /* ******************** FPR/VR/VSRs ************ + * Before reclaiming, capture the current/transactional FPR/VR + * versions /if used/. + * + * (If VSX used, FP and VMX are implied. Or, we don't need to look + * at MSR.VSX as copying FP regs if .FP, vector regs if .VMX covers it.) + * + * We're passed the thread's MSR as parameter 2. + * + * We enabled VEC/FP/VSX in the msr above, so we can execute these + * instructions! + */ + andis. r0, r4, MSR_VEC@h + beq dont_backup_vec + + SAVE_32VRS_TRANSACT(0, r6, r3) /* r6 scratch, r3 thread */ + mfvscr vr0 + li r6, THREAD_TRANSACT_VSCR + stvx vr0, r3, r6 + mfspr r0, SPRN_VRSAVE + std r0, THREAD_TRANSACT_VRSAVE(r3) + +dont_backup_vec: + andi. r0, r4, MSR_FP + beq dont_backup_fp + + SAVE_32FPRS_VSRS_TRANSACT(0, R6, R3) /* r6 scratch, r3 thread */ + + mffs fr0 + stfd fr0,THREAD_TRANSACT_FPSCR(r3) + +dont_backup_fp: + /* The moment we treclaim, ALL of our GPRs will switch + * to user register state. (FPRs, CCR etc. also!) + * Use an sprg and a tm_scratch in the PACA to shuffle. 
+ */ + TRECLAIM(R5) /* Cause in r5 */ + + /* ******************** GPRs ******************** */ + /* Stash the checkpointed r13 away in the scratch SPR and get the real + * paca + */ + SET_SCRATCH0(r13) + GET_PACA(r13) + + /* Stash the checkpointed r1 away in paca tm_scratch and get the real + * stack pointer back + */ + std r1, PACATMSCRATCH(r13) + ld r1, PACAR1(r13) + + /* Now get some more GPRS free */ + std r7, GPR7(r1) /* Temporary stash */ + std r12, GPR12(r1) /* '' '' '' */ + ld r12, STACK_PARAM(0)(r1) /* Param 0, thread_struct * */ + + addi r7, r12, PT_CKPT_REGS /* Thread's ckpt_regs */ + + /* Make r7 look like an exception frame so that we + * can use the neat GPRx(n) macros. r7 is NOT a pt_regs ptr! + */ + subi r7, r7, STACK_FRAME_OVERHEAD + + /* Sync the userland GPRs 2-12, 14-31 to thread->regs: */ + SAVE_GPR(0, r7) /* user r0 */ + SAVE_GPR(2, r7) /* user r2 */ + SAVE_4GPRS(3, r7) /* user r3-r6 */ + SAVE_4GPRS(8, r7) /* user r8-r11 */ + ld r3, PACATMSCRATCH(r13) /* user r1 */ + ld r4, GPR7(r1) /* user r7 */ + ld r5, GPR12(r1) /* user r12 */ + GET_SCRATCH0(6) /* user r13 */ + std r3, GPR1(r7) + std r4, GPR7(r7) + std r5, GPR12(r7) + std r6, GPR13(r7) + + SAVE_NVGPRS(r7) /* user r14-r31 */ + + /* ******************** NIP ******************** */ + mfspr r3, SPRN_TFHAR + std r3, _NIP(r7) /* Returns to failhandler */ + /* The checkpointed NIP is ignored when rescheduling/rechkpting, + * but is used in signal return to 'wind back' to the abort handler. + */ + + /* ******************** CR,LR,CCR,MSR ********** */ + mfctr r3 + mflr r4 + mfcr r5 + mfxer r6 + + std r3, _CTR(r7) + std r4, _LINK(r7) + std r5, _CCR(r7) + std r6, _XER(r7) + + /* MSR and flags: We don't change CRs, and we don't need to alter + * MSR. + */ + + /* TM regs, incl TEXASR -- these live in thread_struct. Note they've + * been updated by the treclaim, to explain to userland the failure + * cause (aborted). + */ + mfspr r0, SPRN_TEXASR + mfspr r3, SPRN_TFHAR + mfspr r4, SPRN_TFIAR + std r0, THREAD_TM_TEXASR(r12) + std r3, THREAD_TM_TFHAR(r12) + std r4, THREAD_TM_TFIAR(r12) + + /* AMR and PPR are checkpointed too, but are unsupported by Linux. */ + + /* Restore original MSR/IRQ state & clear TM mode */ + ld r14, TM_FRAME_L0(r1) /* Orig MSR */ + li r15, 0 + rldimi r14, r15, MSR_TS_LG, (63-MSR_TS_LG)-1 + mtmsrd r14 + + REST_NVGPRS(r1) + + addi r1, r1, TM_FRAME_SIZE + ld r4, 8(r1) + ld r0, 16(r1) + mtcr r4 + mtlr r0 + ld r2, 40(r1) + blr + + + /* void tm_recheckpoint(struct thread_struct *thread, + * unsigned long orig_msr) + * - Restore the checkpointed register state saved by tm_reclaim + * when we switch_to a process. + * + * Call with IRQs off, stacks get all out of sync for + * some periods in here! + */ +_GLOBAL(tm_recheckpoint) + mfcr r5 + mflr r0 + std r5, 8(r1) + std r0, 16(r1) + std r2, 40(r1) + stdu r1, -TM_FRAME_SIZE(r1) + + /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. + * This is used for backing up the NVGPRs: + */ + SAVE_NVGPRS(r1) + + std r1, PACAR1(r13) + + /* Load complete register state from ts_ckpt* registers */ + + addi r7, r3, PT_CKPT_REGS /* Thread's ckpt_regs */ + + /* Make r7 look like an exception frame so that we + * can use the neat GPRx(n) macros. r7 is now NOT a pt_regs ptr! + */ + subi r7, r7, STACK_FRAME_OVERHEAD + + SET_SCRATCH0(r1) + + mfmsr r6 + /* R4 = original MSR to indicate whether thread used FP/Vector etc. */ + + /* Enable FP/vec in MSR if necessary! */ + lis r5, MSR_VEC@h + ori r5, r5, MSR_FP + and. 
r5, r4, r5 + beq restore_gprs /* if neither, skip both */ + +#ifdef CONFIG_VSX + BEGIN_FTR_SECTION + oris r5, r5, MSR_VSX@h + END_FTR_SECTION_IFSET(CPU_FTR_VSX) +#endif + or r5, r6, r5 /* Set MSR.FP+.VSX/.VEC */ + mtmsr r5 + + /* FP and VEC registers: These are recheckpointed from thread.fpr[] + * and thread.vr[] respectively. The thread.transact_fpr[] version + * is more modern, and will be loaded subsequently by any FPUnavailable + * trap. + */ + andis. r0, r4, MSR_VEC@h + beq dont_restore_vec + + li r5, THREAD_VSCR + lvx vr0, r3, r5 + mtvscr vr0 + REST_32VRS(0, r5, r3) /* r5 scratch, r3 THREAD ptr */ + ld r5, THREAD_VRSAVE(r3) + mtspr SPRN_VRSAVE, r5 + +dont_restore_vec: + andi. r0, r4, MSR_FP + beq dont_restore_fp + + lfd fr0, THREAD_FPSCR(r3) + MTFSF_L(fr0) + REST_32FPRS_VSRS(0, R4, R3) + +dont_restore_fp: + mtmsr r6 /* FP/Vec off again! */ + +restore_gprs: + /* ******************** CR,LR,CCR,MSR ********** */ + ld r3, _CTR(r7) + ld r4, _LINK(r7) + ld r5, _CCR(r7) + ld r6, _XER(r7) + + mtctr r3 + mtlr r4 + mtcr r5 + mtxer r6 + + /* MSR and flags: We don't change CRs, and we don't need to alter + * MSR. + */ + + REST_4GPRS(0, r7) /* GPR0-3 */ + REST_GPR(4, r7) /* GPR4-6 */ + REST_GPR(5, r7) + REST_GPR(6, r7) + REST_4GPRS(8, r7) /* GPR8-11 */ + REST_2GPRS(12, r7) /* GPR12-13 */ + + REST_NVGPRS(r7) /* GPR14-31 */ + + ld r7, GPR7(r7) /* GPR7 */ + + /* Commit register state as checkpointed state: */ + TRECHKPT + + /* Our transactional state has now changed. + * + * Now just get out of here. Transactional (current) state will be + * updated once restore is called on the return path in the _switch-ed + * -to process. + */ + + GET_PACA(r13) + GET_SCRATCH0(r1) + + REST_NVGPRS(r1) + + addi r1, r1, TM_FRAME_SIZE + ld r4, 8(r1) + ld r0, 16(r1) + mtcr r4 + mtlr r0 + ld r2, 40(r1) + blr + + /* ****************************************************************** */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index a008cf5c0fce..f9b751b29558 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -58,6 +58,7 @@ #include <asm/rio.h> #include <asm/fadump.h> #include <asm/switch_to.h> +#include <asm/tm.h> #include <asm/debug.h> #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) @@ -78,6 +79,13 @@ EXPORT_SYMBOL(__debugger_break_match); EXPORT_SYMBOL(__debugger_fault_handler); #endif +/* Transactional Memory trap debug */ +#ifdef TM_DEBUG_SW +#define TM_DEBUG(x...) printk(KERN_INFO x) +#else +#define TM_DEBUG(x...) do { } while(0) +#endif + /* * Trap & Exception support */ @@ -350,6 +358,7 @@ static inline int check_io_access(struct pt_regs *regs) exception is in the MSR. */ #define get_reason(regs) ((regs)->msr) #define get_mc_reason(regs) ((regs)->msr) +#define REASON_TM 0x200000 #define REASON_FP 0x100000 #define REASON_ILLEGAL 0x80000 #define REASON_PRIVILEGED 0x40000 @@ -1020,6 +1029,38 @@ void __kprobes program_check_exception(struct pt_regs *regs) _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); return; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (reason & REASON_TM) { + /* This is a TM "Bad Thing Exception" program check. + * This occurs when: + * - An rfid/hrfid/mtmsrd attempts to cause an illegal + * transition in TM states. + * - A trechkpt is attempted when transactional. + * - A treclaim is attempted when non transactional. + * - A tend is illegally attempted. + * - writing a TM SPR when transactional. 
+ */ + if (!user_mode(regs) && + report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) { + regs->nip += 4; + return; + } + /* If usermode caused this, it's done something illegal and + * gets a SIGILL slap on the wrist. We call it an illegal + * operand to distinguish from the instruction just being bad + * (e.g. executing a 'tend' on a CPU without TM!); it's an + * illegal /placement/ of a valid instruction. + */ + if (user_mode(regs)) { + _exception(SIGILL, regs, ILL_ILLOPN, regs->nip); + return; + } else { + printk(KERN_EMERG "Unexpected TM Bad Thing exception " + "at %lx (msr 0x%x)\n", regs->nip, reason); + die("Unrecoverable exception", regs, SIGABRT); + } + } +#endif /* We restore the interrupt state now */ if (!arch_irq_disabled_regs(regs)) @@ -1160,6 +1201,109 @@ void vsx_unavailable_exception(struct pt_regs *regs) die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT); } +void tm_unavailable_exception(struct pt_regs *regs) +{ + /* We restore the interrupt state now */ + if (!arch_irq_disabled_regs(regs)) + local_irq_enable(); + + /* Currently we never expect a TMU exception. Catch + * this and kill the process! + */ + printk(KERN_EMERG "Unexpected TM unavailable exception at %lx " + "(msr %lx)\n", + regs->nip, regs->msr); + + if (user_mode(regs)) { + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return; + } + + die("Unexpected TM unavailable exception", regs, SIGABRT); +} + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + +extern void do_load_up_fpu(struct pt_regs *regs); + +void fp_unavailable_tm(struct pt_regs *regs) +{ + /* Note: This does not handle any kind of FP laziness. */ + + TM_DEBUG("FP Unavailable trap whilst transactional at 0x%lx, MSR=%lx\n", + regs->nip, regs->msr); + tm_enable(); + + /* We can only have got here if the task started using FP after + * beginning the transaction. So, the transactional regs are just a + * copy of the checkpointed ones. But, we still need to recheckpoint + * as we're enabling FP for the process; it will return, abort the + * transaction, and probably retry but now with FP enabled. So the + * checkpointed FP registers need to be loaded. + */ + tm_reclaim(&current->thread, current->thread.regs->msr, + TM_CAUSE_FAC_UNAV); + /* Reclaim didn't save out any FPRs to transact_fprs. */ + + /* Enable FP for the task: */ + regs->msr |= (MSR_FP | current->thread.fpexc_mode); + + /* This loads and recheckpoints the FP registers from + * thread.fpr[]. They will remain in registers after the + * checkpoint so we don't need to reload them after. + */ + tm_recheckpoint(&current->thread, regs->msr); +} + +#ifdef CONFIG_ALTIVEC +extern void do_load_up_altivec(struct pt_regs *regs); + +void altivec_unavailable_tm(struct pt_regs *regs) +{ + /* See the comments in fp_unavailable_tm(). This function operates + * the same way. + */ + + TM_DEBUG("Vector Unavailable trap whilst transactional at 0x%lx," + "MSR=%lx\n", + regs->nip, regs->msr); + tm_enable(); + tm_reclaim(&current->thread, current->thread.regs->msr, + TM_CAUSE_FAC_UNAV); + regs->msr |= MSR_VEC; + tm_recheckpoint(&current->thread, regs->msr); + current->thread.used_vr = 1; +} +#endif + +#ifdef CONFIG_VSX +void vsx_unavailable_tm(struct pt_regs *regs) +{ + /* See the comments in fp_unavailable_tm(). This works similarly, + * though we're loading both FP and VEC registers in here. + * + * If FP isn't in use, load FP regs. If VEC isn't in use, load VEC + * regs. Either way, set MSR_VSX.
+ */ + + TM_DEBUG("VSX Unavailable trap whilst transactional at 0x%lx," + "MSR=%lx\n", + regs->nip, regs->msr); + + tm_enable(); + /* This reclaims FP and/or VR regs if they're already enabled */ + tm_reclaim(¤t->thread, current->thread.regs->msr, + TM_CAUSE_FAC_UNAV); + + regs->msr |= MSR_VEC | MSR_FP | current->thread.fpexc_mode | + MSR_VSX; + /* This loads & recheckpoints FP and VRs. */ + tm_recheckpoint(¤t->thread, regs->msr); + current->thread.used_vsr = 1; +} +#endif +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + void performance_monitor_exception(struct pt_regs *regs) { __get_cpu_var(irq_stat).pmu_irqs++; diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index e830289d2e48..9e20999aaef2 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -7,6 +7,57 @@ #include <asm/page.h> #include <asm/ptrace.h> +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Wrapper to call load_up_altivec from C. + * void do_load_up_altivec(struct pt_regs *regs); + */ +_GLOBAL(do_load_up_altivec) + mflr r0 + std r0, 16(r1) + stdu r1, -112(r1) + + subi r6, r3, STACK_FRAME_OVERHEAD + /* load_up_altivec expects r12=MSR, r13=PACA, and returns + * with r12 = new MSR. + */ + ld r12,_MSR(r6) + GET_PACA(r13) + bl load_up_altivec + std r12,_MSR(r6) + + ld r0, 112+16(r1) + addi r1, r1, 112 + mtlr r0 + blr + +/* void do_load_up_transact_altivec(struct thread_struct *thread) + * + * This is similar to load_up_altivec but for the transactional version of the + * vector regs. It doesn't mess with the task MSR or valid flags. + * Furthermore, VEC laziness is not supported with TM currently. + */ +_GLOBAL(do_load_up_transact_altivec) + mfmsr r6 + oris r5,r6,MSR_VEC@h + MTMSRD(r5) + isync + + li r4,1 + stw r4,THREAD_USED_VR(r3) + + li r10,THREAD_TRANSACT_VSCR + lvx vr0,r10,r3 + mtvscr vr0 + REST_32VRS_TRANSACT(0,r4,r3) + + /* Disable VEC again. */ + MTMSRD(r6) + isync + + blr +#endif + /* * load_up_altivec(unused, unused, tsk) * Disable VMX for the task which had it previously, diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 10b6c358dd77..e33d11f1b977 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -539,6 +539,11 @@ fast_guest_return: /* Enter guest */ +BEGIN_FTR_SECTION + ld r5, VCPU_CFAR(r4) + mtspr SPRN_CFAR, r5 +END_FTR_SECTION_IFSET(CPU_FTR_CFAR) + ld r5, VCPU_LR(r4) lwz r6, VCPU_CR(r4) mtlr r5 @@ -604,6 +609,10 @@ kvmppc_interrupt: lwz r4, HSTATE_SCRATCH1(r13) std r3, VCPU_GPR(R12)(r9) stw r4, VCPU_CR(r9) +BEGIN_FTR_SECTION + ld r3, HSTATE_CFAR(r13) + std r3, VCPU_CFAR(r9) +END_FTR_SECTION_IFSET(CPU_FTR_CFAR) /* Restore R1/R2 so we can handle faults */ ld r1, HSTATE_HOST_R1(r13) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 67e4708388a0..6702442ca818 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -35,6 +35,7 @@ #include <asm/mmu_context.h> #include <asm/switch_to.h> #include <asm/firmware.h> +#include <asm/hvcall.h> #include <linux/gfp.h> #include <linux/sched.h> #include <linux/vmalloc.h> diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 3a292be2e079..1b6e1271719f 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -55,6 +55,7 @@ #include <asm/code-patching.h> #include <asm/fadump.h> #include <asm/firmware.h> +#include <asm/tm.h> #ifdef DEBUG #define DBG(fmt...) 
udbg_printf(fmt) @@ -1171,6 +1172,21 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx); ppc_md.hpte_invalidate(slot, vpn, psize, ssize, local); } pte_iterate_hashed_end(); + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Transactions are not aborted by tlbiel, only tlbie. + * Without, syncing a page back to a block device w/ PIO could pick up + * transactional data (bad!) so we force an abort here. Before the + * sync the page will be made read-only, which will flush_hash_page. + * BIG ISSUE here: if the kernel uses a page from userspace without + * unmapping it first, it may see the speculated version. + */ + if (local && cpu_has_feature(CPU_FTR_TM) && + MSR_TM_ACTIVE(current->thread.regs->msr)) { + tm_enable(); + tm_abort(TM_CAUSE_TLBI); + } +#endif } void flush_hash_range(unsigned long number, int local) diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c index d00d7b0a3bda..6cc58201db8c 100644 --- a/arch/powerpc/platforms/ps3/htab.c +++ b/arch/powerpc/platforms/ps3/htab.c @@ -27,6 +27,7 @@ #include <asm/lv1call.h> #include <asm/ps3fb.h> +#define PS3_VERBOSE_RESULT #include "platform.h" /** @@ -75,8 +76,9 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn, if (result) { /* all entries bolted !*/ - pr_info("%s:result=%d vpn=%lx pa=%lx ix=%lx v=%llx r=%llx\n", - __func__, result, vpn, pa, hpte_group, hpte_v, hpte_r); + pr_info("%s:result=%s vpn=%lx pa=%lx ix=%lx v=%llx r=%llx\n", + __func__, ps3_result(result), vpn, pa, hpte_group, + hpte_v, hpte_r); BUG(); } @@ -125,8 +127,8 @@ static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp, &hpte_rs); if (result) { - pr_info("%s: res=%d read vpn=%lx slot=%lx psize=%d\n", - __func__, result, vpn, slot, psize); + pr_info("%s: result=%s read vpn=%lx slot=%lx psize=%d\n", + __func__, ps3_result(result), vpn, slot, psize); BUG(); } @@ -170,8 +172,8 @@ static void ps3_hpte_invalidate(unsigned long slot, unsigned long vpn, result = lv1_write_htab_entry(PS3_LPAR_VAS_ID_CURRENT, slot, 0, 0); if (result) { - pr_info("%s: res=%d vpn=%lx slot=%lx psize=%d\n", - __func__, result, vpn, slot, psize); + pr_info("%s: result=%s vpn=%lx slot=%lx psize=%d\n", + __func__, ps3_result(result), vpn, slot, psize); BUG(); } |
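
Note on the TS-field tests used by the sigreturn paths in this patch: the 64-bit
path checks MSR_TM_SUSPENDED(msr) directly, while the 32-bit compat path first
rebuilds a 64-bit value from the high MSR word stashed in the transactional
mcontext and then checks MSR_TM_SUSPENDED(msr_hi << 32). The sketch below is a
minimal user-space model of those predicates, not kernel code: the helper names
are made up here, and the bit positions are inferred from the patch's own use of
"| __MASK(33)" when forcing state S. The authoritative definitions are MSR_TS_S,
MSR_TS_T, MSR_TS_MASK, MSR_TM_ACTIVE and MSR_TM_SUSPENDED in
arch/powerpc/include/asm/reg.h.

  #include <stdint.h>
  #include <stdio.h>

  /* TS field: bit 33 = suspended (S), bit 34 = transactional (T).
   * Assumed from the patch's "| __MASK(33)"; see asm/reg.h for the
   * canonical values.
   */
  #define MSR_TS_S    (1ULL << 33)
  #define MSR_TS_T    (1ULL << 34)
  #define MSR_TS_MASK (MSR_TS_S | MSR_TS_T)

  /* Any non-zero TS field means a transaction was active. */
  static int msr_tm_active(uint64_t msr)
  {
          return (msr & MSR_TS_MASK) != 0;
  }

  /* Signal delivery treclaims, so by sigreturn time the state is suspended. */
  static int msr_tm_suspended(uint64_t msr)
  {
          return (msr & MSR_TS_MASK) == MSR_TS_S;
  }

  int main(void)
  {
          /* A 32-bit frame holds only the low MSR word; the high word is
           * stashed in the transactional mcontext, so the compat path
           * rebuilds the value as (msr_hi << 32) before testing it.
           */
          uint32_t msr_hi = (uint32_t)(MSR_TS_S >> 32);  /* hypothetical value read back */
          uint64_t msr = (uint64_t)msr_hi << 32;

          printf("active=%d suspended=%d\n",
                 msr_tm_active(msr), msr_tm_suspended(msr));
          return 0;
  }

Real signal handlers and kernel code should use the asm/reg.h macros rather than
re-deriving these masks.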