summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAnton Blanchard <anton@samba.org>2012-04-15 20:56:45 +0000
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2012-04-30 15:37:17 +1000
commit35000870fcfbb28757ad47de77b4645072d916b8 (patch)
tree92e9fe4570f7239e514b1254090cc595827f0dff
parent8cd3c23df79411f6b24ddb7d2ed58d26e3b06815 (diff)
downloadblackbird-op-linux-35000870fcfbb28757ad47de77b4645072d916b8.tar.gz
blackbird-op-linux-35000870fcfbb28757ad47de77b4645072d916b8.zip
powerpc: Optimise enable_kernel_altivec
Add two optimisations to enable_kernel_altivec: - enable_kernel_altivec has already determined if we need to save the previous task's state but we call giveup_altivec in both cases, requiring an extra branch in giveup_altivec. Create giveup_altivec_notask which only turns on the VMX bit in the MSR. - We write the VMX MSR bit each time we call enable_kernel_altivec even if it was already set. Check the bit and branch out if we have already set it. The classic case for this is vectored IO where we have to copy multiple buffers to or from userspace. The following testcase was used to confirm this patch improves performance: http://ozlabs.org/~anton/junkcode/copy_to_user.c Since the current breakpoint for using VMX in copy_tofrom_user is 4096 bytes, I'm using buffers of 4096 + 1 cacheline (4224) bytes. A benchmark of 16 entry readvs (-s 16): time copy_to_user -l 4224 -s 16 -i 1000000 completes 5.2% faster on a POWER7 PS700. Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r--arch/powerpc/include/asm/switch_to.h1
-rw-r--r--arch/powerpc/kernel/process.c2
-rw-r--r--arch/powerpc/kernel/vector.S10
3 files changed, 12 insertions, 1 deletions
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 2824609319c7..1a6320290d26 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -40,6 +40,7 @@ static inline void discard_lazy_cpu_state(void)
#ifdef CONFIG_ALTIVEC
extern void flush_altivec_to_thread(struct task_struct *);
extern void giveup_altivec(struct task_struct *);
+extern void giveup_altivec_notask(void);
#else
static inline void flush_altivec_to_thread(struct task_struct *t)
{
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 4937c9690090..bb58f41fc045 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -124,7 +124,7 @@ void enable_kernel_altivec(void)
if (current->thread.regs && (current->thread.regs->msr & MSR_VEC))
giveup_altivec(current);
else
- giveup_altivec(NULL); /* just enable AltiVec for kernel - force */
+ giveup_altivec_notask();
#else
giveup_altivec(last_task_used_altivec);
#endif /* CONFIG_SMP */
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 4d5a3edff49e..e830289d2e48 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -89,6 +89,16 @@ _GLOBAL(load_up_altivec)
/* restore registers and return */
blr
+_GLOBAL(giveup_altivec_notask) /* turn on MSR_VEC in the MSR; no task state is saved here */
+ mfmsr r3 /* r3 = current MSR */
+ andis. r4,r3,MSR_VEC@h /* test the MSR_VEC bit; r4 is only a scratch result */
+ bnelr /* Already enabled? */
+ oris r3,r3,MSR_VEC@h /* set MSR_VEC in the copy held in r3 */
+ SYNC
+ MTMSRD(r3) /* enable use of VMX now */
+ isync
+ blr
+
/*
* giveup_altivec(tsk)
* Disable VMX for the task given as the argument,
OpenPOWER on IntegriCloud