summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHans Wennborg <hans@hanshq.net>2014-03-18 20:04:34 +0000
committerHans Wennborg <hans@hanshq.net>2014-03-18 20:04:34 +0000
commitaec21ce43e7fa7e0f45eb3ebfd0761076f881ea3 (patch)
treeeb00f1d1a70e8e8a5098a9f415749b8dfa27acef
parent34b26458cbfe810e1cc8b6a7cfdc9cb8338e90cf (diff)
downloadbcm5719-llvm-aec21ce43e7fa7e0f45eb3ebfd0761076f881ea3.tar.gz
bcm5719-llvm-aec21ce43e7fa7e0f45eb3ebfd0761076f881ea3.zip
X86 memcpy lowering: use "rep movs" even when esi is used as base pointer
For functions where esi is used as base pointer, we would previously fall back from lowering memcpy with "rep movs" because that clobbers esi. With this patch, we just store esi in another physical register, and restore it afterwards. This adds a little bit of register preassure, but the more efficient memcpy should be worth it. Differential Revision: http://llvm-reviews.chandlerc.com/D2968 llvm-svn: 204174
-rw-r--r--llvm/lib/Target/X86/X86SelectionDAGInfo.cpp42
-rw-r--r--llvm/test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll4
-rw-r--r--llvm/test/CodeGen/X86/stack-align-memcpy.ll9
3 files changed, 39 insertions, 16 deletions
diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
index b9c620fddc4..3b3ab2a83a1 100644
--- a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -200,13 +200,11 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
SrcPtrInfo.getAddrSpace() >= 256)
return SDValue();
- // ESI might be used as a base pointer, in that case we can't simply overwrite
- // the register. Fall back to generic code.
+ // If ESI is used as a base pointer, we must preserve it when doing rep movs.
const X86RegisterInfo *TRI =
static_cast<const X86RegisterInfo *>(DAG.getTarget().getRegisterInfo());
- if (TRI->hasBasePointer(DAG.getMachineFunction()) &&
- TRI->getBaseRegister() == X86::ESI)
- return SDValue();
+ bool PreserveESI = TRI->hasBasePointer(DAG.getMachineFunction()) &&
+ TRI->getBaseRegister() == X86::ESI;
MVT AVT;
if (Align & 1)
@@ -225,27 +223,45 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
SDValue Count = DAG.getIntPtrConstant(CountVal);
unsigned BytesLeft = SizeVal % UBytes;
- SDValue InFlag(0, 0);
+
+ if (PreserveESI) {
+ // Save ESI to a physical register. (We cannot use a virtual register
+ // because if it is spilled we wouldn't be able to reload it.)
+ // We don't glue this because the register dependencies are explicit.
+ Chain = DAG.getCopyToReg(Chain, dl, X86::EDX,
+ DAG.getRegister(X86::ESI, MVT::i32));
+ }
+
+ SDValue InGlue(0, 0);
Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
X86::ECX,
- Count, InFlag);
- InFlag = Chain.getValue(1);
+ Count, InGlue);
+ InGlue = Chain.getValue(1);
Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
X86::EDI,
- Dst, InFlag);
- InFlag = Chain.getValue(1);
+ Dst, InGlue);
+ InGlue = Chain.getValue(1);
Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
X86::ESI,
- Src, InFlag);
- InFlag = Chain.getValue(1);
+ Src, InGlue);
+ InGlue = Chain.getValue(1);
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
+ SDValue Ops[] = { Chain, DAG.getValueType(AVT), InGlue };
+ // FIXME: Make X86rep_movs explicitly use FCX, RDI, RSI instead of glue.
SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
array_lengthof(Ops));
+ if (PreserveESI) {
+ InGlue = RepMovs.getValue(1);
+ RepMovs = DAG.getCopyToReg(RepMovs, dl, X86::ESI,
+ DAG.getRegister(X86::EDX, MVT::i32), InGlue);
+ }
+
SmallVector<SDValue, 4> Results;
Results.push_back(RepMovs);
+
+
if (BytesLeft) {
// Handle the last 1 - 7 bytes.
unsigned Offset = SizeVal - BytesLeft;
diff --git a/llvm/test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll b/llvm/test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll
index b55571bcba0..f64ccb84683 100644
--- a/llvm/test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll
@@ -13,5 +13,7 @@ define void @test1(%struct.foo* nocapture %x, i32 %y, i8* %z) nounwind {
; CHECK-LABEL: test1:
; CHECK: movl %esp, %esi
-; CHECK-NOT: rep;movsl
+; CHECK: movl %esi, %edx
+; CHECK: rep;movsl
+; CHECK: movl %edx, %esi
}
diff --git a/llvm/test/CodeGen/X86/stack-align-memcpy.ll b/llvm/test/CodeGen/X86/stack-align-memcpy.ll
index 0cc3aa84889..d6ea8b31475 100644
--- a/llvm/test/CodeGen/X86/stack-align-memcpy.ll
+++ b/llvm/test/CodeGen/X86/stack-align-memcpy.ll
@@ -15,7 +15,9 @@ define void @test1(%struct.foo* nocapture %x, i32 %y) nounwind {
; CHECK-LABEL: test1:
; CHECK: andl $-16, %esp
; CHECK: movl %esp, %esi
-; CHECK-NOT: rep;movsl
+; CHECK: movl %esi, %edx
+; CHECK: rep;movsl
+; CHECK: movl %edx, %esi
}
; PR19012
@@ -28,7 +30,9 @@ define void @test2(%struct.foo* nocapture %x, i32 %y, i8* %z) nounwind {
; CHECK-LABEL: test2:
; CHECK: movl %esp, %esi
-; CHECK-NOT: rep;movsl
+; CHECK: movl %esi, %edx
+; CHECK: rep;movsl
+; CHECK: movl %edx, %esi
}
; Check that we do use rep movs if we make the alloca static.
@@ -39,5 +43,6 @@ define void @test3(%struct.foo* nocapture %x, i32 %y, i8* %z) nounwind {
ret void
; CHECK-LABEL: test3:
+; CHECK-NOT: movl %esi, %edx
; CHECK: rep;movsl
}
OpenPOWER on IntegriCloud