summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
authorBenjamin Kramer <benny.kra@googlemail.com>2011-01-02 19:57:05 +0000
committerBenjamin Kramer <benny.kra@googlemail.com>2011-01-02 19:57:05 +0000
commit25e6e06e423a14626bf7d650367a07c3c21e08a8 (patch)
tree031c07eeca5ead0b3ed7b51e985e30ca5aae7c65 /llvm
parent7293698ed3596d10e60c1394f2ab78171fa975d6 (diff)
downloadbcm5719-llvm-25e6e06e423a14626bf7d650367a07c3c21e08a8.tar.gz
bcm5719-llvm-25e6e06e423a14626bf7d650367a07c3c21e08a8.zip
Try to reuse the value when lowering memset.
This allows us to compile: void test(char *s, int a) { __builtin_memset(s, a, 15); } into 1 mul + 3 stores instead of 3 muls + 3 stores. llvm-svn: 122710
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp24
-rw-r--r--llvm/lib/Target/X86/README-X86-64.txt44
-rw-r--r--llvm/test/CodeGen/X86/memset-2.ll9
3 files changed, 30 insertions, 47 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b639b7c9cf7..110812c4371 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3527,16 +3527,34 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
SmallVector<SDValue, 8> OutChains;
uint64_t DstOff = 0;
unsigned NumMemOps = MemOps.size();
+
+ // Find the largest store and generate the bit pattern for it.
+ EVT LargestVT = MemOps[0];
+ for (unsigned i = 1; i < NumMemOps; i++)
+ if (MemOps[i].bitsGT(LargestVT))
+ LargestVT = MemOps[i];
+ SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl);
+
for (unsigned i = 0; i < NumMemOps; i++) {
EVT VT = MemOps[i];
- unsigned VTSize = VT.getSizeInBits() / 8;
- SDValue Value = getMemsetValue(Src, VT, DAG, dl);
+
+ // If this store is smaller than the largest store see whether we can get
+ // the smaller value for free with a truncate.
+ SDValue Value = MemSetValue;
+ if (VT.bitsLT(LargestVT)) {
+ if (!LargestVT.isVector() && !VT.isVector() &&
+ TLI.isTruncateFree(LargestVT, VT))
+ Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
+ else
+ Value = getMemsetValue(Src, VT, DAG, dl);
+ }
+ assert(Value.getValueType() == VT && "Value with wrong type.");
SDValue Store = DAG.getStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
DstPtrInfo.getWithOffset(DstOff),
isVol, false, Align);
OutChains.push_back(Store);
- DstOff += VTSize;
+ DstOff += VT.getSizeInBits() / 8;
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
diff --git a/llvm/lib/Target/X86/README-X86-64.txt b/llvm/lib/Target/X86/README-X86-64.txt
index 78c4dc00ee7..e21d69a7bcb 100644
--- a/llvm/lib/Target/X86/README-X86-64.txt
+++ b/llvm/lib/Target/X86/README-X86-64.txt
@@ -41,50 +41,6 @@ saved a few instructions.
//===---------------------------------------------------------------------===//
-Poor codegen:
-
-int X[2];
-int b;
-void test(void) {
- memset(X, b, 2*sizeof(X[0]));
-}
-
-llc:
- movq _b@GOTPCREL(%rip), %rax
- movzbq (%rax), %rax
- movq %rax, %rcx
- shlq $8, %rcx
- orq %rax, %rcx
- movq %rcx, %rax
- shlq $16, %rax
- orq %rcx, %rax
- movq %rax, %rcx
- shlq $32, %rcx
- movq _X@GOTPCREL(%rip), %rdx
- orq %rax, %rcx
- movq %rcx, (%rdx)
- ret
-
-gcc:
- movq _b@GOTPCREL(%rip), %rax
- movabsq $72340172838076673, %rdx
- movzbq (%rax), %rax
- imulq %rdx, %rax
- movq _X@GOTPCREL(%rip), %rdx
- movq %rax, (%rdx)
- ret
-
-And the codegen is even worse for the following
-(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33103):
- void fill1(char *s, int a)
- {
- __builtin_memset(s, a, 15);
- }
-
-For this version, we duplicate the computation of the constant to store.
-
-//===---------------------------------------------------------------------===//
-
It's not possible to reference AH, BH, CH, and DH registers in an instruction
requiring REX prefix. However, divb and mulb both produce results in AH. If isel
emits a CopyFromReg which gets turned into a movb and that can be allocated a
diff --git a/llvm/test/CodeGen/X86/memset-2.ll b/llvm/test/CodeGen/X86/memset-2.ll
index 11f619cd6da..ae6b6e9772b 100644
--- a/llvm/test/CodeGen/X86/memset-2.ll
+++ b/llvm/test/CodeGen/X86/memset-2.ll
@@ -28,3 +28,12 @@ entry:
; CHECK: imull $16843009
}
+define void @t4(i8* nocapture %s, i8 %a) nounwind {
+entry:
+ tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 15, i32 1, i1 false)
+ ret void
+; CHECK: t4:
+; CHECK: imull $16843009
+; CHECK-NOT: imul
+; CHECK: ret
+}
OpenPOWER on IntegriCloud