summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 4
-rw-r--r-- llvm/test/CodeGen/X86/2011-10-21-widen-cmp.ll 5
-rw-r--r-- llvm/test/CodeGen/X86/mem-intrin-base-reg.ll 7
3 files changed, 8 insertions, 8 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index fc5501622b6..80db5ee8943 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7135,6 +7135,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
return N0.getOperand(0);
}
+ // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
+ if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
+ return SDValue();
+
// Fold extract-and-trunc into a narrow extract. For example:
// i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
// i32 y = TRUNCATE(i64 x)
diff --git a/llvm/test/CodeGen/X86/2011-10-21-widen-cmp.ll b/llvm/test/CodeGen/X86/2011-10-21-widen-cmp.ll
index 420e843b52a..f916ff28c36 100644
--- a/llvm/test/CodeGen/X86/2011-10-21-widen-cmp.ll
+++ b/llvm/test/CodeGen/X86/2011-10-21-widen-cmp.ll
@@ -10,10 +10,7 @@ define void @cmp_2_floats() {
; CHECK-NEXT: cmpordps %xmm0, %xmm0
; CHECK-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; CHECK-NEXT: psllq $32, %xmm0
-; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-NEXT: psrad $31, %xmm0
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; CHECK-NEXT: pslld $31, %xmm0
; CHECK-NEXT: blendvps %xmm0, %xmm0
; CHECK-NEXT: movlps %xmm0, (%rax)
diff --git a/llvm/test/CodeGen/X86/mem-intrin-base-reg.ll b/llvm/test/CodeGen/X86/mem-intrin-base-reg.ll
index 9bace29e185..59a2207b470 100644
--- a/llvm/test/CodeGen/X86/mem-intrin-base-reg.ll
+++ b/llvm/test/CodeGen/X86/mem-intrin-base-reg.ll
@@ -8,15 +8,12 @@ target triple = "i686-pc-windows-msvc"
; for when this is necessary. Typically, we chose ESI for the base register,
; which all of the X86 string instructions use.
-; The pattern of vector icmp and extractelement is used in these tests because
-; it forces creation of an aligned stack temporary. Perhaps such temporaries
-; shouldn't be aligned.
-
declare void @escape_vla_and_icmp(i8*, i1 zeroext)
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1)
define i32 @memcpy_novla_vector(<4 x i32>* %vp0, i8* %a, i8* %b, i32 %n, i1 zeroext %cond) {
+ %foo = alloca <4 x i32>, align 16
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %b, i32 128, i32 4, i1 false)
br i1 %cond, label %spill_vectors, label %no_vectors
@@ -42,6 +39,7 @@ spill_vectors:
; CHECK: rep;movsl
define i32 @memcpy_vla_vector(<4 x i32>* %vp0, i8* %a, i8* %b, i32 %n, i1 zeroext %cond) {
+ %foo = alloca <4 x i32>, align 16
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %b, i32 128, i32 4, i1 false)
br i1 %cond, label %spill_vectors, label %no_vectors
@@ -70,6 +68,7 @@ spill_vectors:
; stosd doesn't clobber esi, so we can use it.
define i32 @memset_vla_vector(<4 x i32>* %vp0, i8* %a, i32 %n, i1 zeroext %cond) {
+ %foo = alloca <4 x i32>, align 16
call void @llvm.memset.p0i8.i32(i8* %a, i8 42, i32 128, i32 4, i1 false)
br i1 %cond, label %spill_vectors, label %no_vectors
OpenPOWER on IntegriCloud