-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp    | 13
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 14
-rw-r--r-- llvm/test/CodeGen/AArch64/pr40091.ll             |  2
-rw-r--r-- llvm/test/CodeGen/X86/fold-vector-sext-zext.ll   | 20
4 files changed, 34 insertions, 15 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7a22caf9c8b..10cce7813dc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8064,10 +8064,15 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
unsigned NumElts = VT.getVectorNumElements();
SDLoc DL(N);
- for (unsigned i=0; i != NumElts; ++i) {
- SDValue Op = N0->getOperand(i);
- if (Op->isUndef()) {
- Elts.push_back(DAG.getUNDEF(SVT));
+  // For zero-extensions, UNDEF elements are still guaranteed to have the
+  // upper bits set to zero.
+ bool IsZext =
+ Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Op = N0.getOperand(i);
+ if (Op.isUndef()) {
+ Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
continue;
}
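
The DAGCombiner hunk above teaches tryToFoldExtendOfConstant to treat undef build-vector lanes specially when the extension is ISD::ZERO_EXTEND or ISD::ZERO_EXTEND_VECTOR_INREG: instead of propagating undef into the result, the lane folds to the constant 0. This is sound because undef may be refined to any concrete value, so the fold simply picks 0; and whichever value an undef lane is assumed to take, the zero-extended result has its upper bits clear, which is the guarantee the new comment refers to. A minimal standalone sketch of that invariant (plain C++, no LLVM dependencies, purely illustrative):

#include <cassert>
#include <cstdint>

// Whatever concrete i8 value an undef lane is assumed to hold, its
// zero-extension to i16 has all upper bits clear, so folding the lane to
// the constant 0 is always consistent with zext semantics.
int main() {
  for (unsigned v = 0; v <= 0xFF; ++v) {
    uint16_t ext = static_cast<uint16_t>(static_cast<uint8_t>(v));
    assert((ext & 0xFF00) == 0); // upper byte is zero for every choice of v
  }
  return 0;
}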
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 4c551d5b231..7053e596237 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1848,6 +1848,13 @@ bool TargetLowering::SimplifyDemandedVectorElts(
return true;
KnownZero = SrcZero.zextOrTrunc(NumElts);
KnownUndef = SrcUndef.zextOrTrunc(NumElts);
+
+ if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
+ // zext(undef) upper bits are guaranteed to be zero.
+ if (DemandedElts.isSubsetOf(KnownUndef))
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+ KnownUndef.clearAllBits();
+ }
break;
}
case ISD::OR:
@@ -1892,6 +1899,13 @@ bool TargetLowering::SimplifyDemandedVectorElts(
if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
KnownZero, TLO, Depth + 1))
return true;
+
+ if (Op.getOpcode() == ISD::ZERO_EXTEND) {
+ // zext(undef) upper bits are guaranteed to be zero.
+ if (DemandedElts.isSubsetOf(KnownUndef))
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+ KnownUndef.clearAllBits();
+ }
break;
default: {
if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
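
The two TargetLowering.cpp hunks apply the same reasoning inside SimplifyDemandedVectorElts, once for ISD::ZERO_EXTEND_VECTOR_INREG and once for ISD::ZERO_EXTEND: if every demanded element comes from a known-undef source lane, the whole node can be replaced by an all-zeros constant; otherwise KnownUndef must be cleared, since a zero-extended lane always has defined (zero) upper bits and is therefore never undef itself. A simplified model of that control flow, using plain bitmasks in place of LLVM's APInt/TargetLoweringOpt machinery (names here are illustrative, not LLVM's API):

#include <cstdint>

// Lane state as 8-bit masks (bit i set = lane i). "demanded" and
// "knownUndef" mirror DemandedElts/KnownUndef but are not LLVM types.
struct Result {
  bool foldToZero;    // replace the whole zext with an all-zeros constant
  uint8_t knownUndef; // undef lanes reported back to the caller
};

Result modelZextDemandedElts(uint8_t demanded, uint8_t knownUndef) {
  // DemandedElts.isSubsetOf(KnownUndef): every demanded lane is undef at
  // the source, so the node folds to a zero constant.
  if ((demanded & ~knownUndef) == 0)
    return {true, knownUndef};
  // Otherwise report no undef lanes: zero-extension gives each lane
  // defined (zero) upper bits, so undefness does not propagate.
  return {false, 0};
}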
diff --git a/llvm/test/CodeGen/AArch64/pr40091.ll b/llvm/test/CodeGen/AArch64/pr40091.ll
index 8cf51f4beb5..b70ae8a39b7 100644
--- a/llvm/test/CodeGen/AArch64/pr40091.ll
+++ b/llvm/test/CodeGen/AArch64/pr40091.ll
@@ -4,7 +4,7 @@
define i64 @test(i64 %aa) {
; CHECK-LABEL: test:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v0.2d, #0xffffffffffffffff
+; CHECK-NEXT: movi v0.8b, #137
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
entry:
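
For the PR40091 regression test, the expected AArch64 output changes from materializing an all-ones vector to splatting the byte 137 (0x89) across the eight lanes of v0 before the fmov to x0. Assuming the usual semantics of movi with the .8b arrangement (replicate the 8-bit immediate into every byte lane), a quick host-side reconstruction of the value the new sequence produces (purely illustrative, not part of the test):

#include <cstdint>
#include <cstdio>

// movi v0.8b, #137 splats 0x89 into all eight byte lanes of the 64-bit
// register; fmov x0, d0 then copies that register to x0.
int main() {
  uint64_t splat = 0;
  for (int lane = 0; lane < 8; ++lane)
    splat |= static_cast<uint64_t>(137) << (8 * lane);
  std::printf("0x%016llx\n",
              static_cast<unsigned long long>(splat)); // 0x8989898989898989
  return 0;
}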
diff --git a/llvm/test/CodeGen/X86/fold-vector-sext-zext.ll b/llvm/test/CodeGen/X86/fold-vector-sext-zext.ll
index a8e78cc5565..465c7cebf54 100644
--- a/llvm/test/CodeGen/X86/fold-vector-sext-zext.ll
+++ b/llvm/test/CodeGen/X86/fold-vector-sext-zext.ll
@@ -261,12 +261,12 @@ define <4 x i64> @test_zext_4i8_4i64() {
define <4 x i16> @test_zext_4i8_4i16_undef() {
; X32-LABEL: test_zext_4i8_4i16_undef:
; X32: # %bb.0:
-; X32-NEXT: vmovaps {{.*#+}} xmm0 = <u,255,u,253>
+; X32-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,0,253]
; X32-NEXT: retl
;
; X64-LABEL: test_zext_4i8_4i16_undef:
; X64: # %bb.0:
-; X64-NEXT: vmovaps {{.*#+}} xmm0 = <u,255,u,253>
+; X64-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,0,253]
; X64-NEXT: retq
%1 = insertelement <4 x i8> undef, i8 undef, i32 0
%2 = insertelement <4 x i8> %1, i8 -1, i32 1
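
The pattern in this and the remaining fold-vector-sext-zext.ll hunks is uniform: lanes the constant folder previously left undef (printed as u in the asm comments) now fold to 0, so <u,255,u,253> becomes [0,255,0,253], and so on. A small model of the fold for this first test's lanes, with std::nullopt standing in for an undef i8 lane (illustrative, not LLVM code):

#include <array>
#include <cstddef>
#include <cstdint>
#include <optional>

// An undef i8 source lane (nullopt) zero-extends to a defined 0 rather
// than remaining undef; defined lanes extend as usual.
std::array<uint16_t, 4>
foldZextLanes(const std::array<std::optional<uint8_t>, 4> &src) {
  std::array<uint16_t, 4> out{};
  for (std::size_t i = 0; i < src.size(); ++i)
    out[i] = src[i] ? static_cast<uint16_t>(*src[i]) : 0; // undef lane -> 0
  return out;
}

// foldZextLanes({std::nullopt, 255, std::nullopt, 253}) yields
// {0, 255, 0, 253}, matching the updated CHECK line [0,255,0,253].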
@@ -279,12 +279,12 @@ define <4 x i16> @test_zext_4i8_4i16_undef() {
define <4 x i32> @test_zext_4i8_4i32_undef() {
; X32-LABEL: test_zext_4i8_4i32_undef:
; X32: # %bb.0:
-; X32-NEXT: vmovaps {{.*#+}} xmm0 = <0,u,2,u>
+; X32-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,2,0]
; X32-NEXT: retl
;
; X64-LABEL: test_zext_4i8_4i32_undef:
; X64: # %bb.0:
-; X64-NEXT: vmovaps {{.*#+}} xmm0 = <0,u,2,u>
+; X64-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,2,0]
; X64-NEXT: retq
%1 = insertelement <4 x i8> undef, i8 0, i32 0
%2 = insertelement <4 x i8> %1, i8 undef, i32 1
@@ -297,12 +297,12 @@ define <4 x i32> @test_zext_4i8_4i32_undef() {
define <4 x i64> @test_zext_4i8_4i64_undef() {
; X32-LABEL: test_zext_4i8_4i64_undef:
; X32: # %bb.0:
-; X32-NEXT: vmovaps {{.*#+}} ymm0 = <u,u,255,0,2,0,u,u>
+; X32-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,255,0,2,0,0,0]
; X32-NEXT: retl
;
; X64-LABEL: test_zext_4i8_4i64_undef:
; X64: # %bb.0:
-; X64-NEXT: vmovaps {{.*#+}} ymm0 = <u,255,2,u>
+; X64-NEXT: vmovaps {{.*#+}} ymm0 = [0,255,2,0]
; X64-NEXT: retq
%1 = insertelement <4 x i8> undef, i8 undef, i32 0
%2 = insertelement <4 x i8> %1, i8 -1, i32 1
@@ -359,12 +359,12 @@ define <8 x i32> @test_zext_8i8_8i32() {
define <8 x i16> @test_zext_8i8_8i16_undef() {
; X32-LABEL: test_zext_8i8_8i16_undef:
; X32: # %bb.0:
-; X32-NEXT: vmovaps {{.*#+}} xmm0 = <u,255,u,253,u,251,u,249>
+; X32-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,0,253,0,251,0,249]
; X32-NEXT: retl
;
; X64-LABEL: test_zext_8i8_8i16_undef:
; X64: # %bb.0:
-; X64-NEXT: vmovaps {{.*#+}} xmm0 = <u,255,u,253,u,251,u,249>
+; X64-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,0,253,0,251,0,249]
; X64-NEXT: retq
%1 = insertelement <8 x i8> undef, i8 undef, i32 0
%2 = insertelement <8 x i8> %1, i8 -1, i32 1
@@ -381,12 +381,12 @@ define <8 x i16> @test_zext_8i8_8i16_undef() {
define <8 x i32> @test_zext_8i8_8i32_undef() {
; X32-LABEL: test_zext_8i8_8i32_undef:
; X32: # %bb.0:
-; X32-NEXT: vmovaps {{.*#+}} ymm0 = <0,u,2,253,4,u,6,u>
+; X32-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,2,253,4,0,6,0]
; X32-NEXT: retl
;
; X64-LABEL: test_zext_8i8_8i32_undef:
; X64: # %bb.0:
-; X64-NEXT: vmovaps {{.*#+}} ymm0 = <0,u,2,253,4,u,6,u>
+; X64-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,2,253,4,0,6,0]
; X64-NEXT: retq
%1 = insertelement <8 x i8> undef, i8 0, i32 0
%2 = insertelement <8 x i8> %1, i8 undef, i32 1