[DAG] make binops with undef operands consistent with IR

This started when I noticed that scalar and vector types were producing different results with div ops in PR36305:
https://bugs.llvm.org/show_bug.cgi?id=36305
However, the problem is bigger than that. I couldn't keep it straight without a table, so I'm attaching that as a PDF to the review. The x86 tests in undef-ops.ll correspond to that table.

Green means that instsimplify and the DAG agree on the result for all types.
Red means the DAG was returning undef when IR was not.
Yellow means the DAG was returning a non-undef result when IR returned undef.

This patch assumes that we're currently doing the right thing in IR.

Note: I couldn't find any of the problems with lowering vector constants that the code comments warned about, but those comments were written long ago in rL36413.

Differential Revision: https://reviews.llvm.org/D43141

llvm-svn: 324941
author: Sanjay Patel <spatel@rotateright.com> 2018-02-12 21:37:27 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2018-02-12 21:37:27 +0000
commit: 014c000f6aa84528dfe492bfb02a04ef2993b98c (patch)
tree: d1b3dfbdb8ab99e9ffcb16d366cbc6d451d1eb35
parent: e7ed8807615e75f8f76d2c5c76b52b59e9a1fdcc (diff)
download: bcm5719-llvm-014c000f6aa84528dfe492bfb02a04ef2993b98c.tar.gz
bcm5719-llvm-014c000f6aa84528dfe492bfb02a04ef2993b98c.zip
5 files changed, 27 insertions, 30 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 68502fd2378..2ece6a82f6c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4667,19 +4667,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
       case ISD::FSUB:
       case ISD::FDIV:
       case ISD::FREM:
-      case ISD::SRA:
         return N1;     // fold op(undef, arg2) -> undef
       case ISD::UDIV:
       case ISD::SDIV:
       case ISD::UREM:
       case ISD::SREM:
+      case ISD::SRA:
       case ISD::SRL:
       case ISD::SHL:
-        if (!VT.isVector())
-          return getConstant(0, DL, VT);    // fold op(undef, arg2) -> 0
-        // For vectors, we can't easily build an all zero vector, just return
-        // the LHS.
-        return N2;
+        return getConstant(0, DL, VT);    // fold op(undef, arg2) -> 0
       }
     }
   }
@@ -4701,6 +4697,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     case ISD::SDIV:
     case ISD::UREM:
     case ISD::SREM:
+    case ISD::SRA:
+    case ISD::SRL:
+    case ISD::SHL:
       return N2;       // fold op(arg1, undef) -> undef
     case ISD::FADD:
     case ISD::FSUB:
@@ -4712,21 +4711,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
       break;
     case ISD::MUL:
     case ISD::AND:
-    case ISD::SRL:
-    case ISD::SHL:
-      if (!VT.isVector())
-        return getConstant(0, DL, VT);  // fold op(arg1, undef) -> 0
-      // For vectors, we can't easily build an all zero vector, just return
-      // the LHS.
-      return N1;
+      return getConstant(0, DL, VT);  // fold op(arg1, undef) -> 0
     case ISD::OR:
-      if (!VT.isVector())
-        return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT);
-      // For vectors, we can't easily build an all one vector, just return
-      // the LHS.
-      return N1;
-    case ISD::SRA:
-      return N1;
+      return getAllOnesConstant(DL, VT);
     }
   }
 
diff --git a/llvm/test/CodeGen/Hexagon/tail-dup-subreg-map.ll b/llvm/test/CodeGen/Hexagon/tail-dup-subreg-map.ll
index 1b11d087832..148d018b01e 100644
--- a/llvm/test/CodeGen/Hexagon/tail-dup-subreg-map.ll
+++ b/llvm/test/CodeGen/Hexagon/tail-dup-subreg-map.ll
@@ -5,7 +5,7 @@
 ; subregisters were dropped by the tail duplicator, resulting in invalid
 ; COPY instructions being generated.
 
-; CHECK: = extractu(r{{[0-9]+}},#15,#17)
+; CHECK: = asl(r{{[0-9]+}}:{{[0-9]+}},#15)
 
 target triple = "hexagon"
 
@@ -36,20 +36,20 @@ if.then5.i:                                       ; preds = %if.then.i
   br label %if.end.i
 
 if.else.i:                                        ; preds = %if.then.i
-  %shl12.i = shl i64 %0, undef
+  %shl12.i = shl i64 %0, 7
   br label %if.end.i
 
 if.end.i:                                         ; preds = %if.else.i, %if.then5.i
   %aSig0.0 = phi i64 [ undef, %if.then5.i ], [ %shl12.i, %if.else.i ]
   %storemerge43.i = phi i64 [ %shl.i21, %if.then5.i ], [ 0, %if.else.i ]
-  %sub15.i = sub nsw i32 -63, undef
+  %sub15.i = sub nsw i32 -63, 8
   br label %if.end13
 
 if.else16.i:                                      ; preds = %if.then7
   br label %if.end13
 
 if.else:                                          ; preds = %entry
-  %or12 = or i64 undef, 281474976710656
+  %or12 = or i64 9, 281474976710656
   br label %if.end13
 
 if.end13:                                         ; preds = %if.else, %if.else16.i, %if.end.i
diff --git a/llvm/test/CodeGen/X86/pr13577.ll b/llvm/test/CodeGen/X86/pr13577.ll
index 66bbf4531e5..8c0752b127a 100644
--- a/llvm/test/CodeGen/X86/pr13577.ll
+++ b/llvm/test/CodeGen/X86/pr13577.ll
@@ -31,7 +31,6 @@ define float @pr26070() {
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; CHECK-NEXT:    orps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
   %c = call float @copysignf(float 1.0, float undef) readnone
   ret float %c
diff --git a/llvm/test/CodeGen/X86/pr33960.ll b/llvm/test/CodeGen/X86/pr33960.ll
index 34af4df9455..fd5a9a40050 100644
--- a/llvm/test/CodeGen/X86/pr33960.ll
+++ b/llvm/test/CodeGen/X86/pr33960.ll
@@ -7,12 +7,12 @@
 define void @PR33960() {
 ; X86-LABEL: PR33960:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movl $0, b
+; X86-NEXT:    movl $-1, b
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: PR33960:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movl $0, {{.*}}(%rip)
+; X64-NEXT:    movl $-1, {{.*}}(%rip)
 ; X64-NEXT:    retq
 entry:
   %tmp = insertelement <4 x i32> <i32 undef, i32 -7, i32 -3, i32 undef>, i32 -2, i32 3
diff --git a/llvm/test/CodeGen/X86/undef-ops.ll b/llvm/test/CodeGen/X86/undef-ops.ll
index 18a408a7fc5..1ed1ec18e1d 100644
--- a/llvm/test/CodeGen/X86/undef-ops.ll
+++ b/llvm/test/CodeGen/X86/undef-ops.ll
@@ -77,6 +77,7 @@ define i32 @mul_undef_rhs(i32 %x) {
 define <4 x i32> @mul_undef_rhs_vec(<4 x i32> %x) {
 ; CHECK-LABEL: mul_undef_rhs_vec:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %r = mul <4 x i32> %x, undef
   ret <4 x i32> %r
@@ -94,6 +95,7 @@ define i32 @mul_undef_lhs(i32 %x) {
 define <4 x i32> @mul_undef_lhs_vec(<4 x i32> %x) {
 ; CHECK-LABEL: mul_undef_lhs_vec:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %r = mul <4 x i32> undef, %x
   ret <4 x i32> %r
@@ -127,6 +129,7 @@ define i32 @sdiv_undef_lhs(i32 %x) {
 define <4 x i32> @sdiv_undef_lhs_vec(<4 x i32> %x) {
 ; CHECK-LABEL: sdiv_undef_lhs_vec:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %r = sdiv <4 x i32> undef, %x
   ret <4 x i32> %r
@@ -160,6 +163,7 @@ define i32 @udiv_undef_lhs(i32 %x) {
 define <4 x i32> @udiv_undef_lhs_vec(<4 x i32> %x) {
 ; CHECK-LABEL: udiv_undef_lhs_vec:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %r = udiv <4 x i32> undef, %x
   ret <4 x i32> %r
@@ -193,6 +197,7 @@ define i32 @srem_undef_lhs(i32 %x) {
 define <4 x i32> @srem_undef_lhs_vec(<4 x i32> %x) {
 ; CHECK-LABEL: srem_undef_lhs_vec:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %r = srem <4 x i32> undef, %x
   ret <4 x i32> %r
@@ -226,6 +231,7 @@ define i32 @urem_undef_lhs(i32 %x) {
 define <4 x i32> @urem_undef_lhs_vec(<4 x i32> %x) {
 ; CHECK-LABEL: urem_undef_lhs_vec:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %r = urem <4 x i32> undef, %x
   ret <4 x i32> %r
@@ -234,7 +240,6 @@ define <4 x i32> @urem_undef_lhs_vec(<4 x i32> %x) {
 define i32 @ashr_undef_rhs(i32 %x) {
 ; CHECK-LABEL: ashr_undef_rhs:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    retq
   %r = ashr i32 %x, undef
   ret i32 %r
@@ -251,6 +256,7 @@ define <4 x i32> @ashr_undef_rhs_vec(<4 x i32> %x) {
 define i32 @ashr_undef_lhs(i32 %x) {
 ; CHECK-LABEL: ashr_undef_lhs:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    retq
   %r = ashr i32 undef, %x
   ret i32 %r
@@ -259,6 +265,7 @@ define i32 @ashr_undef_lhs(i32 %x) {
 define <4 x i32> @ashr_undef_lhs_vec(<4 x i32> %x) {
 ; CHECK-LABEL: ashr_undef_lhs_vec:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %r = ashr <4 x i32> undef, %x
   ret <4 x i32> %r
@@ -267,7 +274,6 @@ define <4 x i32> @ashr_undef_lhs_vec(<4 x i32> %x) {
 define i32 @lshr_undef_rhs(i32 %x) {
 ; CHECK-LABEL: lshr_undef_rhs:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    retq
   %r = lshr i32 %x, undef
   ret i32 %r
@@ -293,6 +299,7 @@ define i32 @lshr_undef_lhs(i32 %x) {
 define <4 x i32> @lshr_undef_lhs_vec(<4 x i32> %x) {
 ; CHECK-LABEL: lshr_undef_lhs_vec:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %r = lshr <4 x i32> undef, %x
   ret <4 x i32> %r
@@ -301,7 +308,6 @@ define <4 x i32> @lshr_undef_lhs_vec(<4 x i32> %x) {
 define i32 @shl_undef_rhs(i32 %x) {
 ; CHECK-LABEL: shl_undef_rhs:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    retq
   %r = shl i32 %x, undef
   ret i32 %r
@@ -327,6 +333,7 @@ define i32 @shl_undef_lhs(i32 %x) {
 define <4 x i32> @shl_undef_lhs_vec(<4 x i32> %x) {
 ; CHECK-LABEL: shl_undef_lhs_vec:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %r = shl <4 x i32> undef, %x
   ret <4 x i32> %r
@@ -344,6 +351,7 @@ define i32 @and_undef_rhs(i32 %x) {
 define <4 x i32> @and_undef_rhs_vec(<4 x i32> %x) {
 ; CHECK-LABEL: and_undef_rhs_vec:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %r = and <4 x i32> %x, undef
   ret <4 x i32> %r
@@ -361,6 +369,7 @@ define i32 @and_undef_lhs(i32 %x) {
 define <4 x i32> @and_undef_lhs_vec(<4 x i32> %x) {
 ; CHECK-LABEL: and_undef_lhs_vec:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %r = and <4 x i32> undef, %x
   ret <4 x i32> %r
@@ -378,6 +387,7 @@ define i32 @or_undef_rhs(i32 %x) {
 define <4 x i32> @or_undef_rhs_vec(<4 x i32> %x) {
 ; CHECK-LABEL: or_undef_rhs_vec:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %r = or <4 x i32> %x, undef
   ret <4 x i32> %r
@@ -395,6 +405,7 @@ define i32 @or_undef_lhs(i32 %x) {
 define <4 x i32> @or_undef_lhs_vec(<4 x i32> %x) {
 ; CHECK-LABEL: or_undef_lhs_vec:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %r = or <4 x i32> undef, %x
   ret <4 x i32> %r
author	Sanjay Patel <spatel@rotateright.com>	2018-02-12 21:37:27 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2018-02-12 21:37:27 +0000
commit	014c000f6aa84528dfe492bfb02a04ef2993b98c (patch)
tree	d1b3dfbdb8ab99e9ffcb16d366cbc6d451d1eb35
parent	e7ed8807615e75f8f76d2c5c76b52b59e9a1fdcc (diff)
download	bcm5719-llvm-014c000f6aa84528dfe492bfb02a04ef2993b98c.tar.gz bcm5719-llvm-014c000f6aa84528dfe492bfb02a04ef2993b98c.zip