summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/include/llvm/CodeGen/SelectionDAG.h 3
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 10
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 36
-rw-r--r-- llvm/test/CodeGen/X86/div-rem-simplify.ll 5
-rw-r--r-- llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll 14
5 files changed, 35 insertions, 33 deletions
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index cd4567cf105..4e75c0f0ef3 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -740,6 +740,9 @@ public:
return getNode(ISD::CALLSEQ_END, DL, NodeTys, Ops);
}
+ /// Return true if the result of this operation is always undefined.
+ bool isUndef(unsigned Opcode, ArrayRef<SDValue> Ops);
+
/// Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getUNDEF(EVT VT) {
return getNode(ISD::UNDEF, SDLoc(), VT);
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 30330e4588c..788b52dabe9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2524,15 +2524,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
- // X / undef -> undef
- // X % undef -> undef
- if (N1.isUndef())
- return N1;
-
- // X / 0 --> undef
- // X % 0 --> undef
- // We don't need to preserve faults!
- if (isNullConstantOrNullSplatConstant(N1))
+ if (DAG.isUndef(N->getOpcode(), {N0, N1}))
return DAG.getUNDEF(VT);
// undef / X -> 0
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index d8acd3cfe97..4f33eef7513 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3695,12 +3695,6 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (Cst1->isOpaque() || Cst2->isOpaque())
return SDValue();
- // Division/remainder with a zero divisor is undefined behavior.
- if ((Opcode == ISD::SDIV || Opcode == ISD::UDIV ||
- Opcode == ISD::SREM || Opcode == ISD::UREM) &&
- Cst2->isNullValue())
- return getUNDEF(VT);
-
std::pair<APInt, bool> Folded = FoldValue(Opcode, Cst1->getAPIntValue(),
Cst2->getAPIntValue());
if (!Folded.second)
@@ -3728,6 +3722,30 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
GA->getOffset() + uint64_t(Offset));
}
+bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
+ switch (Opcode) {
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM: {
+ // Div/rem is undef when the divisor (Ops[1]) is undef or a null constant, or
+ // when it passes ISD::isBuildVectorOfConstantSDNodes and any of its operands is.
+ assert(Ops.size() == 2 && "Div/rem should have 2 operands");
+ SDValue Divisor = Ops[1];
+ if (Divisor.isUndef() || isNullConstant(Divisor))
+ return true;
+
+ return ISD::isBuildVectorOfConstantSDNodes(Divisor.getNode()) &&
+ any_of(Divisor->op_values(),
+ [](SDValue V) { return V.isUndef() || isNullConstant(V); });
+ // TODO: Handle signed overflow.
+ }
+ // TODO: Handle oversized shifts.
+ default:
+ return false;
+ }
+}
+
SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
EVT VT, SDNode *Cst1,
SDNode *Cst2) {
@@ -3737,6 +3755,9 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (Opcode >= ISD::BUILTIN_OP_END)
return SDValue();
+ if (isUndef(Opcode, {SDValue(Cst1, 0), SDValue(Cst2, 0)}))
+ return getUNDEF(VT);
+
// Handle the case of two scalars.
if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) {
if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) {
@@ -3804,6 +3825,9 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
if (Opcode >= ISD::BUILTIN_OP_END)
return SDValue();
+ if (isUndef(Opcode, Ops))
+ return getUNDEF(VT);
+
// We can only fold vectors - maybe merge with FoldConstantArithmetic someday?
if (!VT.isVector())
return SDValue();
diff --git a/llvm/test/CodeGen/X86/div-rem-simplify.ll b/llvm/test/CodeGen/X86/div-rem-simplify.ll
index c0ce3e57505..04cf439dc15 100644
--- a/llvm/test/CodeGen/X86/div-rem-simplify.ll
+++ b/llvm/test/CodeGen/X86/div-rem-simplify.ll
@@ -152,7 +152,6 @@ define <4 x i32> @sel_sdiv0_vec(i1 %cond) {
define <4 x i32> @sdiv0elt_vec(<4 x i32> %x) {
; CHECK-LABEL: sdiv0elt_vec:
; CHECK: # BB#0:
-; CHECK-NEXT: movaps {{.*#+}} xmm0 = <u,12,u,4294967292>
; CHECK-NEXT: retq
%zero = and <4 x i32> %x, <i32 0, i32 0, i32 0, i32 0>
%some_ones = or <4 x i32> %zero, <i32 0, i32 -1, i32 0, i32 3>
@@ -163,7 +162,6 @@ define <4 x i32> @sdiv0elt_vec(<4 x i32> %x) {
define <4 x i32> @udiv0elt_vec(<4 x i32> %x) {
; CHECK-LABEL: udiv0elt_vec:
; CHECK: # BB#0:
-; CHECK-NEXT: movaps {{.*#+}} xmm0 = <u,4,3,u>
; CHECK-NEXT: retq
%div = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, <i32 0, i32 3, i32 4, i32 0>
ret <4 x i32> %div
@@ -172,7 +170,6 @@ define <4 x i32> @udiv0elt_vec(<4 x i32> %x) {
define <4 x i32> @urem0elt_vec(<4 x i32> %x) {
; CHECK-LABEL: urem0elt_vec:
; CHECK: # BB#0:
-; CHECK-NEXT: movaps {{.*#+}} xmm0 = <u,u,u,2>
; CHECK-NEXT: retq
%zero = and <4 x i32> %x, <i32 0, i32 0, i32 0, i32 0>
%some_ones = or <4 x i32> %zero, <i32 0, i32 0, i32 0, i32 3>
@@ -183,8 +180,6 @@ define <4 x i32> @urem0elt_vec(<4 x i32> %x) {
define <4 x i32> @srem0elt_vec(<4 x i32> %x) {
; CHECK-LABEL: srem0elt_vec:
; CHECK: # BB#0:
-; CHECK-NEXT: movl $-2, %eax
-; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: retq
%rem = srem <4 x i32> <i32 -11, i32 -12, i32 -13, i32 -14>, <i32 -3, i32 -3, i32 0, i32 2>
ret <4 x i32> %rem
diff --git a/llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll b/llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll
index 01a91936e56..f0c9069d8c7 100644
--- a/llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll
+++ b/llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll
@@ -184,27 +184,15 @@ entry:
ret <16 x i16> %a0
}
-; TODO: The div-by-0 lanes are folded away, so we use scalar ops. Would it be better to keep this in the vector unit?
+; Div-by-0 in any lane is UB.
define <4 x i32> @sdiv_non_splat(<4 x i32> %x) {
; SSE-LABEL: sdiv_non_splat:
; SSE: # BB#0:
-; SSE-NEXT: movd %xmm0, %eax
-; SSE-NEXT: movl %eax, %ecx
-; SSE-NEXT: shrl $31, %ecx
-; SSE-NEXT: addl %eax, %ecx
-; SSE-NEXT: sarl %ecx
-; SSE-NEXT: movd %ecx, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sdiv_non_splat:
; AVX: # BB#0:
-; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: shrl $31, %ecx
-; AVX-NEXT: addl %eax, %ecx
-; AVX-NEXT: sarl %ecx
-; AVX-NEXT: vmovd %ecx, %xmm0
; AVX-NEXT: retq
%y = sdiv <4 x i32> %x, <i32 2, i32 0, i32 0, i32 0>
ret <4 x i32> %y
OpenPOWER on IntegriCloud