diff options
-rw-r--r-- | llvm/include/llvm/CodeGen/SelectionDAG.h | 3
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 36
-rw-r--r-- | llvm/test/CodeGen/X86/div-rem-simplify.ll | 5
-rw-r--r-- | llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll | 14
5 files changed, 35 insertions, 33 deletions
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index cd4567cf105..4e75c0f0ef3 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -740,6 +740,9 @@ public: return getNode(ISD::CALLSEQ_END, DL, NodeTys, Ops); } + /// Return true if the result of this operation is always undefined. + bool isUndef(unsigned Opcode, ArrayRef<SDValue> Ops); + /// Return an UNDEF node. UNDEF does not have a useful SDLoc. SDValue getUNDEF(EVT VT) { return getNode(ISD::UNDEF, SDLoc(), VT); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 30330e4588c..788b52dabe9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2524,15 +2524,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); SDLoc DL(N); - // X / undef -> undef - // X % undef -> undef - if (N1.isUndef()) - return N1; - - // X / 0 --> undef - // X % 0 --> undef - // We don't need to preserve faults! - if (isNullConstantOrNullSplatConstant(N1)) + if (DAG.isUndef(N->getOpcode(), {N0, N1})) return DAG.getUNDEF(VT); // undef / X -> 0 diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index d8acd3cfe97..4f33eef7513 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3695,12 +3695,6 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, if (Cst1->isOpaque() || Cst2->isOpaque()) return SDValue(); - // Division/remainder with a zero divisor is undefined behavior. 
- if ((Opcode == ISD::SDIV || Opcode == ISD::UDIV || - Opcode == ISD::SREM || Opcode == ISD::UREM) && - Cst2->isNullValue()) - return getUNDEF(VT); - std::pair<APInt, bool> Folded = FoldValue(Opcode, Cst1->getAPIntValue(), Cst2->getAPIntValue()); if (!Folded.second) @@ -3728,6 +3722,30 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT, GA->getOffset() + uint64_t(Offset)); } +bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) { + switch (Opcode) { + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: { + // If a divisor is zero/undef or any element of a divisor vector is + // zero/undef, the whole op is undef. + assert(Ops.size() == 2 && "Div/rem should have 2 operands"); + SDValue Divisor = Ops[1]; + if (Divisor.isUndef() || isNullConstant(Divisor)) + return true; + + return ISD::isBuildVectorOfConstantSDNodes(Divisor.getNode()) && + any_of(Divisor->op_values(), + [](SDValue V) { return V.isUndef() || isNullConstant(V); }); + // TODO: Handle signed overflow. + } + // TODO: Handle oversized shifts. + default: + return false; + } +} + SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, SDNode *Cst1, SDNode *Cst2) { @@ -3737,6 +3755,9 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, if (Opcode >= ISD::BUILTIN_OP_END) return SDValue(); + if (isUndef(Opcode, {SDValue(Cst1, 0), SDValue(Cst2, 0)})) + return getUNDEF(VT); + // Handle the case of two scalars. if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) { if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) { @@ -3804,6 +3825,9 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, if (Opcode >= ISD::BUILTIN_OP_END) return SDValue(); + if (isUndef(Opcode, Ops)) + return getUNDEF(VT); + // We can only fold vectors - maybe merge with FoldConstantArithmetic someday? 
if (!VT.isVector()) return SDValue(); diff --git a/llvm/test/CodeGen/X86/div-rem-simplify.ll b/llvm/test/CodeGen/X86/div-rem-simplify.ll index c0ce3e57505..04cf439dc15 100644 --- a/llvm/test/CodeGen/X86/div-rem-simplify.ll +++ b/llvm/test/CodeGen/X86/div-rem-simplify.ll @@ -152,7 +152,6 @@ define <4 x i32> @sel_sdiv0_vec(i1 %cond) { define <4 x i32> @sdiv0elt_vec(<4 x i32> %x) { ; CHECK-LABEL: sdiv0elt_vec: ; CHECK: # BB#0: -; CHECK-NEXT: movaps {{.*#+}} xmm0 = <u,12,u,4294967292> ; CHECK-NEXT: retq %zero = and <4 x i32> %x, <i32 0, i32 0, i32 0, i32 0> %some_ones = or <4 x i32> %zero, <i32 0, i32 -1, i32 0, i32 3> @@ -163,7 +162,6 @@ define <4 x i32> @sdiv0elt_vec(<4 x i32> %x) { define <4 x i32> @udiv0elt_vec(<4 x i32> %x) { ; CHECK-LABEL: udiv0elt_vec: ; CHECK: # BB#0: -; CHECK-NEXT: movaps {{.*#+}} xmm0 = <u,4,3,u> ; CHECK-NEXT: retq %div = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, <i32 0, i32 3, i32 4, i32 0> ret <4 x i32> %div @@ -172,7 +170,6 @@ define <4 x i32> @udiv0elt_vec(<4 x i32> %x) { define <4 x i32> @urem0elt_vec(<4 x i32> %x) { ; CHECK-LABEL: urem0elt_vec: ; CHECK: # BB#0: -; CHECK-NEXT: movaps {{.*#+}} xmm0 = <u,u,u,2> ; CHECK-NEXT: retq %zero = and <4 x i32> %x, <i32 0, i32 0, i32 0, i32 0> %some_ones = or <4 x i32> %zero, <i32 0, i32 0, i32 0, i32 3> @@ -183,8 +180,6 @@ define <4 x i32> @urem0elt_vec(<4 x i32> %x) { define <4 x i32> @srem0elt_vec(<4 x i32> %x) { ; CHECK-LABEL: srem0elt_vec: ; CHECK: # BB#0: -; CHECK-NEXT: movl $-2, %eax -; CHECK-NEXT: movd %eax, %xmm0 ; CHECK-NEXT: retq %rem = srem <4 x i32> <i32 -11, i32 -12, i32 -13, i32 -14>, <i32 -3, i32 -3, i32 0, i32 2> ret <4 x i32> %rem diff --git a/llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll b/llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll index 01a91936e56..f0c9069d8c7 100644 --- a/llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll +++ b/llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll @@ -184,27 +184,15 @@ entry: ret <16 x i16> %a0 } -; TODO: The div-by-0 lanes are folded away, so we use scalar 
ops. Would it be better to keep this in the vector unit? +; Div-by-0 in any lane is UB. define <4 x i32> @sdiv_non_splat(<4 x i32> %x) { ; SSE-LABEL: sdiv_non_splat: ; SSE: # BB#0: -; SSE-NEXT: movd %xmm0, %eax -; SSE-NEXT: movl %eax, %ecx -; SSE-NEXT: shrl $31, %ecx -; SSE-NEXT: addl %eax, %ecx -; SSE-NEXT: sarl %ecx -; SSE-NEXT: movd %ecx, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: sdiv_non_splat: ; AVX: # BB#0: -; AVX-NEXT: vmovd %xmm0, %eax -; AVX-NEXT: movl %eax, %ecx -; AVX-NEXT: shrl $31, %ecx -; AVX-NEXT: addl %eax, %ecx -; AVX-NEXT: sarl %ecx -; AVX-NEXT: vmovd %ecx, %xmm0 ; AVX-NEXT: retq %y = sdiv <4 x i32> %x, <i32 2, i32 0, i32 0, i32 0> ret <4 x i32> %y