diff options
| -rw-r--r-- | llvm/include/llvm/CodeGen/SelectionDAG.h | 3 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 36 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/div-rem-simplify.ll | 5 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll | 14 | 
5 files changed, 35 insertions, 33 deletions
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index cd4567cf105..4e75c0f0ef3 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -740,6 +740,9 @@ public:      return getNode(ISD::CALLSEQ_END, DL, NodeTys, Ops);    } +  /// Return true if the result of this operation is always undefined. +  bool isUndef(unsigned Opcode, ArrayRef<SDValue> Ops); +    /// Return an UNDEF node. UNDEF does not have a useful SDLoc.    SDValue getUNDEF(EVT VT) {      return getNode(ISD::UNDEF, SDLoc(), VT); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 30330e4588c..788b52dabe9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2524,15 +2524,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {    EVT VT = N->getValueType(0);    SDLoc DL(N); -  // X / undef -> undef -  // X % undef -> undef -  if (N1.isUndef()) -    return N1; - -  // X / 0 --> undef -  // X % 0 --> undef -  // We don't need to preserve faults! -  if (isNullConstantOrNullSplatConstant(N1)) +  if (DAG.isUndef(N->getOpcode(), {N0, N1}))      return DAG.getUNDEF(VT);    // undef / X -> 0 diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index d8acd3cfe97..4f33eef7513 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3695,12 +3695,6 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,    if (Cst1->isOpaque() || Cst2->isOpaque())      return SDValue(); -  // Division/remainder with a zero divisor is undefined behavior. -  if ((Opcode == ISD::SDIV || Opcode == ISD::UDIV || -       Opcode == ISD::SREM || Opcode == ISD::UREM) && -      Cst2->isNullValue()) -    return getUNDEF(VT); -    std::pair<APInt, bool> Folded = FoldValue(Opcode, Cst1->getAPIntValue(),                                              Cst2->getAPIntValue());    if (!Folded.second) @@ -3728,6 +3722,30 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,                            GA->getOffset() + uint64_t(Offset));  } +bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) { +  switch (Opcode) { +  case ISD::SDIV: +  case ISD::UDIV: +  case ISD::SREM: +  case ISD::UREM: { +    // If a divisor is zero/undef or any element of a divisor vector is +    // zero/undef, the whole op is undef. +    assert(Ops.size() == 2 && "Div/rem should have 2 operands"); +    SDValue Divisor = Ops[1]; +    if (Divisor.isUndef() || isNullConstant(Divisor)) +      return true; + +    return ISD::isBuildVectorOfConstantSDNodes(Divisor.getNode()) && +           any_of(Divisor->op_values(), +                  [](SDValue V) { return V.isUndef() || isNullConstant(V); }); +    // TODO: Handle signed overflow. +  } +  // TODO: Handle oversized shifts. +  default: +    return false; +  } +} +  SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,                                               EVT VT, SDNode *Cst1,                                               SDNode *Cst2) { @@ -3737,6 +3755,9 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,    if (Opcode >= ISD::BUILTIN_OP_END)      return SDValue(); +  if (isUndef(Opcode, {SDValue(Cst1, 0), SDValue(Cst2, 0)})) +    return getUNDEF(VT); +    // Handle the case of two scalars.    if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) {      if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) { @@ -3804,6 +3825,9 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,    if (Opcode >= ISD::BUILTIN_OP_END)      return SDValue(); +  if (isUndef(Opcode, Ops)) +    return getUNDEF(VT); +    // We can only fold vectors - maybe merge with FoldConstantArithmetic someday?    if (!VT.isVector())      return SDValue(); diff --git a/llvm/test/CodeGen/X86/div-rem-simplify.ll b/llvm/test/CodeGen/X86/div-rem-simplify.ll index c0ce3e57505..04cf439dc15 100644 --- a/llvm/test/CodeGen/X86/div-rem-simplify.ll +++ b/llvm/test/CodeGen/X86/div-rem-simplify.ll @@ -152,7 +152,6 @@ define <4 x i32> @sel_sdiv0_vec(i1 %cond) {  define <4 x i32> @sdiv0elt_vec(<4 x i32> %x) {  ; CHECK-LABEL: sdiv0elt_vec:  ; CHECK:       # BB#0: -; CHECK-NEXT:    movaps {{.*#+}} xmm0 = <u,12,u,4294967292>  ; CHECK-NEXT:    retq    %zero = and <4 x i32> %x, <i32 0, i32 0, i32 0, i32 0>    %some_ones = or <4 x i32> %zero, <i32 0, i32 -1, i32 0, i32 3> @@ -163,7 +162,6 @@ define <4 x i32> @sdiv0elt_vec(<4 x i32> %x) {  define <4 x i32> @udiv0elt_vec(<4 x i32> %x) {  ; CHECK-LABEL: udiv0elt_vec:  ; CHECK:       # BB#0: -; CHECK-NEXT:    movaps {{.*#+}} xmm0 = <u,4,3,u>  ; CHECK-NEXT:    retq    %div = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, <i32 0, i32 3, i32 4, i32 0>    ret <4 x i32> %div @@ -172,7 +170,6 @@ define <4 x i32> @udiv0elt_vec(<4 x i32> %x) {  define <4 x i32> @urem0elt_vec(<4 x i32> %x) {  ; CHECK-LABEL: urem0elt_vec:  ; CHECK:       # BB#0: -; CHECK-NEXT:    movaps {{.*#+}} xmm0 = <u,u,u,2>  ; CHECK-NEXT:    retq    %zero = and <4 x i32> %x, <i32 0, i32 0, i32 0, i32 0>    %some_ones = or <4 x i32> %zero, <i32 0, i32 0, i32 0, i32 3> @@ -183,8 +180,6 @@ define <4 x i32> @urem0elt_vec(<4 x i32> %x) {  define <4 x i32> @srem0elt_vec(<4 x i32> %x) {  ; CHECK-LABEL: srem0elt_vec:  ; CHECK:       # BB#0: -; CHECK-NEXT:    movl $-2, %eax -; CHECK-NEXT:    movd %eax, %xmm0  ; CHECK-NEXT:    retq    %rem = srem <4 x i32> <i32 -11, i32 -12, i32 -13, i32 -14>, <i32 -3, i32 -3, i32 0, i32 2>    ret <4 x i32> %rem diff --git a/llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll b/llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll index 01a91936e56..f0c9069d8c7 100644 --- a/llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll +++ b/llvm/test/CodeGen/X86/vec_sdiv_to_shift.ll @@ -184,27 +184,15 @@ entry:    ret <16 x i16> %a0  } -; TODO: The div-by-0 lanes are folded away, so we use scalar ops. Would it be better to keep this in the vector unit? +; Div-by-0 in any lane is UB.  define <4 x i32> @sdiv_non_splat(<4 x i32> %x) {  ; SSE-LABEL: sdiv_non_splat:  ; SSE:       # BB#0: -; SSE-NEXT:    movd %xmm0, %eax -; SSE-NEXT:    movl %eax, %ecx -; SSE-NEXT:    shrl $31, %ecx -; SSE-NEXT:    addl %eax, %ecx -; SSE-NEXT:    sarl %ecx -; SSE-NEXT:    movd %ecx, %xmm0  ; SSE-NEXT:    retq  ;  ; AVX-LABEL: sdiv_non_splat:  ; AVX:       # BB#0: -; AVX-NEXT:    vmovd %xmm0, %eax -; AVX-NEXT:    movl %eax, %ecx -; AVX-NEXT:    shrl $31, %ecx -; AVX-NEXT:    addl %eax, %ecx -; AVX-NEXT:    sarl %ecx -; AVX-NEXT:    vmovd %ecx, %xmm0  ; AVX-NEXT:    retq    %y = sdiv <4 x i32> %x, <i32 2, i32 0, i32 0, i32 0>    ret <4 x i32> %y  | 

