Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp  19
-rw-r--r--  llvm/test/CodeGen/X86/combine-addo.ll          33
-rw-r--r--  llvm/test/CodeGen/X86/combine-subo.ll          36
3 files changed, 26 insertions, 62 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a839073e63d..2e4c5933b9e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2443,8 +2443,6 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   EVT VT = N0.getValueType();
-  if (VT.isVector())
-    return SDValue();

   EVT CarryVT = N->getValueType(1);
   SDLoc DL(N);
@@ -2455,13 +2453,12 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) {
                      DAG.getUNDEF(CarryVT));

   // canonicalize constant to RHS.
-  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
-  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
-  if (N0C && !N1C)
+  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);

   // fold (uaddo x, 0) -> x + no carry out
-  if (isNullConstant(N1))
+  if (isNullOrNullSplat(N1))
     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

   // If it cannot overflow, transform into an add.
@@ -2488,7 +2485,9 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) {
 }

 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
-  auto VT = N0.getValueType();
+  EVT VT = N0.getValueType();
+  if (VT.isVector())
+    return SDValue();

   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
   // If Y + 1 cannot overflow.
@@ -2952,8 +2951,6 @@ SDValue DAGCombiner::visitUSUBO(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   EVT VT = N0.getValueType();
-  if (VT.isVector())
-    return SDValue();

   EVT CarryVT = N->getValueType(1);
   SDLoc DL(N);
@@ -2969,11 +2966,11 @@ SDValue DAGCombiner::visitUSUBO(SDNode *N) {
                      DAG.getConstant(0, DL, CarryVT));

   // fold (usubo x, 0) -> x + no borrow
-  if (isNullConstant(N1))
+  if (isNullOrNullSplat(N1))
     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
-  if (isAllOnesConstant(N0))
+  if (isAllOnesOrAllOnesSplat(N0))
     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
                      DAG.getConstant(0, DL, CarryVT));

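Net effect of the DAGCombiner.cpp change: the scalar UADDO/USUBO identities now also fire when the constant operand is a splat build vector, while the addcarry-style combines in visitUADDOLike stay scalar-only via the new early-out. A minimal IR sketch of an input that now folds away completely (the function name is illustrative, not from the patch; the intrinsic is the same one the tests below use):

declare {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32>, <4 x i32>)

; uaddo x, 0 -> x + no carry out: isNullOrNullSplat now matches the
; zero splat, so no compare-and-blend sequence needs to be emitted.
define <4 x i32> @uaddo_zero_splat(<4 x i32> %x) {
  %r = call {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32> %x, <4 x i32> zeroinitializer)
  %v = extractvalue {<4 x i32>, <4 x i1>} %r, 0
  ret <4 x i32> %v
}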
diff --git a/llvm/test/CodeGen/X86/combine-addo.ll b/llvm/test/CodeGen/X86/combine-addo.ll
index 23e5366f5e9..e93254e052a 100644
--- a/llvm/test/CodeGen/X86/combine-addo.ll
+++ b/llvm/test/CodeGen/X86/combine-addo.ll
@@ -62,18 +62,10 @@ define i32 @combine_uadd_zero(i32 %a0, i32 %a1) {
 define <4 x i32> @combine_vec_uadd_zero(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-LABEL: combine_vec_uadd_zero:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movdqa %xmm0, %xmm2
-; SSE-NEXT:    pmaxud %xmm0, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
-; SSE-NEXT:    blendvps %xmm0, %xmm2, %xmm1
-; SSE-NEXT:    movaps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_uadd_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpmaxud %xmm0, %xmm0, %xmm2
-; AVX-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm2
-; AVX-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
   %1 = call {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> zeroinitializer)
   %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
@@ -108,24 +100,23 @@ define i32 @combine_uadd_not(i32 %a0, i32 %a1) {
 define <4 x i32> @combine_vec_uadd_not(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-LABEL: combine_vec_uadd_not:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
-; SSE-NEXT:    pxor %xmm2, %xmm0
-; SSE-NEXT:    movdqa %xmm0, %xmm3
-; SSE-NEXT:    psubd %xmm2, %xmm3
-; SSE-NEXT:    pmaxud %xmm3, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm3, %xmm0
-; SSE-NEXT:    blendvps %xmm0, %xmm3, %xmm1
+; SSE-NEXT:    pxor %xmm2, %xmm2
+; SSE-NEXT:    psubd %xmm0, %xmm2
+; SSE-NEXT:    movdqa {{.*#+}} xmm0 = [1,1,1,1]
+; SSE-NEXT:    pmaxud %xmm2, %xmm0
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
+; SSE-NEXT:    blendvps %xmm0, %xmm2, %xmm1
 ; SSE-NEXT:    movaps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_uadd_not:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpsubd %xmm2, %xmm0, %xmm2
-; AVX-NEXT:    vpmaxud %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT:    vpsubd %xmm0, %xmm2, %xmm0
+; AVX-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
+; AVX-NEXT:    vpmaxud %xmm2, %xmm0, %xmm2
+; AVX-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm2
+; AVX-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
   %1 = xor <4 x i32> %a0, <i32 -1, i32 -1, i32 -1, i32 -1>
   %2 = call {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
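The combine_vec_uadd_not diff above shows a knock-on effect: with the vector path open, (uaddo (xor x, -1), 1) appears to be rewritten as a negation, which is why the new SSE/AVX output builds 0 - x with psubd and checks overflow against the splat <1,1,1,1> via pmaxud/pcmpeqd instead of materializing all-ones and adding. A sketch of that IR shape, under the assumption that this is the combine firing (function name illustrative):

declare {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32>, <4 x i32>)

; ~x + 1 == -x, so the sum can be computed as a vector negate; the
; pmaxud/pcmpeqd pair in the new output derives the overflow mask.
define <4 x i32> @uaddo_not_inc(<4 x i32> %x) {
  %not = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %r = call {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32> %not, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  %v = extractvalue {<4 x i32>, <4 x i1>} %r, 0
  ret <4 x i32> %v
}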
diff --git a/llvm/test/CodeGen/X86/combine-subo.ll b/llvm/test/CodeGen/X86/combine-subo.ll
index c162515c257..5113c95f920 100644
--- a/llvm/test/CodeGen/X86/combine-subo.ll
+++ b/llvm/test/CodeGen/X86/combine-subo.ll
@@ -62,18 +62,10 @@ define i32 @combine_usub_zero(i32 %a0, i32 %a1) {
 define <4 x i32> @combine_vec_usub_zero(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-LABEL: combine_vec_usub_zero:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movdqa %xmm0, %xmm2
-; SSE-NEXT:    pminud %xmm0, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
-; SSE-NEXT:    blendvps %xmm0, %xmm2, %xmm1
-; SSE-NEXT:    movaps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_usub_zero:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpminud %xmm0, %xmm0, %xmm2
-; AVX-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm2
-; AVX-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
   %1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> zeroinitializer)
   %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
@@ -138,20 +130,12 @@ define i32 @combine_usub_self(i32 %a0, i32 %a1) {
 define <4 x i32> @combine_vec_usub_self(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-LABEL: combine_vec_usub_self:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movdqa %xmm0, %xmm2
-; SSE-NEXT:    psubd %xmm0, %xmm2
-; SSE-NEXT:    pminud %xmm2, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
-; SSE-NEXT:    blendvps %xmm0, %xmm2, %xmm1
-; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    xorps %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_usub_self:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpsubd %xmm0, %xmm0, %xmm2
-; AVX-NEXT:    vpminud %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> %a0, <4 x i32> %a0)
   %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
@@ -183,22 +167,14 @@ define i32 @combine_usub_negone(i32 %a0, i32 %a1) {
 define <4 x i32> @combine_vec_usub_negone(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-LABEL: combine_vec_usub_negone:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movdqa %xmm0, %xmm2
-; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSE-NEXT:    pxor %xmm0, %xmm2
-; SSE-NEXT:    pminud %xmm2, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
-; SSE-NEXT:    blendvps %xmm0, %xmm2, %xmm1
-; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
+; SSE-NEXT:    pxor %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_usub_negone:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpminud %xmm2, %xmm0, %xmm2
-; AVX-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm2
-; AVX-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %1 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a0)
   %2 = extractvalue {<4 x i32>, <4 x i1>} %1, 0
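On the usub side the same splat handling collapses the remaining cases: x - x now folds to an all-zero vector (a single xorps/vxorps), and -1 - x becomes a vector NOT (pcmpeqd to build all-ones, then pxor), matching the isAllOnesOrAllOnesSplat canonicalization in visitUSUBO. A small IR sketch of the two shapes, with illustrative function names:

declare {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32>, <4 x i32>)

; usubo x, x -> 0 + no borrow: the compare/blend sequence disappears.
define <4 x i32> @usubo_self(<4 x i32> %x) {
  %r = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> %x, <4 x i32> %x)
  %v = extractvalue {<4 x i32>, <4 x i1>} %r, 0
  ret <4 x i32> %v
}

; usubo -1, x -> ~x + no borrow: lowered as all-ones followed by pxor.
define <4 x i32> @usubo_negone(<4 x i32> %x) {
  %r = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %x)
  %v = extractvalue {<4 x i32>, <4 x i1>} %r, 0
  ret <4 x i32> %v
}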

