diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-11-14 13:33:33 +0530 |
|---|---|---|
| committer | Matt Arsenault <arsenm2@gmail.com> | 2019-11-15 13:43:42 +0530 |
| commit | 69fcfb7d3597e0cdb5554b4e672e9032b411b167 (patch) | |
| tree | e256a710df2112667576abf7d2be37a20a9f4e25 | |
| parent | bc276c6379fd0a7bca78d53026e346ea0c8b7cff (diff) | |
| download | bcm5719-llvm-69fcfb7d3597e0cdb5554b4e672e9032b411b167.tar.gz bcm5719-llvm-69fcfb7d3597e0cdb5554b4e672e9032b411b167.zip | |
AMDGPU: Try to commute sub of boolean ext

Avoids another regression in a future patch.
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 29 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll | 43 |
2 files changed, 69 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 3e0b580c109..a11926ec2d7 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -9586,7 +9586,16 @@ SDValue SITargetLowering::performSubCombine(SDNode *N, // sub x, zext (setcc) => subcarry x, 0, setcc // sub x, sext (setcc) => addcarry x, 0, setcc - unsigned Opc = RHS.getOpcode(); + + bool Commuted = false; + unsigned Opc = LHS.getOpcode(); + if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND || + Opc == ISD::ANY_EXTEND) { + std::swap(RHS, LHS); + Commuted = true; + } + + Opc = RHS.getOpcode(); switch (Opc) { default: break; case ISD::ZERO_EXTEND: @@ -9598,8 +9607,22 @@ SDValue SITargetLowering::performSubCombine(SDNode *N, if (!isBoolSGPR(Cond)) break; SDVTList VTList = DAG.getVTList(MVT::i32, MVT::i1); - SDValue Args[] = { LHS, DAG.getConstant(0, SL, MVT::i32), Cond }; - Opc = (Opc == ISD::SIGN_EXTEND) ? ISD::ADDCARRY : ISD::SUBCARRY; + SDValue Zero = DAG.getConstant(0, SL, MVT::i32); + SDValue Args[3]; + Args[2] = Cond; + + if (Commuted) { + // sub zext (setcc), x => addcarry 0, x, setcc + // sub sext (setcc), x => subcarry 0, x, setcc + Args[0] = Zero; + Args[1] = LHS; + Opc = (Opc == ISD::SIGN_EXTEND) ? ISD::SUBCARRY : ISD::ADDCARRY; + } else { + Args[0] = LHS; + Args[1] = Zero; + Opc = (Opc == ISD::SIGN_EXTEND) ? 
ISD::ADDCARRY : ISD::SUBCARRY; + } + return DAG.getNode(Opc, SL, VTList, Args); } } diff --git a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll index ed2cb861397..99d2bc9863f 100644 --- a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll +++ b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll @@ -311,6 +311,49 @@ bb: ret void } +; Check case where sub is commuted with zext +; GCN-LABEL: {{^}}sub_zext_setcc_commute: +; GCN: v_cmp_gt_u32_e32 vcc, v +; GCN-NOT: vcc +; GCN: v_addc_u32_e32 [[ADDC:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}, vcc +; GCN: v_subrev_i32_e32 [[RESULT:v[0-9]+]], vcc, s{{[0-9]+}}, [[ADDC]] +define amdgpu_kernel void @sub_zext_setcc_commute(i32 addrspace(1)* nocapture %arg, i32 %a, i32%b) { +bb: + %x = tail call i32 @llvm.amdgcn.workitem.id.x() + %y = tail call i32 @llvm.amdgcn.workitem.id.y() + %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x + %v = load i32, i32 addrspace(1)* %gep, align 4 + %cmp = icmp ugt i32 %x, %y + %ext = zext i1 %cmp to i32 + %adde = sub i32 %v, %ext + %sub = sub i32 %a, %adde + %sub2 = sub i32 %sub, %b + store i32 %sub2, i32 addrspace(1)* %gep, align 4 + ret void +} + +; Check case where sub is commuted with sext +; GCN-LABEL: {{^}}sub_sext_setcc_commute: +; GCN: v_cmp_gt_u32_e32 vcc, v +; GCN-NOT: vcc +; GCN: v_subb_u32_e32 [[SUBB:v[0-9]+]], vcc, 0, v{{[0-9]+}}, vcc +; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[SUBB]] +; GCN: v_subrev_i32_e32 [[RESULT:v[0-9]+]], vcc, s{{[0-9]+}}, [[ADD]] +define amdgpu_kernel void @sub_sext_setcc_commute(i32 addrspace(1)* nocapture %arg, i32 %a, i32%b) { +bb: + %x = tail call i32 @llvm.amdgcn.workitem.id.x() + %y = tail call i32 @llvm.amdgcn.workitem.id.y() + %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x + %v = load i32, i32 addrspace(1)* %gep, align 4 + %cmp = icmp ugt i32 %x, %y + %ext = sext i1 %cmp to i32 + %adde = sub i32 %v, %ext + %sub = sub i32 %a, %adde + %sub2 = sub i32 %sub, %b 
+ store i32 %sub2, i32 addrspace(1)* %gep, align 4 + ret void +} + declare i1 @llvm.amdgcn.class.f32(float, i32) #0 declare i32 @llvm.amdgcn.workitem.id.x() #0 |

