diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-11-12 09:11:00 +0530 |
|---|---|---|
| committer | Matt Arsenault <arsenm2@gmail.com> | 2019-11-13 07:13:58 +0530 |
| commit | 9d7bccab663f5e71320fb10727abcfa6c2dab046 (patch) | |
| tree | 1d7723b4c5b1c502fb5284f468b76ca0c4112ecc | |
| parent | 4b472139513ba460595804f8113497844b41fbcc (diff) | |
| download | bcm5719-llvm-9d7bccab663f5e71320fb10727abcfa6c2dab046.tar.gz bcm5719-llvm-9d7bccab663f5e71320fb10727abcfa6c2dab046.zip | |
AMDGPU: Extend add x, (ext setcc) combine to sub
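This is the same as the add case, but with the carry opcode inverted: sub x, zext (setcc) becomes subcarry x, 0, setcc, and sub x, sext (setcc) becomes addcarry x, 0, setcc.
This avoids regressions in a future patch.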
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 22 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll | 74 |
2 files changed, 96 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 209c17a117a..3e0b580c109 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -9552,6 +9552,8 @@ SDValue SITargetLowering::performAddCombine(SDNode *N, case ISD::SIGN_EXTEND: case ISD::ANY_EXTEND: { auto Cond = RHS.getOperand(0); + // If this won't be a real VOPC output, we would still need to insert an + // extra instruction anyway. if (!isBoolSGPR(Cond)) break; SDVTList VTList = DAG.getVTList(MVT::i32, MVT::i1); @@ -9582,6 +9584,26 @@ SDValue SITargetLowering::performSubCombine(SDNode *N, SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); + // sub x, zext (setcc) => subcarry x, 0, setcc + // sub x, sext (setcc) => addcarry x, 0, setcc + unsigned Opc = RHS.getOpcode(); + switch (Opc) { + default: break; + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ANY_EXTEND: { + auto Cond = RHS.getOperand(0); + // If this won't be a real VOPC output, we would still need to insert an + // extra instruction anyway. + if (!isBoolSGPR(Cond)) + break; + SDVTList VTList = DAG.getVTList(MVT::i32, MVT::i1); + SDValue Args[] = { LHS, DAG.getConstant(0, SL, MVT::i32), Cond }; + Opc = (Opc == ISD::SIGN_EXTEND) ? 
ISD::ADDCARRY : ISD::SUBCARRY; + return DAG.getNode(Opc, SL, VTList, Args); + } + } + if (LHS.getOpcode() == ISD::SUBCARRY) { // sub (subcarry x, 0, cc), y => subcarry x, y, cc auto C = dyn_cast<ConstantSDNode>(LHS.getOperand(1)); diff --git a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll index ae30ad37c37..ed2cb861397 100644 --- a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll +++ b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll @@ -237,6 +237,80 @@ bb: ret void } +; sub x, sext (setcc) => addcarry x, 0, setcc +; GCN-LABEL: {{^}}cmp_sub_sext: +; GCN: v_cmp_gt_u32_e32 vcc, v +; GCN-NOT: vcc +; GCN: v_addc_u32_e32 [[RESULT:v[0-9]+]], vcc, 0, v{{[0-9]+}}, vcc +define amdgpu_kernel void @cmp_sub_sext(i32 addrspace(1)* nocapture %arg) { +bb: + %x = tail call i32 @llvm.amdgcn.workitem.id.x() + %y = tail call i32 @llvm.amdgcn.workitem.id.y() + %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x + %v = load i32, i32 addrspace(1)* %gep, align 4 + %cmp = icmp ugt i32 %x, %y + %ext = sext i1 %cmp to i32 + %add = sub i32 %v, %ext + store i32 %add, i32 addrspace(1)* %gep, align 4 + ret void +} + +; sub x, zext (setcc) => subcarry x, 0, setcc +; GCN-LABEL: {{^}}cmp_sub_zext: +; GCN: v_cmp_gt_u32_e32 vcc, v +; GCN-NOT: vcc +; GCN: v_subbrev_u32_e32 [[RESULT:v[0-9]+]], vcc, 0, v{{[0-9]+}}, vcc +define amdgpu_kernel void @cmp_sub_zext(i32 addrspace(1)* nocapture %arg) { +bb: + %x = tail call i32 @llvm.amdgcn.workitem.id.x() + %y = tail call i32 @llvm.amdgcn.workitem.id.y() + %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x + %v = load i32, i32 addrspace(1)* %gep, align 4 + %cmp = icmp ugt i32 %x, %y + %ext = zext i1 %cmp to i32 + %add = sub i32 %v, %ext + store i32 %add, i32 addrspace(1)* %gep, align 4 + ret void +} + +; GCN-LABEL: {{^}}sub_addcarry: +; GCN: v_cmp_gt_u32_e32 vcc, v +; GCN-NOT: vcc +; GCN: v_addc_u32_e32 [[ADDC:v[0-9]+]], vcc, 0, v{{[0-9]+}}, vcc +; GCN-NOT: vcc +; GCN: 
v_subrev_i32_e32 [[RESULT:v[0-9]+]], vcc, +define amdgpu_kernel void @sub_addcarry(i32 addrspace(1)* nocapture %arg, i32 %a) { +bb: + %x = tail call i32 @llvm.amdgcn.workitem.id.x() + %y = tail call i32 @llvm.amdgcn.workitem.id.y() + %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x + %v = load i32, i32 addrspace(1)* %gep, align 4 + %cmp = icmp ugt i32 %x, %y + %ext = zext i1 %cmp to i32 + %adde = add i32 %v, %ext + %add2 = sub i32 %adde, %a + store i32 %add2, i32 addrspace(1)* %gep, align 4 + ret void +} + +; GCN-LABEL: {{^}}sub_subcarry: +; GCN: v_cmp_gt_u32_e32 vcc, v +; GCN-NOT: vcc +; GCN: v_subb_u32_e32 [[RESULT:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}, vcc +define amdgpu_kernel void @sub_subcarry(i32 addrspace(1)* nocapture %arg, i32 %a) { +bb: + %x = tail call i32 @llvm.amdgcn.workitem.id.x() + %y = tail call i32 @llvm.amdgcn.workitem.id.y() + %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x + %v = load i32, i32 addrspace(1)* %gep, align 4 + %cmp = icmp ugt i32 %x, %y + %ext = zext i1 %cmp to i32 + %adde = sub i32 %v, %ext + %add2 = sub i32 %adde, %a + store i32 %add2, i32 addrspace(1)* %gep, align 4 + ret void +} + declare i1 @llvm.amdgcn.class.f32(float, i32) #0 declare i32 @llvm.amdgcn.workitem.id.x() #0 |

