diff options
author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-02-21 02:58:00 +0000 |
---|---|---|
committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-02-21 02:58:00 +0000 |
commit | 42e229e13082a5dfb39aa43b40a8d7c75df92770 (patch) | |
tree | 1be42eae845fbe619b9b552aa37c91c6b2f85ab2 | |
parent | 500606f270ffcbdc199ee581fb2147700a7bb139 (diff) | |
download | bcm5719-llvm-42e229e13082a5dfb39aa43b40a8d7c75df92770.tar.gz bcm5719-llvm-42e229e13082a5dfb39aa43b40a8d7c75df92770.zip |
[AMDGPU] fix commuted case of sub combine
Differential Revision: https://reviews.llvm.org/D58481
llvm-svn: 354543
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll | 28 |
2 files changed, 29 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 44edd6d1295..96eb619497b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8616,14 +8616,10 @@ SDValue SITargetLowering::performSubCombine(SDNode *N,
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
 
-  unsigned Opc = LHS.getOpcode();
-  if (Opc != ISD::SUBCARRY)
-    std::swap(RHS, LHS);
-
   if (LHS.getOpcode() == ISD::SUBCARRY) {
     // sub (subcarry x, 0, cc), y => subcarry x, y, cc
     auto C = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
-    if (!C || C->getZExtValue() != 0)
+    if (!C || !C->isNullValue())
       return SDValue();
     SDValue Args[] = { LHS.getOperand(0), RHS, LHS.getOperand(2) };
     return DAG.getNode(ISD::SUBCARRY, SDLoc(N), LHS->getVTList(), Args);
diff --git a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
index 5b3ad15fb1a..ae30ad37c37 100644
--- a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
@@ -127,6 +127,34 @@ bb:
   ret void
 }
 
+; GCN-LABEL: {{^}}sub_sube_commuted:
+; GCN-DAG: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-DAG: buffer_load_dword [[V:v[0-9]+]],
+; GCN: v_subbrev_u32_e{{32|64}} [[SUBB:v[0-9]+]], {{[^,]+}}, 0, [[V]], [[CC]]
+; GCN: v_sub_i32_e32 [[SUB:v[0-9]+]], vcc, s{{[0-9]+}}, [[SUBB]]
+; GCN: v_add_i32_e32 {{.*}}, 0x64, [[SUB]]
+
+; GFX9-LABEL: {{^}}sub_sube_commuted:
+; GFX9-DAG: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GFX9-DAG: global_load_dword [[V:v[0-9]+]],
+; GFX9: v_subbrev_co_u32_e{{32|64}} [[SUBB:v[0-9]+]], {{[^,]+}}, 0, [[V]], [[CC]]
+; GFX9: v_sub_u32_e32 [[SUB:v[0-9]+]], s{{[0-9]+}}, [[SUBB]]
+; GFX9: v_add_u32_e32 {{.*}}, 0x64, [[SUB]]
+define amdgpu_kernel void @sub_sube_commuted(i32 addrspace(1)* nocapture %arg, i32 %a) {
+bb:
+  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
+  %v = load i32, i32 addrspace(1)* %gep, align 4
+  %cmp = icmp ugt i32 %x, %y
+  %ext = sext i1 %cmp to i32
+  %adde = add i32 %v, %ext
+  %sub = sub i32 %adde, %a
+  %sub2 = sub i32 100, %sub
+  store i32 %sub2, i32 addrspace(1)* %gep, align 4
+  ret void
+}
+
 ; GCN-LABEL: {{^}}sube_sub:
 ; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN: v_subb_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]