summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2019-11-11 14:46:38 +0530
committerMatt Arsenault <arsenm2@gmail.com>2019-11-15 13:43:47 +0530
commit31479d868e2ace47021324c387d0460cfa737296 (patch)
tree1f07e07147010aafd075c250dfc57bd55a468c56
parent69fcfb7d3597e0cdb5554b4e672e9032b411b167 (diff)
downloadbcm5719-llvm-31479d868e2ace47021324c387d0460cfa737296.tar.gz
bcm5719-llvm-31479d868e2ace47021324c387d0460cfa737296.zip
AMDGPU: Change boolean content type to 0 or 1
The usage of target boolean checks is overly inflexible, since sext and zext of a compare are equally cheap. The choice is arbitrary, but using 0/1 to some degree is the choice of lower resistance since that's what most targets use. This enables a few combines that don't bother to support ZeroOrNegativeOneBooleanContent.
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/R600ISelLowering.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp7
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp10
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll9
-rw-r--r--llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll14
-rw-r--r--llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll4
9 files changed, 33 insertions, 21 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index cc8c36a93fe..f669c98969b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -451,9 +451,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I)
setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);
- setBooleanContents(ZeroOrNegativeOneBooleanContent);
- setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
-
setSchedulingPreference(Sched::RegPressure);
setJumpIsExpensive(true);
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 697ea6607d5..6196be8ec83 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -62,6 +62,9 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);
+ setBooleanContents(ZeroOrNegativeOneBooleanContent);
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
computeRegisterProperties(Subtarget->getRegisterInfo());
// Legalize loads and stores to the private address space.
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a11926ec2d7..e26f0e3b611 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -160,6 +160,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
computeRegisterProperties(Subtarget->getRegisterInfo());
+ // The boolean content concept here is too inflexible. Compares only ever
+ // really produce a 1-bit result. Any copy/extend from these will turn into a
+ // select, and zext/1 or sext/-1 are equally cheap. Arbitrarily choose 0/1, as
+ // it's what most targets use.
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent);
+
// We need to custom lower vector stores from local memory
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setOperationAction(ISD::LOAD, MVT::v3i32, Custom);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 3b0a41b9960..3737d0a7b41 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -542,7 +542,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
RC == &AMDGPU::SReg_32RegClass) {
if (SrcReg == AMDGPU::SCC) {
BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
- .addImm(-1)
+ .addImm(1)
.addImm(0);
return;
}
@@ -840,7 +840,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
Register SReg = MRI.createVirtualRegister(BoolXExecRC);
BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
: AMDGPU::S_CSELECT_B64), SReg)
- .addImm(-1)
+ .addImm(1)
.addImm(0);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addImm(0)
@@ -855,7 +855,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
: AMDGPU::S_CSELECT_B64), SReg)
.addImm(0)
- .addImm(-1);
+ .addImm(1);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addImm(0)
.addReg(FalseReg)
@@ -900,7 +900,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
.addImm(0);
BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
: AMDGPU::S_CSELECT_B64), SReg)
- .addImm(-1)
+ .addImm(1)
.addImm(0);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addImm(0)
@@ -919,7 +919,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
: AMDGPU::S_CSELECT_B64), SReg)
.addImm(0)
- .addImm(-1);
+ .addImm(1);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addImm(0)
.addReg(FalseReg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll
index 0f259fcb895..f135b67e43a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll
@@ -9,7 +9,7 @@ define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) {
; GCN-NEXT: ; implicit-def: $vcc_hi
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_eq_u32 s0, 0
-; GCN-NEXT: s_cselect_b32 s0, -1, 0
+; GCN-NEXT: s_cselect_b32 s0, 1, 0
; GCN-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
; GCN-NEXT: s_or_b32 s0, s0, s1
; GCN-NEXT: v_mov_b32_e32 v0, s0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll
index e6d46f967cd..fc260b57ceb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll
@@ -8,7 +8,7 @@ define amdgpu_kernel void @test_wave64(i32 %arg0, [8 x i32], i64 %saved) {
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_eq_u32 s2, 0
-; GCN-NEXT: s_cselect_b32 s2, -1, 0
+; GCN-NEXT: s_cselect_b32 s2, 1, 0
; GCN-NEXT: v_cmp_ne_u32_e64 s[2:3], 0, s2
; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, s0
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll
index f93c96bc553..d38452a3a22 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll
@@ -13,7 +13,14 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; GCN-LABEL: {{^}}work_item_info:
; GCN-NOT: v0
-; GCN: v_add_{{[iu]}}32_e32 [[RESULT:v[0-9]+]], vcc, v{{[0-9]+}}, v0
+; GCN: s_load_dword [[IN:s[0-9]+]]
+; GCN-NOT: v0
+
+; GCN-ALLOCA: v_add_{{[iu]}}32_e32 [[RESULT:v[0-9]+]], vcc, v{{[0-9]+}}, v0
+
+; GCN-PROMOTE: v_cmp_eq_u32_e64 vcc, [[IN]], 1
+; GCN-PROMOTE-NEXT: v_addc_u32_e32 [[RESULT:v[0-9]+]], vcc, 0, v0, vcc
+
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
index 99d2bc9863f..37ae4d2d5c2 100644
--- a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
; GCN-LABEL: {{^}}add1:
; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
@@ -130,16 +130,14 @@ bb:
; GCN-LABEL: {{^}}sub_sube_commuted:
; GCN-DAG: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-DAG: buffer_load_dword [[V:v[0-9]+]],
-; GCN: v_subbrev_u32_e{{32|64}} [[SUBB:v[0-9]+]], {{[^,]+}}, 0, [[V]], [[CC]]
-; GCN: v_sub_i32_e32 [[SUB:v[0-9]+]], vcc, s{{[0-9]+}}, [[SUBB]]
-; GCN: v_add_i32_e32 {{.*}}, 0x64, [[SUB]]
+; GCN: v_addc_u32_e32 [[ADDC:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
+; GCN: v_add_i32_e32 {{.*}}, 0x64, [[ADDC]]
; GFX9-LABEL: {{^}}sub_sube_commuted:
; GFX9-DAG: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GFX9-DAG: global_load_dword [[V:v[0-9]+]],
-; GFX9: v_subbrev_co_u32_e{{32|64}} [[SUBB:v[0-9]+]], {{[^,]+}}, 0, [[V]], [[CC]]
-; GFX9: v_sub_u32_e32 [[SUB:v[0-9]+]], s{{[0-9]+}}, [[SUBB]]
-; GFX9: v_add_u32_e32 {{.*}}, 0x64, [[SUB]]
+; GFX9: v_addc_co_u32_e32 [[ADDC:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
+; GFX9: v_add_u32_e32 {{.*}}, 0x64, [[ADDC]]
define amdgpu_kernel void @sub_sube_commuted(i32 addrspace(1)* nocapture %arg, i32 %a) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
index a2b68cc4214..eb95efd1860 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
@@ -306,10 +306,10 @@ entry:
; GCN-LABEL: {{^}}bit128_extelt:
; GCN-NOT: buffer_
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 1, 0,
+; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1
; GCN-DAG: v_mov_b32_e32 [[LASTIDX:v[0-9]+]], 0x7f
; GCN-DAG: v_cmp_ne_u32_e32 [[CL:[^,]+]], s{{[0-9]+}}, [[LASTIDX]]
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[VL:v[0-9]+]], 0, v{{[0-9]+}}, [[CL]]
+; GCN-DAG: v_cndmask_b32_e{{32|64}} [[VL:v[0-9]+]], 0, [[V1]], [[CL]]
; GCN: v_and_b32_e32 [[RES:v[0-9]+]], 1, [[VL]]
; GCN: store_dword v[{{[0-9:]+}}], [[RES]]
define amdgpu_kernel void @bit128_extelt(i32 addrspace(1)* %out, i32 %sel) {
OpenPOWER on IntegriCloud