path: root/llvm
author    Matt Arsenault <Matthew.Arsenault@amd.com>  2016-11-01 23:14:20 +0000
committer Matt Arsenault <Matthew.Arsenault@amd.com>  2016-11-01 23:14:20 +0000
commit    663ab8c1190c78a534c4bce1d6c8093537e06a72 (patch)
tree      8f3f737865ad46fb80dd9d7249f0ede72fda6348 /llvm
parent    cfadbd947808e81f760f344477121a7028ff1edc (diff)
AMDGPU: Use brev for materializing SGPR constants

This is already done with VGPR immediates and saves 4 bytes.

llvm-svn: 285765
Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp           40
-rw-r--r--  llvm/test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll  63
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll                  5
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll                  6
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fneg-fabs.ll                      4
-rw-r--r--  llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll                 4
-rw-r--r--  llvm/test/CodeGen/AMDGPU/llvm.round.ll                     2
7 files changed, 102 insertions, 22 deletions
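
Why this saves space: GCN encodes integer inline immediates in the range
[-16, 64] directly in the instruction word, while any other 32-bit constant
costs an extra 4-byte literal dword. The sketch below is a standalone
approximation of the check this patch adds, not the in-tree pass code;
reverseBits32 and fitsAsReverseInlineImm are illustrative names mirroring
LLVM's reverseBits and the patch's isReverseInlineImm. If the bit-reversed
constant is an inline immediate, s_brev_b32 (or v_bfrev_b32 on the VGPR
path) of that immediate rebuilds the original value in a single 4-byte
instruction instead of mov-plus-literal.

#include <cstdint>
#include <cstdio>

// Mirrors llvm::reverseBits on uint32_t: bit I moves to bit 31 - I.
static uint32_t reverseBits32(uint32_t V) {
  uint32_t R = 0;
  for (int I = 0; I < 32; ++I)
    R |= ((V >> I) & 1u) << (31 - I);
  return R;
}

// Illustrative stand-in for the patch's isReverseInlineImm: true if Imm can
// be materialized as a bit-reverse of an inline immediate in [-16, 64].
static bool fitsAsReverseInlineImm(int32_t Imm, int32_t &ReverseImm) {
  ReverseImm = static_cast<int32_t>(reverseBits32(static_cast<uint32_t>(Imm)));
  return ReverseImm >= -16 && ReverseImm <= 64;
}

int main() {
  int32_t Rev;
  // 0x80000000 (the f32 sign bit) reverses to 1: s_brev_b32 s0, 1.
  printf("%d %d\n", fitsAsReverseInlineImm(INT32_MIN, Rev), Rev);  // 1 1
  // 0x7fffffff (the fabs mask) reverses to -2: s_brev_b32 s0, -2.
  printf("%d %d\n", fitsAsReverseInlineImm(INT32_MAX, Rev), Rev);  // 1 -2
  // 0x82000000 reverses to 65, just out of range: keep the literal.
  printf("%d %d\n",
         fitsAsReverseInlineImm(static_cast<int32_t>(0x82000000u), Rev),
         Rev);                                                     // 0 65
  return 0;
}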
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 26bc2b4e35e..7f9e9cded63 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -206,6 +206,18 @@ static bool isKImmOrKUImmOperand(const SIInstrInfo *TII,
  return false;
}

+/// \returns true if the constant in \p Src should be replaced with a bitreverse
+/// of an inline immediate.
+static bool isReverseInlineImm(const SIInstrInfo *TII,
+                               const MachineOperand &Src,
+                               int32_t &ReverseImm) {
+  if (!isInt<32>(Src.getImm()) || TII->isInlineConstant(Src, 4))
+    return false;
+
+  ReverseImm = reverseBits<int32_t>(static_cast<int32_t>(Src.getImm()));
+  return ReverseImm >= -16 && ReverseImm <= 64;
+}
+
/// Copy implicit register operands from specified instruction to this
/// instruction that are not part of the instruction definition.
static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF,
@@ -290,14 +302,11 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
        MachineOperand &Src = MI.getOperand(1);
        if (Src.isImm() &&
            TargetRegisterInfo::isPhysicalRegister(MI.getOperand(0).getReg())) {
-          int64_t Imm = Src.getImm();
-          if (isInt<32>(Imm) && !TII->isInlineConstant(Src, 4)) {
-            int32_t ReverseImm = reverseBits<int32_t>(static_cast<int32_t>(Imm));
-            if (ReverseImm >= -16 && ReverseImm <= 64) {
-              MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
-              Src.setImm(ReverseImm);
-              continue;
-            }
+          int32_t ReverseImm;
+          if (isReverseInlineImm(TII, Src, ReverseImm)) {
+            MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
+            Src.setImm(ReverseImm);
+            continue;
          }
        }
      }
@@ -374,10 +383,19 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {

      // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
      if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
-        const MachineOperand &Src = MI.getOperand(1);
+        const MachineOperand &Dst = MI.getOperand(0);
+        MachineOperand &Src = MI.getOperand(1);

-        if (Src.isImm() && isKImmOperand(TII, Src))
-          MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
+        if (Src.isImm() &&
+            TargetRegisterInfo::isPhysicalRegister(Dst.getReg())) {
+          int32_t ReverseImm;
+          if (isKImmOperand(TII, Src))
+            MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
+          else if (isReverseInlineImm(TII, Src, ReverseImm)) {
+            MI.setDesc(TII->get(AMDGPU::S_BREV_B32));
+            Src.setImm(ReverseImm);
+          }
+        }

        continue;
      }
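
A quick cross-check of the new s_materialize_* tests that follow (my own
arithmetic, not part of the commit): the boundary constants bit-reverse
exactly onto the edges of the inline-immediate range, which is why
33554432 and 268435455 shrink to s_brev_b32 while their neighbors keep the
32-bit literal, and why 508 becomes s_movk_i32 instead (it fits a signed
16-bit immediate).

#include <cassert>
#include <cstdint>

// Same bit-reversal as the sketch above: bit I moves to bit 31 - I.
static int32_t rev32(uint32_t V) {
  uint32_t R = 0;
  for (int I = 0; I < 32; ++I)
    R |= ((V >> I) & 1u) << (31 - I);
  return static_cast<int32_t>(R);
}

int main() {
  assert(rev32(0x80000000u) == 1);   // -2147483648 -> s_brev_b32 ..., 1
  assert(rev32(0x02000000u) == 64);  // 33554432    -> s_brev_b32 ..., 64
  assert(rev32(0x82000000u) == 65);  // -2113929216 -> 65 out of range: literal
  assert(rev32(0x0fffffffu) == -16); // 268435455   -> s_brev_b32 ..., -16
  assert(rev32(0xf7ffffffu) == -17); // -134217729  -> -17 out of range: literal
  assert(rev32(508u) == 0x3f800000); // rev is 1.0f's bit pattern; s_movk_i32
  return 0;
}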
diff --git a/llvm/test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll b/llvm/test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll
index 150e3430a5e..f7dc1a9d37e 100644
--- a/llvm/test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll
+++ b/llvm/test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll
@@ -156,3 +156,66 @@ define void @materialize_rev_1.0_i64(i64 addrspace(1)* %out) {
  store i64 508, i64 addrspace(1)* %out
  ret void
}
+
+; GCN-LABEL: {{^}}s_materialize_0_i32:
+; GCN: s_mov_b32 s{{[0-9]+}}, 0{{$}}
+define void @s_materialize_0_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_materialize_1_i32:
+; GCN: s_mov_b32 s{{[0-9]+}}, 1{{$}}
+define void @s_materialize_1_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 1)
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_materialize_neg1_i32:
+; GCN: s_mov_b32 s{{[0-9]+}}, -1{{$}}
+define void @s_materialize_neg1_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 -1)
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_materialize_signbit_i32:
+; GCN: s_brev_b32 s{{[0-9]+}}, 1{{$}}
+define void @s_materialize_signbit_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 -2147483648)
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_materialize_rev_64_i32:
+; GCN: s_brev_b32 s{{[0-9]+}}, 64{{$}}
+define void @s_materialize_rev_64_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 33554432)
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_materialize_rev_65_i32:
+; GCN: s_mov_b32 s{{[0-9]+}}, 0x82000000{{$}}
+define void @s_materialize_rev_65_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 -2113929216)
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_materialize_rev_neg16_i32:
+; GCN: s_brev_b32 s{{[0-9]+}}, -16{{$}}
+define void @s_materialize_rev_neg16_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 268435455)
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_materialize_rev_neg17_i32:
+; GCN: s_mov_b32 s{{[0-9]+}}, 0xf7ffffff{{$}}
+define void @s_materialize_rev_neg17_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 -134217729)
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_materialize_rev_1.0_i32:
+; GCN: s_movk_i32 s{{[0-9]+}}, 0x1fc{{$}}
+define void @s_materialize_rev_1.0_i32() {
+  call void asm sideeffect "; use $0", "s"(i32 508)
+  ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll b/llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll
index b719d5a3978..9f01a660be3 100644
--- a/llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll
@@ -1,8 +1,7 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-

declare float @llvm.copysign.f32(float, float) nounwind readnone
declare <2 x float> @llvm.copysign.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) nounwind readnone
@@ -15,7 +14,7 @@ declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) nounwind read
; VI: s_load_dword [[SSIGN:s[0-9]+]], {{.*}} 0x30
; GCN-DAG: v_mov_b32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
; GCN-DAG: v_mov_b32_e32 [[VMAG:v[0-9]+]], [[SMAG]]
-; GCN-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
+; GCN-DAG: s_brev_b32 [[SCONST:s[0-9]+]], -2
; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll b/llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll
index 9a1287d4baa..b34a4695387 100644
--- a/llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s

declare double @llvm.copysign.f64(double, double) nounwind readnone
@@ -12,7 +12,7 @@ declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) nounwind r
; VI-DAG: s_load_dwordx2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34
; GCN-DAG: v_mov_b32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
; GCN-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
-; GCN-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
+; GCN-DAG: s_brev_b32 [[SCONST:s[0-9]+]], -2
; GCN-DAG: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
; GCN-DAG: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
; GCN: buffer_store_dwordx2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
@@ -26,7 +26,7 @@ define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %s
; FUNC-LABEL: {{^}}test_copysign_f64_f32:
; GCN-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}
; GCN-DAG: s_load_dword s[[SSIGN:[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}
-; GCN-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
+; GCN-DAG: s_brev_b32 [[SCONST:s[0-9]+]], -2{{$}}
; GCN-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
; GCN-DAG: v_mov_b32_e32 v[[VSIGN:[0-9]+]], s[[SSIGN]]
; GCN-DAG: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN]]
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
index 1362fa7a908..9ee1171306c 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
@@ -83,7 +83,7 @@ define void @v_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in)
; R600: -PV

; FIXME: In this case two uses of the constant should be folded
-; SI: s_mov_b32 [[SIGNBITK:s[0-9]+]], 0x80000000
+; SI: s_brev_b32 [[SIGNBITK:s[0-9]+]], 1{{$}}
; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
define void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
@@ -94,7 +94,7 @@ define void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
}

; FUNC-LABEL: {{^}}fneg_fabs_v4f32:
-; SI: s_mov_b32 [[SIGNBITK:s[0-9]+]], 0x80000000
+; SI: s_brev_b32 [[SIGNBITK:s[0-9]+]], 1{{$}}
; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
index d5187adc0e6..3ea4551f0ee 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; FUNC-LABEL: {{^}}round_f64:
; SI: s_endpgm
@@ -20,7 +20,7 @@ define void @round_f64(double addrspace(1)* %out, double %x) #0 {
; SI-DAG: v_cmp_eq_u32
-; SI-DAG: s_mov_b32 [[BFIMASK:s[0-9]+]], 0x7fffffff
+; SI-DAG: s_brev_b32 [[BFIMASK:s[0-9]+]], -2{{$}}
; SI-DAG: v_cmp_gt_i32
; SI-DAG: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[BFIMASK]]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.round.ll
index d450c6fc0fa..86002662e0b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.round.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.round.ll
@@ -4,7 +4,7 @@

; FUNC-LABEL: {{^}}round_f32:
; SI-DAG: s_load_dword [[SX:s[0-9]+]]
-; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x7fffffff
+; SI-DAG: s_brev_b32 [[K:s[0-9]+]], -2{{$}}
; SI-DAG: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[SX]]
; SI-DAG: v_sub_f32_e32 [[SUB:v[0-9]+]], [[SX]], [[TRUNC]]
; SI-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[SX]]