summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2015-02-20 22:10:45 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2015-02-20 22:10:45 +0000
commit20711b7baebab2eeb51015453588c46d8b9d62e9 (patch)
treee6bad2cb187d114e43db86d5e20eca05d1a2373a
parent8d6300346f77b8729b7e80d5dd736d90d595f88b (diff)
downloadbcm5719-llvm-20711b7baebab2eeb51015453588c46d8b9d62e9.tar.gz
bcm5719-llvm-20711b7baebab2eeb51015453588c46d8b9d62e9.zip
R600/SI: Remove v_sub_f64 pseudo
The expansion code does the same thing. Since the operands were not defined with the correct types, this has the side effect of fixing operand folding since the expanded pseudo would never use SGPRs or inline immediates. llvm-svn: 230072
-rw-r--r--llvm/lib/Target/R600/AMDGPUISelLowering.cpp3
-rw-r--r--llvm/lib/Target/R600/SIISelLowering.cpp15
-rw-r--r--llvm/lib/Target/R600/SIInstructions.td11
-rw-r--r--llvm/test/CodeGen/R600/fneg-fabs.f64.ll4
-rw-r--r--llvm/test/CodeGen/R600/fneg.f64.ll3
-rw-r--r--llvm/test/CodeGen/R600/fsub64.ll104
6 files changed, 106 insertions, 34 deletions
diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
index 18697c8d3f6..4707279ce87 100644
--- a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -137,6 +137,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
if (!Subtarget->hasFP32Denormals())
setOperationAction(ISD::FMAD, MVT::f32, Legal);
+ // Expand to fneg + fadd.
+ setOperationAction(ISD::FSUB, MVT::f64, Expand);
+
// Lower floating point store/load to integer store/load to reduce the number
// of patterns in tablegen.
setOperationAction(ISD::STORE, MVT::f32, Promote);
diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp
index 74287144cc2..30b8c452908 100644
--- a/llvm/lib/Target/R600/SIISelLowering.cpp
+++ b/llvm/lib/Target/R600/SIISelLowering.cpp
@@ -604,19 +604,8 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
switch (MI->getOpcode()) {
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
- case AMDGPU::BRANCH: return BB;
- case AMDGPU::V_SUB_F64: {
- unsigned DestReg = MI->getOperand(0).getReg();
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), DestReg)
- .addImm(0) // SRC0 modifiers
- .addReg(MI->getOperand(1).getReg())
- .addImm(1) // SRC1 modifiers
- .addReg(MI->getOperand(2).getReg())
- .addImm(0) // CLAMP
- .addImm(0); // OMOD
- MI->eraseFromParent();
- break;
- }
+ case AMDGPU::BRANCH:
+ return BB;
case AMDGPU::SI_RegisterStorePseudo: {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td
index 4c5493ea5e4..4c51b6fbad8 100644
--- a/llvm/lib/Target/R600/SIInstructions.td
+++ b/llvm/lib/Target/R600/SIInstructions.td
@@ -1970,17 +1970,6 @@ def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
} // Uses = [EXEC,VCC,M0], Defs = [EXEC,VCC,M0]
-let usesCustomInserter = 1 in {
-
-def V_SUB_F64 : InstSI <
- (outs VReg_64:$dst),
- (ins VReg_64:$src0, VReg_64:$src1),
- "v_sub_f64 $dst, $src0, $src1",
- [(set f64:$dst, (fsub f64:$src0, f64:$src1))]
->;
-
-} // end usesCustomInserter
-
multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
let UseNamedOperandTable = 1 in {
diff --git a/llvm/test/CodeGen/R600/fneg-fabs.f64.ll b/llvm/test/CodeGen/R600/fneg-fabs.f64.ll
index ee9f82d2f09..7e6ede69871 100644
--- a/llvm/test/CodeGen/R600/fneg-fabs.f64.ll
+++ b/llvm/test/CodeGen/R600/fneg-fabs.f64.ll
@@ -5,9 +5,7 @@
; into 2 modifiers, although theoretically that should work.
; FUNC-LABEL: {{^}}fneg_fabs_fadd_f64:
-; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x7fffffff
-; SI: v_and_b32_e32 v[[FABS:[0-9]+]], {{s[0-9]+}}, [[IMMREG]]
-; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+}}:[[FABS]]{{\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|v{{\[[0-9]+:[0-9]+\]}}|
define void @fneg_fabs_fadd_f64(double addrspace(1)* %out, double %x, double %y) {
%fabs = call double @llvm.fabs.f64(double %x)
%fsub = fsub double -0.000000e+00, %fabs
diff --git a/llvm/test/CodeGen/R600/fneg.f64.ll b/llvm/test/CodeGen/R600/fneg.f64.ll
index 9ea189c3143..aa6df209035 100644
--- a/llvm/test/CodeGen/R600/fneg.f64.ll
+++ b/llvm/test/CodeGen/R600/fneg.f64.ll
@@ -39,8 +39,7 @@ define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double>
; unless the target returns true for isNegFree()
; FUNC-LABEL: {{^}}fneg_free_f64:
-; FIXME: Unnecessary copy to VGPRs
-; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]$}}
+; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, 0, -{{s\[[0-9]+:[0-9]+\]$}}
define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
%bc = bitcast i64 %in to double
%fsub = fsub double 0.0, %bc
diff --git a/llvm/test/CodeGen/R600/fsub64.ll b/llvm/test/CodeGen/R600/fsub64.ll
index 62f46142fe0..2d85cc5bcf7 100644
--- a/llvm/test/CodeGen/R600/fsub64.ll
+++ b/llvm/test/CodeGen/R600/fsub64.ll
@@ -1,13 +1,107 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+declare double @llvm.fabs.f64(double) #0
+
; SI-LABEL: {{^}}fsub_f64:
; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
- %r2 = fsub double %r0, %r1
- store double %r2, double addrspace(1)* %out
- ret void
+ %r0 = load double addrspace(1)* %in1
+ %r1 = load double addrspace(1)* %in2
+ %r2 = fsub double %r0, %r1
+ store double %r2, double addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}fsub_fabs_f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|}}
+define void @fsub_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+ double addrspace(1)* %in2) {
+ %r0 = load double addrspace(1)* %in1
+ %r1 = load double addrspace(1)* %in2
+ %r1.fabs = call double @llvm.fabs.f64(double %r1) #0
+ %r2 = fsub double %r0, %r1.fabs
+ store double %r2, double addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}fsub_fabs_inv_f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], |v\[[0-9]+:[0-9]+\]|, -v\[[0-9]+:[0-9]+\]}}
+define void @fsub_fabs_inv_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+ double addrspace(1)* %in2) {
+ %r0 = load double addrspace(1)* %in1
+ %r1 = load double addrspace(1)* %in2
+ %r0.fabs = call double @llvm.fabs.f64(double %r0) #0
+ %r2 = fsub double %r0.fabs, %r1
+ store double %r2, double addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_fsub_f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+define void @s_fsub_f64(double addrspace(1)* %out, double %a, double %b) {
+ %sub = fsub double %a, %b
+ store double %sub, double addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_fsub_imm_f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], 4.0, -s\[[0-9]+:[0-9]+\]}}
+define void @s_fsub_imm_f64(double addrspace(1)* %out, double %a, double %b) {
+ %sub = fsub double 4.0, %a
+ store double %sub, double addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_fsub_imm_inv_f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], -4.0, s\[[0-9]+:[0-9]+\]}}
+define void @s_fsub_imm_inv_f64(double addrspace(1)* %out, double %a, double %b) {
+ %sub = fsub double %a, 4.0
+ store double %sub, double addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_fsub_self_f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -s\[[0-9]+:[0-9]+\]}}
+define void @s_fsub_self_f64(double addrspace(1)* %out, double %a) {
+ %sub = fsub double %a, %a
+ store double %sub, double addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}fsub_v2f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+define void @fsub_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) {
+ %sub = fsub <2 x double> %a, %b
+ store <2 x double> %sub, <2 x double> addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}fsub_v4f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+define void @fsub_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) {
+ %b_ptr = getelementptr <4 x double> addrspace(1)* %in, i32 1
+ %a = load <4 x double> addrspace(1)* %in
+ %b = load <4 x double> addrspace(1)* %b_ptr
+ %result = fsub <4 x double> %a, %b
+ store <4 x double> %result, <4 x double> addrspace(1)* %out
+ ret void
}
+
+; SI-LABEL: {{^}}s_fsub_v4f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+define void @s_fsub_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) {
+ %result = fsub <4 x double> %a, %b
+ store <4 x double> %result, <4 x double> addrspace(1)* %out, align 16
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
OpenPOWER on IntegriCloud