author     Matt Arsenault <Matthew.Arsenault@amd.com>  2015-02-20 22:10:45 +0000
committer  Matt Arsenault <Matthew.Arsenault@amd.com>  2015-02-20 22:10:45 +0000
commit     20711b7baebab2eeb51015453588c46d8b9d62e9 (patch)
tree       e6bad2cb187d114e43db86d5e20eca05d1a2373a /llvm/test
parent     8d6300346f77b8729b7e80d5dd736d90d595f88b (diff)
R600/SI: Remove v_sub_f64 pseudo
The expansion code does the same thing. Since the pseudo's operands
were not defined with the correct types, removing it has the side
effect of fixing operand folding: the expanded pseudo would never use
SGPRs or inline immediates.
llvm-svn: 230072
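To make the effect concrete, here is a minimal sketch of the lowering these tests pin down, distilled from the s_fsub_f64 test in the diff below (the function name and exact register numbers are illustrative). With the pseudo removed, an f64 subtraction selects directly to v_add_f64 with the neg source modifier on the second source, so SGPR and inline-immediate operands fold into the instruction instead of first being copied to VGPRs:

; IR input:
define void @sub_example(double addrspace(1)* %out, double %a, double %b) {
  %sub = fsub double %a, %b          ; a - b is selected as a + (-b)
  store double %sub, double addrspace(1)* %out
  ret void
}

; Expected SI code for the fsub (register numbers illustrative):
;   v_add_f64 v[0:1], s[4:5], -v[2:3]
; The first source stays in an SGPR pair; the old v_sub_f64 pseudo
; would have forced both sources into VGPRs.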
Diffstat (limited to 'llvm/test')
 llvm/test/CodeGen/R600/fneg-fabs.f64.ll |   4 +-
 llvm/test/CodeGen/R600/fneg.f64.ll      |   3 +-
 llvm/test/CodeGen/R600/fsub64.ll        | 104 ++++++++++++++++++++++++++++---
 3 files changed, 101 insertions(+), 10 deletions(-)
diff --git a/llvm/test/CodeGen/R600/fneg-fabs.f64.ll b/llvm/test/CodeGen/R600/fneg-fabs.f64.ll
index ee9f82d2f09..7e6ede69871 100644
--- a/llvm/test/CodeGen/R600/fneg-fabs.f64.ll
+++ b/llvm/test/CodeGen/R600/fneg-fabs.f64.ll
@@ -5,9 +5,7 @@
 ; into 2 modifiers, although theoretically that should work.
 
 ; FUNC-LABEL: {{^}}fneg_fabs_fadd_f64:
-; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x7fffffff
-; SI: v_and_b32_e32 v[[FABS:[0-9]+]], {{s[0-9]+}}, [[IMMREG]]
-; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+}}:[[FABS]]{{\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|v{{\[[0-9]+:[0-9]+\]}}|
 define void @fneg_fabs_fadd_f64(double addrspace(1)* %out, double %x, double %y) {
   %fabs = call double @llvm.fabs.f64(double %x)
   %fsub = fsub double -0.000000e+00, %fabs
diff --git a/llvm/test/CodeGen/R600/fneg.f64.ll b/llvm/test/CodeGen/R600/fneg.f64.ll
index 9ea189c3143..aa6df209035 100644
--- a/llvm/test/CodeGen/R600/fneg.f64.ll
+++ b/llvm/test/CodeGen/R600/fneg.f64.ll
@@ -39,8 +39,7 @@ define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double>
 ; unless the target returns true for isNegFree()
 
 ; FUNC-LABEL: {{^}}fneg_free_f64:
-; FIXME: Unnecessary copy to VGPRs
-; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]$}}
+; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, 0, -{{s\[[0-9]+:[0-9]+\]$}}
 define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
   %bc = bitcast i64 %in to double
   %fsub = fsub double 0.0, %bc
diff --git a/llvm/test/CodeGen/R600/fsub64.ll b/llvm/test/CodeGen/R600/fsub64.ll
index 62f46142fe0..2d85cc5bcf7 100644
--- a/llvm/test/CodeGen/R600/fsub64.ll
+++ b/llvm/test/CodeGen/R600/fsub64.ll
@@ -1,13 +1,107 @@
 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
+declare double @llvm.fabs.f64(double) #0
+
 ; SI-LABEL: {{^}}fsub_f64:
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
 define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                       double addrspace(1)* %in2) {
-   %r0 = load double addrspace(1)* %in1
-   %r1 = load double addrspace(1)* %in2
-   %r2 = fsub double %r0, %r1
-   store double %r2, double addrspace(1)* %out
-   ret void
+  %r0 = load double addrspace(1)* %in1
+  %r1 = load double addrspace(1)* %in2
+  %r2 = fsub double %r0, %r1
+  store double %r2, double addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}fsub_fabs_f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|}}
+define void @fsub_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+                           double addrspace(1)* %in2) {
+  %r0 = load double addrspace(1)* %in1
+  %r1 = load double addrspace(1)* %in2
+  %r1.fabs = call double @llvm.fabs.f64(double %r1) #0
+  %r2 = fsub double %r0, %r1.fabs
+  store double %r2, double addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}fsub_fabs_inv_f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], |v\[[0-9]+:[0-9]+\]|, -v\[[0-9]+:[0-9]+\]}}
+define void @fsub_fabs_inv_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+                               double addrspace(1)* %in2) {
+  %r0 = load double addrspace(1)* %in1
+  %r1 = load double addrspace(1)* %in2
+  %r0.fabs = call double @llvm.fabs.f64(double %r0) #0
+  %r2 = fsub double %r0.fabs, %r1
+  store double %r2, double addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_fsub_f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+define void @s_fsub_f64(double addrspace(1)* %out, double %a, double %b) {
+  %sub = fsub double %a, %b
+  store double %sub, double addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_fsub_imm_f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], 4.0, -s\[[0-9]+:[0-9]+\]}}
+define void @s_fsub_imm_f64(double addrspace(1)* %out, double %a, double %b) {
+  %sub = fsub double 4.0, %a
+  store double %sub, double addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_fsub_imm_inv_f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], -4.0, s\[[0-9]+:[0-9]+\]}}
+define void @s_fsub_imm_inv_f64(double addrspace(1)* %out, double %a, double %b) {
+  %sub = fsub double %a, 4.0
+  store double %sub, double addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_fsub_self_f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -s\[[0-9]+:[0-9]+\]}}
+define void @s_fsub_self_f64(double addrspace(1)* %out, double %a) {
+  %sub = fsub double %a, %a
+  store double %sub, double addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}fsub_v2f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+define void @fsub_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) {
+  %sub = fsub <2 x double> %a, %b
+  store <2 x double> %sub, <2 x double> addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}fsub_v4f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+define void @fsub_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) {
+  %b_ptr = getelementptr <4 x double> addrspace(1)* %in, i32 1
+  %a = load <4 x double> addrspace(1)* %in
+  %b = load <4 x double> addrspace(1)* %b_ptr
+  %result = fsub <4 x double> %a, %b
+  store <4 x double> %result, <4 x double> addrspace(1)* %out
+  ret void
 }
+
+; SI-LABEL: {{^}}s_fsub_v4f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+define void @s_fsub_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) {
+  %result = fsub <4 x double> %a, %b
+  store <4 x double> %result, <4 x double> addrspace(1)* %out, align 16
+  ret void
+}
+
+attributes #0 = { nounwind readnone }