AMDGPU: Use clamp with f64

llvm-svn: 295908
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-02-22 23:53:37 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-02-22 23:53:37 +0000
commit: 79a45db7f5752be9fd1f4f718c7ac90a70972662 (patch)
tree: e8e35cfc031857f787b26cd01732357f884dcc5c /llvm/test
parent: 98f9bcc1c92845e4a0d3995f39bf1cb0d6b28795 (diff)
download: bcm5719-llvm-79a45db7f5752be9fd1f4f718c7ac90a70972662.tar.gz
bcm5719-llvm-79a45db7f5752be9fd1f4f718c7ac90a70972662.zip
2 files changed, 18 insertions, 6 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
index f5c53c64db1..186bd349ecc 100644
--- a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
@@ -153,6 +153,21 @@ define amdgpu_kernel void @v_clamp_add_src_v2f32(<2 x float> addrspace(1)* %out,
   ret void
 }
 
+; GCN-LABEL: {{^}}v_clamp_add_src_f64:
+; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
+; GCN: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, [[A]], 1.0 clamp{{$}}
+define amdgpu_kernel void @v_clamp_add_src_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
+  %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
+  %a = load double, double addrspace(1)* %gep0
+  %add = fadd double %a, 1.0
+  %max = call double @llvm.maxnum.f64(double %add, double 0.0)
+  %clamp = call double @llvm.minnum.f64(double %max, double 1.0)
+  store double %clamp, double addrspace(1)* %out.gep
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 declare float @llvm.fabs.f32(float) #1
 declare float @llvm.floor.f32(float) #1
diff --git a/llvm/test/CodeGen/AMDGPU/clamp.ll b/llvm/test/CodeGen/AMDGPU/clamp.ll
index a0075066f68..6a78290f9a8 100644
--- a/llvm/test/CodeGen/AMDGPU/clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp.ll
@@ -147,8 +147,7 @@ define amdgpu_kernel void @v_clamp_negabs_f16(half addrspace(1)* %out, half addr
 ; FIXME: Do f64 instructions support clamp?
 ; GCN-LABEL: {{^}}v_clamp_f64:
 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
-; GCN: v_max_f64
-; GCN: v_min_f64
+; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, [[A]], [[A]] clamp{{$}}
 define amdgpu_kernel void @v_clamp_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
@@ -163,8 +162,7 @@ define amdgpu_kernel void @v_clamp_f64(double addrspace(1)* %out, double addrspa
 
 ; GCN-LABEL: {{^}}v_clamp_neg_f64:
 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
-; GCN: v_max_f64
-; GCN: v_min_f64
+; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, -[[A]], -[[A]] clamp{{$}}
 define amdgpu_kernel void @v_clamp_neg_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
@@ -180,8 +178,7 @@ define amdgpu_kernel void @v_clamp_neg_f64(double addrspace(1)* %out, double add
 
 ; GCN-LABEL: {{^}}v_clamp_negabs_f64:
 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
-; GCN: v_max_f64
-; GCN: v_min_f64
+; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, -|[[A]]|, -|[[A]]| clamp{{$}}
 define amdgpu_kernel void @v_clamp_negabs_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
author	Matt Arsenault <Matthew.Arsenault@amd.com>	2017-02-22 23:53:37 +0000
committer	Matt Arsenault <Matthew.Arsenault@amd.com>	2017-02-22 23:53:37 +0000
commit	79a45db7f5752be9fd1f4f718c7ac90a70972662 (patch)
tree	e8e35cfc031857f787b26cd01732357f884dcc5c /llvm/test
parent	98f9bcc1c92845e4a0d3995f39bf1cb0d6b28795 (diff)
download	bcm5719-llvm-79a45db7f5752be9fd1f4f718c7ac90a70972662.tar.gz bcm5719-llvm-79a45db7f5752be9fd1f4f718c7ac90a70972662.zip