summary | refs | log | tree | commit | diff | stats
path: root/llvm/test
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-02-22 23:53:37 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-02-22 23:53:37 +0000
commit: 79a45db7f5752be9fd1f4f718c7ac90a70972662 (patch)
tree: e8e35cfc031857f787b26cd01732357f884dcc5c /llvm/test
parent: 98f9bcc1c92845e4a0d3995f39bf1cb0d6b28795 (diff)
download: bcm5719-llvm-79a45db7f5752be9fd1f4f718c7ac90a70972662.tar.gz
download: bcm5719-llvm-79a45db7f5752be9fd1f4f718c7ac90a70972662.zip
AMDGPU: Use clamp with f64
llvm-svn: 295908
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/clamp-modifier.ll | 15
-rw-r--r-- llvm/test/CodeGen/AMDGPU/clamp.ll | 9
2 files changed, 18 insertions, 6 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
index f5c53c64db1..186bd349ecc 100644
--- a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
@@ -153,6 +153,21 @@ define amdgpu_kernel void @v_clamp_add_src_v2f32(<2 x float> addrspace(1)* %out,
ret void
}
+; GCN-LABEL: {{^}}v_clamp_add_src_f64:
+; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
+; GCN: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, [[A]], 1.0 clamp{{$}}
+define amdgpu_kernel void @v_clamp_add_src_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() ; work-item id, used to index both %aptr and %out
+ %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
+ %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
+ %a = load double, double addrspace(1)* %gep0
+ %add = fadd double %a, 1.0 ; source operation whose result is clamped below
+ %max = call double @llvm.maxnum.f64(double %add, double 0.0) ; lower bound: max(%add, 0.0)
+ %clamp = call double @llvm.minnum.f64(double %max, double 1.0) ; upper bound: min(%max, 1.0); the checks above expect this max/min pair to appear as the clamp modifier on v_add_f64
+ store double %clamp, double addrspace(1)* %out.gep
+ ret void
+}
+
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @llvm.fabs.f32(float) #1
declare float @llvm.floor.f32(float) #1
diff --git a/llvm/test/CodeGen/AMDGPU/clamp.ll b/llvm/test/CodeGen/AMDGPU/clamp.ll
index a0075066f68..6a78290f9a8 100644
--- a/llvm/test/CodeGen/AMDGPU/clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp.ll
@@ -147,8 +147,7 @@ define amdgpu_kernel void @v_clamp_negabs_f16(half addrspace(1)* %out, half addr
; FIXME: Do f64 instructions support clamp?
; GCN-LABEL: {{^}}v_clamp_f64:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
-; GCN: v_max_f64
-; GCN: v_min_f64
+; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
@@ -163,8 +162,7 @@ define amdgpu_kernel void @v_clamp_f64(double addrspace(1)* %out, double addrspa
; GCN-LABEL: {{^}}v_clamp_neg_f64:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
-; GCN: v_max_f64
-; GCN: v_min_f64
+; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, -[[A]], -[[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_neg_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
@@ -180,8 +178,7 @@ define amdgpu_kernel void @v_clamp_neg_f64(double addrspace(1)* %out, double add
; GCN-LABEL: {{^}}v_clamp_negabs_f64:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
-; GCN: v_max_f64
-; GCN: v_min_f64
+; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, -|[[A]]|, -|[[A]]| clamp{{$}}
define amdgpu_kernel void @v_clamp_negabs_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
OpenPOWER on IntegriCloud