diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll | 26 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot2.ll | 26 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll | 28 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll | 28 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll | 26 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll | 28 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll | 28 |
7 files changed, 155 insertions, 35 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll
index 65ab3e04237..7efb1850a27 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll
@@ -1,10 +1,10 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX906
 
-declare float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c)
+declare float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 %clamp)
 
-; GFX906-LABEL: {{^}}test_llvm_amdgcn_fdot2
-; GFX906: v_dot2_f32_f16
-define amdgpu_kernel void @test_llvm_amdgcn_fdot2(
+; GFX906-LABEL: {{^}}test_llvm_amdgcn_fdot2_clamp
+; GFX906: v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_fdot2_clamp(
     float addrspace(1)* %r,
     <2 x half> addrspace(1)* %a,
     <2 x half> addrspace(1)* %b,
@@ -13,7 +13,23 @@ entry:
   %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
   %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
   %c.val = load float, float addrspace(1)* %c
-  %r.val = call float @llvm.amdgcn.fdot2(<2 x half> %a.val, <2 x half> %b.val, float %c.val)
+  %r.val = call float @llvm.amdgcn.fdot2(<2 x half> %a.val, <2 x half> %b.val, float %c.val, i1 1)
+  store float %r.val, float addrspace(1)* %r
+  ret void
+}
+
+; GFX906-LABEL: {{^}}test_llvm_amdgcn_fdot2_no_clamp
+; GFX906: v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_fdot2_no_clamp(
+    float addrspace(1)* %r,
+    <2 x half> addrspace(1)* %a,
+    <2 x half> addrspace(1)* %b,
+    float addrspace(1)* %c) {
+entry:
+  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
+  %c.val = load float, float addrspace(1)* %c
+  %r.val = call float @llvm.amdgcn.fdot2(<2 x half> %a.val, <2 x half> %b.val, float %c.val, i1 0)
   store float %r.val, float addrspace(1)* %r
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot2.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot2.ll
index 0d8f28bbef1..f1894cc14cc 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot2.ll
@@ -1,10 +1,10 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
 
-declare i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %b, i32 %c)
+declare i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %b, i32 %c, i1 %clamp)
 
-; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot2
-; GFX906: v_dot2_i32_i16
-define amdgpu_kernel void @test_llvm_amdgcn_sdot2(
+; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot2_clamp
+; GFX906: v_dot2_i32_i16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_sdot2_clamp(
     i32 addrspace(1)* %r,
     <2 x i16> addrspace(1)* %a,
     <2 x i16> addrspace(1)* %b,
@@ -13,7 +13,23 @@ entry:
   %a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a
   %b.val = load <2 x i16>, <2 x i16> addrspace(1)* %b
   %c.val = load i32, i32 addrspace(1)* %c
-  %r.val = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a.val, <2 x i16> %b.val, i32 %c.val)
+  %r.val = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a.val, <2 x i16> %b.val, i32 %c.val, i1 1)
+  store i32 %r.val, i32 addrspace(1)* %r
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot2_no_clamp
+; GFX906: v_dot2_i32_i16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_sdot2_no_clamp(
+    i32 addrspace(1)* %r,
+    <2 x i16> addrspace(1)* %a,
+    <2 x i16> addrspace(1)* %b,
+    i32 addrspace(1)* %c) {
+entry:
+  %a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a
+  %b.val = load <2 x i16>, <2 x i16> addrspace(1)* %b
+  %c.val = load i32, i32 addrspace(1)* %c
+  %r.val = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a.val, <2 x i16> %b.val, i32 %c.val, i1 0)
   store i32 %r.val, i32 addrspace(1)* %r
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
index 8b664e6f9a4..2651200a344 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
@@ -1,10 +1,10 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
 
-declare i32 @llvm.amdgcn.sdot4(i32 %a, i32 %b, i32 %c)
+declare i32 @llvm.amdgcn.sdot4(i32 %a, i32 %b, i32 %c, i1 %clamp)
 
-; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot4
-; GFX906: v_dot4_i32_i8
-define amdgpu_kernel void @test_llvm_amdgcn_sdot4(
+; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot4_clamp
+; GFX906: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_sdot4_clamp(
     i32 addrspace(1)* %r,
     <4 x i8> addrspace(1)* %a,
     <4 x i8> addrspace(1)* %b,
@@ -15,7 +15,25 @@ entry:
   %a.val.cast = bitcast <4 x i8> %a.val to i32
   %b.val.cast = bitcast <4 x i8> %b.val to i32
   %c.val = load i32, i32 addrspace(1)* %c
-  %r.val = call i32 @llvm.amdgcn.sdot4(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val)
+  %r.val = call i32 @llvm.amdgcn.sdot4(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 1)
+  store i32 %r.val, i32 addrspace(1)* %r
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot4_no_clamp
+; GFX906: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_sdot4_no_clamp(
+    i32 addrspace(1)* %r,
+    <4 x i8> addrspace(1)* %a,
+    <4 x i8> addrspace(1)* %b,
+    i32 addrspace(1)* %c) {
+entry:
+  %a.val = load <4 x i8>, <4 x i8> addrspace(1)* %a
+  %b.val = load <4 x i8>, <4 x i8> addrspace(1)* %b
+  %a.val.cast = bitcast <4 x i8> %a.val to i32
+  %b.val.cast = bitcast <4 x i8> %b.val to i32
+  %c.val = load i32, i32 addrspace(1)* %c
+  %r.val = call i32 @llvm.amdgcn.sdot4(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 0)
   store i32 %r.val, i32 addrspace(1)* %r
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll
index e2466eae539..456421c4984 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll
@@ -1,10 +1,10 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
 
-declare i32 @llvm.amdgcn.sdot8(i32 %a, i32 %b, i32 %c)
+declare i32 @llvm.amdgcn.sdot8(i32 %a, i32 %b, i32 %c, i1 %clamp)
 
-; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot8
-; GFX906: v_dot8_i32_i4
-define amdgpu_kernel void @test_llvm_amdgcn_sdot8(
+; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot8_clamp
+; GFX906: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_sdot8_clamp(
     i32 addrspace(1)* %r,
     <8 x i4> addrspace(1)* %a,
     <8 x i4> addrspace(1)* %b,
@@ -15,7 +15,25 @@ entry:
   %a.val.cast = bitcast <8 x i4> %a.val to i32
   %b.val.cast = bitcast <8 x i4> %b.val to i32
   %c.val = load i32, i32 addrspace(1)* %c
-  %r.val = call i32 @llvm.amdgcn.sdot8(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val)
+  %r.val = call i32 @llvm.amdgcn.sdot8(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 1)
+  store i32 %r.val, i32 addrspace(1)* %r
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot8_no_clamp
+; GFX906: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_sdot8_no_clamp(
+    i32 addrspace(1)* %r,
+    <8 x i4> addrspace(1)* %a,
+    <8 x i4> addrspace(1)* %b,
+    i32 addrspace(1)* %c) {
+entry:
+  %a.val = load <8 x i4>, <8 x i4> addrspace(1)* %a
+  %b.val = load <8 x i4>, <8 x i4> addrspace(1)* %b
+  %a.val.cast = bitcast <8 x i4> %a.val to i32
+  %b.val.cast = bitcast <8 x i4> %b.val to i32
+  %c.val = load i32, i32 addrspace(1)* %c
+  %r.val = call i32 @llvm.amdgcn.sdot8(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 0)
   store i32 %r.val, i32 addrspace(1)* %r
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll
index b2912cb2334..18ca71d33bc 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll
@@ -1,10 +1,10 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
 
-declare i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> %b, i32 %c)
+declare i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> %b, i32 %c, i1 %clamp)
 
-; GCN-LABEL: {{^}}test_llvm_amdgcn_udot2
-; GFX906: v_dot2_u32_u16
-define amdgpu_kernel void @test_llvm_amdgcn_udot2(
+; GCN-LABEL: {{^}}test_llvm_amdgcn_udot2_clamp
+; GFX906: v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_udot2_clamp(
     i32 addrspace(1)* %r,
     <2 x i16> addrspace(1)* %a,
     <2 x i16> addrspace(1)* %b,
@@ -13,7 +13,23 @@ entry:
   %a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a
   %b.val = load <2 x i16>, <2 x i16> addrspace(1)* %b
   %c.val = load i32, i32 addrspace(1)* %c
-  %r.val = call i32 @llvm.amdgcn.udot2(<2 x i16> %a.val, <2 x i16> %b.val, i32 %c.val)
+  %r.val = call i32 @llvm.amdgcn.udot2(<2 x i16> %a.val, <2 x i16> %b.val, i32 %c.val, i1 1)
+  store i32 %r.val, i32 addrspace(1)* %r
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_llvm_amdgcn_udot2_no_clamp
+; GFX906: v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_udot2_no_clamp(
+    i32 addrspace(1)* %r,
+    <2 x i16> addrspace(1)* %a,
+    <2 x i16> addrspace(1)* %b,
+    i32 addrspace(1)* %c) {
+entry:
+  %a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a
+  %b.val = load <2 x i16>, <2 x i16> addrspace(1)* %b
+  %c.val = load i32, i32 addrspace(1)* %c
+  %r.val = call i32 @llvm.amdgcn.udot2(<2 x i16> %a.val, <2 x i16> %b.val, i32 %c.val, i1 0)
   store i32 %r.val, i32 addrspace(1)* %r
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll
index 5ce060de700..73d6a9ce968 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll
@@ -1,10 +1,10 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
 
-declare i32 @llvm.amdgcn.udot4(i32 %a, i32 %b, i32 %c)
+declare i32 @llvm.amdgcn.udot4(i32 %a, i32 %b, i32 %c, i1 %clamp)
 
-; GCN-LABEL: {{^}}test_llvm_amdgcn_udot4
-; GFX906: v_dot4_u32_u8
-define amdgpu_kernel void @test_llvm_amdgcn_udot4(
+; GCN-LABEL: {{^}}test_llvm_amdgcn_udot4_clamp
+; GFX906: v_dot4_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_udot4_clamp(
     i32 addrspace(1)* %r,
     <4 x i8> addrspace(1)* %a,
     <4 x i8> addrspace(1)* %b,
@@ -15,7 +15,25 @@ entry:
   %a.val.cast = bitcast <4 x i8> %a.val to i32
   %b.val.cast = bitcast <4 x i8> %b.val to i32
   %c.val = load i32, i32 addrspace(1)* %c
-  %r.val = call i32 @llvm.amdgcn.udot4(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val)
+  %r.val = call i32 @llvm.amdgcn.udot4(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 1)
+  store i32 %r.val, i32 addrspace(1)* %r
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_llvm_amdgcn_udot4_no_clamp
+; GFX906: v_dot4_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_udot4_no_clamp(
+    i32 addrspace(1)* %r,
+    <4 x i8> addrspace(1)* %a,
+    <4 x i8> addrspace(1)* %b,
+    i32 addrspace(1)* %c) {
+entry:
+  %a.val = load <4 x i8>, <4 x i8> addrspace(1)* %a
+  %b.val = load <4 x i8>, <4 x i8> addrspace(1)* %b
+  %a.val.cast = bitcast <4 x i8> %a.val to i32
+  %b.val.cast = bitcast <4 x i8> %b.val to i32
+  %c.val = load i32, i32 addrspace(1)* %c
+  %r.val = call i32 @llvm.amdgcn.udot4(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 0)
   store i32 %r.val, i32 addrspace(1)* %r
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll
index 2599305bc8e..c2f80cac8f7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll
@@ -1,10 +1,10 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
 
-declare i32 @llvm.amdgcn.udot8(i32 %a, i32 %b, i32 %c)
+declare i32 @llvm.amdgcn.udot8(i32 %a, i32 %b, i32 %c, i1 %clamp)
 
-; GCN-LABEL: {{^}}test_llvm_amdgcn_udot8
-; GFX906: v_dot8_u32_u4
-define amdgpu_kernel void @test_llvm_amdgcn_udot8(
+; GCN-LABEL: {{^}}test_llvm_amdgcn_udot8_clamp
+; GFX906: v_dot8_u32_u4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_udot8_clamp(
     i32 addrspace(1)* %r,
     <8 x i4> addrspace(1)* %a,
     <8 x i4> addrspace(1)* %b,
@@ -15,7 +15,25 @@ entry:
   %a.val.cast = bitcast <8 x i4> %a.val to i32
   %b.val.cast = bitcast <8 x i4> %b.val to i32
   %c.val = load i32, i32 addrspace(1)* %c
-  %r.val = call i32 @llvm.amdgcn.udot8(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val)
+  %r.val = call i32 @llvm.amdgcn.udot8(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 1)
+  store i32 %r.val, i32 addrspace(1)* %r
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_llvm_amdgcn_udot8_no_clamp
+; GFX906: v_dot8_u32_u4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
+define amdgpu_kernel void @test_llvm_amdgcn_udot8_no_clamp(
+    i32 addrspace(1)* %r,
+    <8 x i4> addrspace(1)* %a,
+    <8 x i4> addrspace(1)* %b,
+    i32 addrspace(1)* %c) {
+entry:
+  %a.val = load <8 x i4>, <8 x i4> addrspace(1)* %a
+  %b.val = load <8 x i4>, <8 x i4> addrspace(1)* %b
+  %a.val.cast = bitcast <8 x i4> %a.val to i32
+  %b.val.cast = bitcast <8 x i4> %b.val to i32
+  %c.val = load i32, i32 addrspace(1)* %c
+  %r.val = call i32 @llvm.amdgcn.udot8(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 0)
   store i32 %r.val, i32 addrspace(1)* %r
   ret void
 }