diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-03-29 19:14:54 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-03-29 19:14:54 +0000 |
| commit | 055e4dce45c3f2194c0610db1cee6dedfa6040ab (patch) | |
| tree | 5ab91d97a6f8a6965f38f403c5db59f446ed81bb /llvm/test/CodeGen | |
| parent | d395bc1cc23e1c77ede517611c29c1037a4d8a4b (diff) | |
| download | bcm5719-llvm-055e4dce45c3f2194c0610db1cee6dedfa6040ab.tar.gz bcm5719-llvm-055e4dce45c3f2194c0610db1cee6dedfa6040ab.zip | |
AMDGPU: Remove dx10-clamp from subtarget features
Since this can be set with s_setreg*, it should not be a subtarget
property. Set a default based on the calling convention, and introduce
a new amdgpu-dx10-clamp attribute to override this if desired.
Also introduce a new amdgpu-ieee attribute to match.
The values need to match to allow inlining. I think it is OK for the
caller's dx10-clamp attribute to override the callee, but there
doesn't appear to be the infrastructure to do this currently without
defining the attribute in the generic Attributes.td.
Eventually the calling convention lowering will need to insert a mode
switch somewhere for these.
llvm-svn: 357302
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll | 188 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/clamp.ll | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll | 24 |
3 files changed, 214 insertions, 4 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll new file mode 100644 index 00000000000..1e6cc0523dc --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll @@ -0,0 +1,188 @@ +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; GCN-LABEL: {{^}}kernel_ieee_mode_default: +; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] +; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] +; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]] +; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]] +; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]] +; GCN-NOT: v_mul_f32 +define amdgpu_kernel void @kernel_ieee_mode_default() #0 { + %val0 = load volatile float, float addrspace(1)* undef + %val1 = load volatile float, float addrspace(1)* undef + %min = call float @llvm.minnum.f32(float %val0, float %val1) + store volatile float %min, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}kernel_ieee_mode_on: +; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] +; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] +; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]] +; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]] +; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]] +; GCN-NOT: v_mul_f32 +define amdgpu_kernel void @kernel_ieee_mode_on() #1 { + %val0 = load volatile float, float addrspace(1)* undef + %val1 = load volatile float, float addrspace(1)* undef + %min = call float @llvm.minnum.f32(float %val0, float %val1) + store volatile float %min, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}kernel_ieee_mode_off: +; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] +; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] +; GCN-NOT: [[VAL0]] +; GCN-NOT: [[VAL1]] +; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[VAL0]], [[VAL1]] +; GCN-NOT: v_mul_f32 +define amdgpu_kernel void 
@kernel_ieee_mode_off() #2 { + %val0 = load volatile float, float addrspace(1)* undef + %val1 = load volatile float, float addrspace(1)* undef + %min = call float @llvm.minnum.f32(float %val0, float %val1) + store volatile float %min, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}func_ieee_mode_default: +; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] +; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] +; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]] +; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]] +; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]] +; GCN-NOT: v_mul_f32 +define void @func_ieee_mode_default() #0 { + %val0 = load volatile float, float addrspace(1)* undef + %val1 = load volatile float, float addrspace(1)* undef + %min = call float @llvm.minnum.f32(float %val0, float %val1) + store volatile float %min, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}func_ieee_mode_on: +; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] +; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] +; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]] +; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]] +; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]] +; GCN-NOT: v_mul_f32 +define void @func_ieee_mode_on() #1 { + %val0 = load volatile float, float addrspace(1)* undef + %val1 = load volatile float, float addrspace(1)* undef + %min = call float @llvm.minnum.f32(float %val0, float %val1) + store volatile float %min, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}func_ieee_mode_off: +; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] +; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] +; GCN-NOT: [[VAL0]] +; GCN-NOT: [[VAL1]] +; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[VAL0]], [[VAL1]] +; GCN-NOT: v_mul_f32 +define void @func_ieee_mode_off() #2 { + %val0 = load volatile float, float addrspace(1)* undef + %val1 = load volatile 
float, float addrspace(1)* undef + %min = call float @llvm.minnum.f32(float %val0, float %val1) + store volatile float %min, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}cs_ieee_mode_default: +; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] +; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] +; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]] +; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]] +; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]] +; GCN-NOT: v_mul_f32 +define amdgpu_cs void @cs_ieee_mode_default() #0 { + %val0 = load volatile float, float addrspace(1)* undef + %val1 = load volatile float, float addrspace(1)* undef + %min = call float @llvm.minnum.f32(float %val0, float %val1) + store volatile float %min, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}cs_ieee_mode_on: +; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] +; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] +; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]] +; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]] +; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]] +; GCN-NOT: v_mul_f32 +define amdgpu_cs void @cs_ieee_mode_on() #1 { + %val0 = load volatile float, float addrspace(1)* undef + %val1 = load volatile float, float addrspace(1)* undef + %min = call float @llvm.minnum.f32(float %val0, float %val1) + store volatile float %min, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}cs_ieee_mode_off: +; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] +; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] +; GCN-NOT: [[VAL0]] +; GCN-NOT: [[VAL1]] +; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[VAL0]], [[VAL1]] +; GCN-NOT: v_mul_f32 +define amdgpu_cs void @cs_ieee_mode_off() #2 { + %val0 = load volatile float, float addrspace(1)* undef + %val1 = load volatile float, float addrspace(1)* undef + %min = call float @llvm.minnum.f32(float %val0, float %val1) 
+ store volatile float %min, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}ps_ieee_mode_default: +; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] +; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] +; GCN-NOT: [[VAL0]] +; GCN-NOT: [[VAL1]] +; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[VAL0]], [[VAL1]] +; GCN-NOT: v_mul_f32 +define amdgpu_ps void @ps_ieee_mode_default() #0 { + %val0 = load volatile float, float addrspace(1)* undef + %val1 = load volatile float, float addrspace(1)* undef + %min = call float @llvm.minnum.f32(float %val0, float %val1) + store volatile float %min, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}ps_ieee_mode_on: +; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] +; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] +; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]] +; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]] +; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]] +; GCN-NOT: v_mul_f32 +define amdgpu_ps void @ps_ieee_mode_on() #1 { + %val0 = load volatile float, float addrspace(1)* undef + %val1 = load volatile float, float addrspace(1)* undef + %min = call float @llvm.minnum.f32(float %val0, float %val1) + store volatile float %min, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}ps_ieee_mode_off: +; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]] +; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]] +; GCN-NOT: [[VAL0]] +; GCN-NOT: [[VAL1]] +; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[VAL0]], [[VAL1]] +; GCN-NOT: v_mul_f32 +define amdgpu_ps void @ps_ieee_mode_off() #2 { + %val0 = load volatile float, float addrspace(1)* undef + %val1 = load volatile float, float addrspace(1)* undef + %min = call float @llvm.minnum.f32(float %val0, float %val1) + store volatile float %min, float addrspace(1)* undef + ret void +} + +declare float @llvm.minnum.f32(float, float) #3 + +attributes #0 = { nounwind } +attributes #1 = { 
nounwind "amdgpu-ieee"="true" } +attributes #2 = { nounwind "amdgpu-ieee"="false" } +attributes #3 = { nounwind readnone speculatable } diff --git a/llvm/test/CodeGen/AMDGPU/clamp.ll b/llvm/test/CodeGen/AMDGPU/clamp.ll index d98b56062cd..1fcdfd2ff24 100644 --- a/llvm/test/CodeGen/AMDGPU/clamp.ll +++ b/llvm/test/CodeGen/AMDGPU/clamp.ll @@ -769,6 +769,6 @@ declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1 attributes #0 = { nounwind } attributes #1 = { nounwind readnone } -attributes #2 = { nounwind "target-features"="-dx10-clamp,-fp-exceptions" "no-nans-fp-math"="false" } -attributes #3 = { nounwind "target-features"="+dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" } -attributes #4 = { nounwind "target-features"="-dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" } +attributes #2 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="-fp-exceptions" "no-nans-fp-math"="false" } +attributes #3 = { nounwind "amdgpu-dx10-clamp"="true" "target-features"="+fp-exceptions" "no-nans-fp-math"="false" } +attributes #4 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="+fp-exceptions" "no-nans-fp-math"="false" } diff --git a/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll b/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll index a454fa02579..53e5dadc786 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll @@ -70,10 +70,32 @@ define amdgpu_kernel void @test_no_dx10_clamp_vi(float addrspace(1)* %out0, doub ret void } +; GCN-LABEL: {{^}}test_no_ieee_mode_vi: +; GCN: float_mode = 192 +; GCN: enable_dx10_clamp = 1 +; GCN: enable_ieee_mode = 0 +define amdgpu_kernel void @test_no_ieee_mode_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #7 { + store float 0.0, float addrspace(1)* %out0 + store double 0.0, double addrspace(1)* %out1 + ret void +} + +; GCN-LABEL: {{^}}test_no_ieee_mode_no_dx10_clamp_vi: +; GCN: float_mode = 192 +; GCN: enable_dx10_clamp = 0 +; GCN: enable_ieee_mode = 0 +define amdgpu_kernel void 
@test_no_ieee_mode_no_dx10_clamp_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #8 { + store float 0.0, float addrspace(1)* %out0 + store double 0.0, double addrspace(1)* %out1 + ret void +} + attributes #0 = { nounwind "target-cpu"="kaveri" "target-features"="-code-object-v3" } attributes #1 = { nounwind "target-cpu"="fiji" "target-features"="-code-object-v3" } attributes #2 = { nounwind "target-features"="-code-object-v3,-fp32-denormals,+fp64-fp16-denormals" } attributes #3 = { nounwind "target-features"="-code-object-v3,+fp32-denormals,-fp64-fp16-denormals" } attributes #4 = { nounwind "target-features"="-code-object-v3,+fp32-denormals,+fp64-fp16-denormals" } attributes #5 = { nounwind "target-features"="-code-object-v3,-fp32-denormals,-fp64-fp16-denormals" } -attributes #6 = { nounwind "target-cpu"="fiji" "target-features"="-code-object-v3,-dx10-clamp" } +attributes #6 = { nounwind "amdgpu-dx10-clamp"="false" "target-cpu"="fiji" "target-features"="-code-object-v3" } +attributes #7 = { nounwind "amdgpu-ieee"="false" "target-cpu"="fiji" "target-features"="-code-object-v3" } +attributes #8 = { nounwind "amdgpu-dx10-clamp"="false" "amdgpu-ieee"="false" "target-cpu"="fiji" "target-features"="-code-object-v3" } |

