Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/fma-combine.ll')
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fma-combine.ll | 52
1 file changed, 26 insertions(+), 26 deletions(-)
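All 26 changed lines follow a single pattern: each test entry point is moved from the default C calling convention to the explicit amdgpu_kernel calling convention, so the functions are treated as GPU kernel entry points. A minimal sketch of the pattern (the @example kernel name is hypothetical; the diff below applies this to the named test functions):

-define void @example(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @example(float addrspace(1)* %out) #1 {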
diff --git a/llvm/test/CodeGen/AMDGPU/fma-combine.ll b/llvm/test/CodeGen/AMDGPU/fma-combine.ll
index 50c5a5abf7f..4113ba8dc1f 100644
--- a/llvm/test/CodeGen/AMDGPU/fma-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/fma-combine.ll
@@ -18,7 +18,7 @@ declare float @llvm.fma.f32(float, float, float) #0
 ; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
 ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
 ; SI: buffer_store_dwordx2 [[RESULT]]
-define void @combine_to_fma_f64_0(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_f64_0(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
@@ -46,7 +46,7 @@ define void @combine_to_fma_f64_0(double addrspace(1)* noalias %out, double addr
 ; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
 ; SI: s_endpgm
-define void @combine_to_fma_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
@@ -75,7 +75,7 @@ define void @combine_to_fma_f64_0_2use(double addrspace(1)* noalias %out, double
 ; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
 ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
 ; SI: buffer_store_dwordx2 [[RESULT]]
-define void @combine_to_fma_f64_1(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_f64_1(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
@@ -99,7 +99,7 @@ define void @combine_to_fma_f64_1(double addrspace(1)* noalias %out, double addr
 ; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
 ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[C]]
 ; SI: buffer_store_dwordx2 [[RESULT]]
-define void @combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
@@ -127,7 +127,7 @@ define void @combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double
 ; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
 ; SI: s_endpgm
-define void @combine_to_fma_fsub_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_fsub_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
@@ -156,7 +156,7 @@ define void @combine_to_fma_fsub_f64_0_2use(double addrspace(1)* noalias %out, d
 ; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
 ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[C]]
 ; SI: buffer_store_dwordx2 [[RESULT]]
-define void @combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
@@ -184,7 +184,7 @@ define void @combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double
 ; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
 ; SI: s_endpgm
-define void @combine_to_fma_fsub_1_f64_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_fsub_1_f64_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
@@ -213,7 +213,7 @@ define void @combine_to_fma_fsub_1_f64_2use(double addrspace(1)* noalias %out, d
 ; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
 ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
 ; SI: buffer_store_dwordx2 [[RESULT]]
-define void @combine_to_fma_fsub_2_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_fsub_2_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
@@ -242,7 +242,7 @@ define void @combine_to_fma_fsub_2_f64(double addrspace(1)* noalias %out, double
 ; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
 ; SI: s_endpgm
-define void @combine_to_fma_fsub_2_f64_2uses_neg(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_fsub_2_f64_2uses_neg(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
@@ -276,7 +276,7 @@ define void @combine_to_fma_fsub_2_f64_2uses_neg(double addrspace(1)* noalias %o
 ; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
 ; SI: s_endpgm
-define void @combine_to_fma_fsub_2_f64_2uses_mul(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_fsub_2_f64_2uses_mul(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
@@ -317,7 +317,7 @@ define void @combine_to_fma_fsub_2_f64_2uses_mul(double addrspace(1)* noalias %o
 ; SI-UNSAFE: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], [[Y]], [[FMA0]]
 
 ; SI: buffer_store_dwordx2 [[RESULT]]
-define void @aggressive_combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @aggressive_combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
@@ -358,7 +358,7 @@ define void @aggressive_combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %
 ; SI-UNSAFE: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[Y]], [[Z]], [[FMA0]]
 
 ; SI: buffer_store_dwordx2 [[RESULT]]
-define void @aggressive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @aggressive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
 %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
 %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
@@ -390,7 +390,7 @@ define void @aggressive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
-define void @test_f32_mul_add_x_one_y(float addrspace(1)* %out,
+define amdgpu_kernel void @test_f32_mul_add_x_one_y(float addrspace(1)* %out,
 float addrspace(1)* %in1,
 float addrspace(1)* %in2) {
 %x = load volatile float, float addrspace(1)* %in1
@@ -406,7 +406,7 @@ define void @test_f32_mul_add_x_one_y(float addrspace(1)* %out,
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
-define void @test_f32_mul_y_add_x_one(float addrspace(1)* %out,
+define amdgpu_kernel void @test_f32_mul_y_add_x_one(float addrspace(1)* %out,
 float addrspace(1)* %in1,
 float addrspace(1)* %in2) {
 %x = load volatile float, float addrspace(1)* %in1
@@ -422,7 +422,7 @@ define void @test_f32_mul_y_add_x_one(float addrspace(1)* %out,
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
-define void @test_f32_mul_add_x_negone_y(float addrspace(1)* %out,
+define amdgpu_kernel void @test_f32_mul_add_x_negone_y(float addrspace(1)* %out,
 float addrspace(1)* %in1,
 float addrspace(1)* %in2) {
 %x = load float, float addrspace(1)* %in1
@@ -438,7 +438,7 @@ define void @test_f32_mul_add_x_negone_y(float addrspace(1)* %out,
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
-define void @test_f32_mul_y_add_x_negone(float addrspace(1)* %out,
+define amdgpu_kernel void @test_f32_mul_y_add_x_negone(float addrspace(1)* %out,
 float addrspace(1)* %in1,
 float addrspace(1)* %in2) {
 %x = load float, float addrspace(1)* %in1
@@ -454,7 +454,7 @@ define void @test_f32_mul_y_add_x_negone(float addrspace(1)* %out,
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, -[[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
-define void @test_f32_mul_sub_one_x_y(float addrspace(1)* %out,
+define amdgpu_kernel void @test_f32_mul_sub_one_x_y(float addrspace(1)* %out,
 float addrspace(1)* %in1,
 float addrspace(1)* %in2) {
 %x = load float, float addrspace(1)* %in1
@@ -470,7 +470,7 @@ define void @test_f32_mul_sub_one_x_y(float addrspace(1)* %out,
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, -[[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
-define void @test_f32_mul_y_sub_one_x(float addrspace(1)* %out,
+define amdgpu_kernel void @test_f32_mul_y_sub_one_x(float addrspace(1)* %out,
 float addrspace(1)* %in1,
 float addrspace(1)* %in2) {
 %x = load float, float addrspace(1)* %in1
@@ -486,7 +486,7 @@ define void @test_f32_mul_y_sub_one_x(float addrspace(1)* %out,
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, -[[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
-define void @test_f32_mul_sub_negone_x_y(float addrspace(1)* %out,
+define amdgpu_kernel void @test_f32_mul_sub_negone_x_y(float addrspace(1)* %out,
 float addrspace(1)* %in1,
 float addrspace(1)* %in2) {
 %x = load float, float addrspace(1)* %in1
@@ -502,7 +502,7 @@ define void @test_f32_mul_sub_negone_x_y(float addrspace(1)* %out,
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, -[[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
-define void @test_f32_mul_y_sub_negone_x(float addrspace(1)* %out,
+define amdgpu_kernel void @test_f32_mul_y_sub_negone_x(float addrspace(1)* %out,
 float addrspace(1)* %in1,
 float addrspace(1)* %in2) {
 %x = load float, float addrspace(1)* %in1
@@ -518,7 +518,7 @@ define void @test_f32_mul_y_sub_negone_x(float addrspace(1)* %out,
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
-define void @test_f32_mul_sub_x_one_y(float addrspace(1)* %out,
+define amdgpu_kernel void @test_f32_mul_sub_x_one_y(float addrspace(1)* %out,
 float addrspace(1)* %in1,
 float addrspace(1)* %in2) {
 %x = load float, float addrspace(1)* %in1
@@ -534,7 +534,7 @@ define void @test_f32_mul_sub_x_one_y(float addrspace(1)* %out,
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
-define void @test_f32_mul_y_sub_x_one(float addrspace(1)* %out,
+define amdgpu_kernel void @test_f32_mul_y_sub_x_one(float addrspace(1)* %out,
 float addrspace(1)* %in1,
 float addrspace(1)* %in2) {
 %x = load float, float addrspace(1)* %in1
@@ -550,7 +550,7 @@ define void @test_f32_mul_y_sub_x_one(float addrspace(1)* %out,
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
-define void @test_f32_mul_sub_x_negone_y(float addrspace(1)* %out,
+define amdgpu_kernel void @test_f32_mul_sub_x_negone_y(float addrspace(1)* %out,
 float addrspace(1)* %in1,
 float addrspace(1)* %in2) {
 %x = load float, float addrspace(1)* %in1
@@ -566,7 +566,7 @@ define void @test_f32_mul_sub_x_negone_y(float addrspace(1)* %out,
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
-define void @test_f32_mul_y_sub_x_negone(float addrspace(1)* %out,
+define amdgpu_kernel void @test_f32_mul_y_sub_x_negone(float addrspace(1)* %out,
 float addrspace(1)* %in1,
 float addrspace(1)* %in2) {
 %x = load float, float addrspace(1)* %in1
@@ -588,7 +588,7 @@ define void @test_f32_mul_y_sub_x_negone(float addrspace(1)* %out,
 ;
 ; SI-FMA: v_fma_f32 [[VR:v[0-9]]], -[[VT:v[0-9]]], [[VY:v[0-9]]], [[VY]]
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VT]], [[VR]]
-define void @test_f32_interp(float addrspace(1)* %out,
+define amdgpu_kernel void @test_f32_interp(float addrspace(1)* %out,
 float addrspace(1)* %in1,
 float addrspace(1)* %in2,
 float addrspace(1)* %in3) {
@@ -610,7 +610,7 @@ define void @test_f32_interp(float addrspace(1)* %out,
 ;
 ; SI-FMA: v_fma_f64 [[VR:v\[[0-9]+:[0-9]+\]]], -[[VT:v\[[0-9]+:[0-9]+\]]], [[VY:v\[[0-9]+:[0-9]+\]]], [[VY]]
 ; SI-FMA: v_fma_f64 v{{\[[0-9]+:[0-9]+\]}}, [[VX:v\[[0-9]+:[0-9]+\]]], [[VT]], [[VR]]
-define void @test_f64_interp(double addrspace(1)* %out,
+define amdgpu_kernel void @test_f64_interp(double addrspace(1)* %out,
 double addrspace(1)* %in1,
 double addrspace(1)* %in2,
 double addrspace(1)* %in3) {
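The ; SI:, ; SI-DAG:, ; SI-FMA:, ; SI-NOFMA:, and ; SI-UNSAFE: comment lines carried along as hunk context are FileCheck directives matched against llc output for the SI (Southern Islands) target. As a rough sketch of how such a test is driven, assuming RUN lines of this general shape (the file's actual flags and check-prefix combinations may differ):

; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-FMA -check-prefix=SI %s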