diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll | 104 |
1 files changed, 52 insertions, 52 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll index 8d13354e104..e76c706843c 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll @@ -227,7 +227,7 @@ define amdgpu_kernel void @udiv_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { ; CHECK-NEXT: [[TMP5:%.*]] = fdiv fast float 1.000000e+00, [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = fmul fast float [[TMP3]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]]) -; CHECK-NEXT: [[TMP8:%.*]] = fneg fast float [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = fsub fast float -0.000000e+00, [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]]) ; CHECK-NEXT: [[TMP10:%.*]] = fptoui float [[TMP7]] to i32 ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) @@ -254,7 +254,7 @@ define amdgpu_kernel void @urem_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { ; CHECK-NEXT: [[TMP5:%.*]] = fdiv fast float 1.000000e+00, [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = fmul fast float [[TMP3]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]]) -; CHECK-NEXT: [[TMP8:%.*]] = fneg fast float [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = fsub fast float -0.000000e+00, [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]]) ; CHECK-NEXT: [[TMP10:%.*]] = fptoui float [[TMP7]] to i32 ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) @@ -286,7 +286,7 @@ define amdgpu_kernel void @sdiv_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { ; CHECK-NEXT: [[TMP8:%.*]] = fdiv fast float 1.000000e+00, [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]]) -; CHECK-NEXT: [[TMP11:%.*]] = fneg fast float [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = fsub fast float -0.000000e+00, [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]]) ; CHECK-NEXT: [[TMP13:%.*]] = fptosi float [[TMP10]] to i32 ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]]) @@ -317,7 +317,7 @@ define amdgpu_kernel void @srem_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { ; CHECK-NEXT: [[TMP8:%.*]] = fdiv fast float 1.000000e+00, [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]]) -; CHECK-NEXT: [[TMP11:%.*]] = fneg fast float [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = fsub fast float -0.000000e+00, [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]]) ; CHECK-NEXT: [[TMP13:%.*]] = fptosi float [[TMP10]] to i32 ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]]) @@ -347,7 +347,7 @@ define amdgpu_kernel void @udiv_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { ; CHECK-NEXT: [[TMP5:%.*]] = fdiv fast float 1.000000e+00, [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = fmul fast float [[TMP3]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]]) -; CHECK-NEXT: [[TMP8:%.*]] = fneg fast float [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = fsub fast float -0.000000e+00, [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]]) ; CHECK-NEXT: [[TMP10:%.*]] = fptoui float [[TMP7]] to i32 ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) @@ -374,7 +374,7 @@ define amdgpu_kernel void @urem_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { ; CHECK-NEXT: [[TMP5:%.*]] = fdiv fast float 1.000000e+00, [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = fmul fast float [[TMP3]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]]) -; CHECK-NEXT: [[TMP8:%.*]] = fneg fast float [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = fsub fast float -0.000000e+00, [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]]) ; CHECK-NEXT: [[TMP10:%.*]] = fptoui float [[TMP7]] to i32 ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) @@ -406,7 +406,7 @@ define amdgpu_kernel void @sdiv_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { ; CHECK-NEXT: [[TMP8:%.*]] = fdiv fast float 1.000000e+00, [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]]) -; CHECK-NEXT: [[TMP11:%.*]] = fneg fast float [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = fsub fast float -0.000000e+00, [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]]) ; CHECK-NEXT: [[TMP13:%.*]] = fptosi float [[TMP10]] to i32 ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]]) @@ -437,7 +437,7 @@ define amdgpu_kernel void @srem_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { ; CHECK-NEXT: [[TMP8:%.*]] = fdiv fast float 1.000000e+00, [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]]) -; CHECK-NEXT: [[TMP11:%.*]] = fneg fast float [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = fsub fast float -0.000000e+00, [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]]) ; CHECK-NEXT: [[TMP13:%.*]] = fptosi float [[TMP10]] to i32 ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]]) @@ -1265,7 +1265,7 @@ define amdgpu_kernel void @udiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast float 1.000000e+00, [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[TMP5]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]]) -; CHECK-NEXT: [[TMP10:%.*]] = fneg fast float [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = fsub fast float -0.000000e+00, [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]]) ; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32 ; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]]) @@ -1285,7 +1285,7 @@ define amdgpu_kernel void @udiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; CHECK-NEXT: [[TMP27:%.*]] = fdiv fast float 1.000000e+00, [[TMP26]] ; CHECK-NEXT: [[TMP28:%.*]] = fmul fast float [[TMP25]], [[TMP27]] ; CHECK-NEXT: [[TMP29:%.*]] = call fast float @llvm.trunc.f32(float [[TMP28]]) -; CHECK-NEXT: [[TMP30:%.*]] = fneg fast float [[TMP29]] +; CHECK-NEXT: [[TMP30:%.*]] = fsub fast float -0.000000e+00, [[TMP29]] ; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP30]], float [[TMP26]], float [[TMP25]]) ; CHECK-NEXT: [[TMP32:%.*]] = fptoui float [[TMP29]] to i32 ; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.fabs.f32(float [[TMP31]]) @@ -1305,7 +1305,7 @@ define amdgpu_kernel void @udiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; CHECK-NEXT: [[TMP47:%.*]] = fdiv fast float 1.000000e+00, [[TMP46]] ; CHECK-NEXT: [[TMP48:%.*]] = fmul fast float [[TMP45]], [[TMP47]] ; CHECK-NEXT: [[TMP49:%.*]] = call fast float @llvm.trunc.f32(float [[TMP48]]) -; CHECK-NEXT: [[TMP50:%.*]] = fneg fast float [[TMP49]] +; CHECK-NEXT: [[TMP50:%.*]] = fsub fast float -0.000000e+00, [[TMP49]] ; CHECK-NEXT: [[TMP51:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP50]], float [[TMP46]], float [[TMP45]]) ; CHECK-NEXT: [[TMP52:%.*]] = fptoui float [[TMP49]] to i32 ; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.fabs.f32(float [[TMP51]]) @@ -1325,7 +1325,7 @@ define amdgpu_kernel void @udiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; CHECK-NEXT: [[TMP67:%.*]] = fdiv fast float 1.000000e+00, [[TMP66]] ; CHECK-NEXT: [[TMP68:%.*]] = fmul fast float [[TMP65]], [[TMP67]] ; CHECK-NEXT: [[TMP69:%.*]] = call fast float @llvm.trunc.f32(float [[TMP68]]) -; CHECK-NEXT: [[TMP70:%.*]] = fneg fast float [[TMP69]] +; CHECK-NEXT: [[TMP70:%.*]] = fsub fast float -0.000000e+00, [[TMP69]] ; CHECK-NEXT: [[TMP71:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP70]], float [[TMP66]], float [[TMP65]]) ; CHECK-NEXT: [[TMP72:%.*]] = fptoui float [[TMP69]] to i32 ; CHECK-NEXT: [[TMP73:%.*]] = call fast float @llvm.fabs.f32(float [[TMP71]]) @@ -1355,7 +1355,7 @@ define amdgpu_kernel void @urem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast float 1.000000e+00, [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[TMP5]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]]) -; CHECK-NEXT: [[TMP10:%.*]] = fneg fast float [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = fsub fast float -0.000000e+00, [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]]) ; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32 ; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]]) @@ -1377,7 +1377,7 @@ define amdgpu_kernel void @urem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; CHECK-NEXT: [[TMP29:%.*]] = fdiv fast float 1.000000e+00, [[TMP28]] ; CHECK-NEXT: [[TMP30:%.*]] = fmul fast float [[TMP27]], [[TMP29]] ; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.trunc.f32(float [[TMP30]]) -; CHECK-NEXT: [[TMP32:%.*]] = fneg fast float [[TMP31]] +; CHECK-NEXT: [[TMP32:%.*]] = fsub fast float -0.000000e+00, [[TMP31]] ; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP32]], float [[TMP28]], float [[TMP27]]) ; CHECK-NEXT: [[TMP34:%.*]] = fptoui float [[TMP31]] to i32 ; CHECK-NEXT: [[TMP35:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]]) @@ -1399,7 +1399,7 @@ define amdgpu_kernel void @urem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; CHECK-NEXT: [[TMP51:%.*]] = fdiv fast float 1.000000e+00, [[TMP50]] ; CHECK-NEXT: [[TMP52:%.*]] = fmul fast float [[TMP49]], [[TMP51]] ; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.trunc.f32(float [[TMP52]]) -; CHECK-NEXT: [[TMP54:%.*]] = fneg fast float [[TMP53]] +; CHECK-NEXT: [[TMP54:%.*]] = fsub fast float -0.000000e+00, [[TMP53]] ; CHECK-NEXT: [[TMP55:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP54]], float [[TMP50]], float [[TMP49]]) ; CHECK-NEXT: [[TMP56:%.*]] = fptoui float [[TMP53]] to i32 ; CHECK-NEXT: [[TMP57:%.*]] = call fast float @llvm.fabs.f32(float [[TMP55]]) @@ -1421,7 +1421,7 @@ define amdgpu_kernel void @urem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; CHECK-NEXT: [[TMP73:%.*]] = fdiv fast float 1.000000e+00, [[TMP72]] ; CHECK-NEXT: [[TMP74:%.*]] = fmul fast float [[TMP71]], [[TMP73]] ; CHECK-NEXT: [[TMP75:%.*]] = call fast float @llvm.trunc.f32(float [[TMP74]]) -; CHECK-NEXT: [[TMP76:%.*]] = fneg fast float [[TMP75]] +; CHECK-NEXT: [[TMP76:%.*]] = fsub fast float -0.000000e+00, [[TMP75]] ; CHECK-NEXT: [[TMP77:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP76]], float [[TMP72]], float [[TMP71]]) ; CHECK-NEXT: [[TMP78:%.*]] = fptoui float [[TMP75]] to i32 ; CHECK-NEXT: [[TMP79:%.*]] = call fast float @llvm.fabs.f32(float [[TMP77]]) @@ -1456,7 +1456,7 @@ define amdgpu_kernel void @sdiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; CHECK-NEXT: [[TMP10:%.*]] = fdiv fast float 1.000000e+00, [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]]) -; CHECK-NEXT: [[TMP13:%.*]] = fneg fast float [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = fsub fast float -0.000000e+00, [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]]) ; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32 ; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]]) @@ -1480,7 +1480,7 @@ define amdgpu_kernel void @sdiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; CHECK-NEXT: [[TMP34:%.*]] = fdiv fast float 1.000000e+00, [[TMP33]] ; CHECK-NEXT: [[TMP35:%.*]] = fmul fast float [[TMP32]], [[TMP34]] ; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.trunc.f32(float [[TMP35]]) -; CHECK-NEXT: [[TMP37:%.*]] = fneg fast float [[TMP36]] +; CHECK-NEXT: [[TMP37:%.*]] = fsub fast float -0.000000e+00, [[TMP36]] ; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP37]], float [[TMP33]], float [[TMP32]]) ; CHECK-NEXT: [[TMP39:%.*]] = fptosi float [[TMP36]] to i32 ; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.fabs.f32(float [[TMP38]]) @@ -1504,7 +1504,7 @@ define amdgpu_kernel void @sdiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; CHECK-NEXT: [[TMP58:%.*]] = fdiv fast float 1.000000e+00, [[TMP57]] ; CHECK-NEXT: [[TMP59:%.*]] = fmul fast float [[TMP56]], [[TMP58]] ; CHECK-NEXT: [[TMP60:%.*]] = call fast float @llvm.trunc.f32(float [[TMP59]]) -; CHECK-NEXT: [[TMP61:%.*]] = fneg fast float [[TMP60]] +; CHECK-NEXT: [[TMP61:%.*]] = fsub fast float -0.000000e+00, [[TMP60]] ; CHECK-NEXT: [[TMP62:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP61]], float [[TMP57]], float [[TMP56]]) ; CHECK-NEXT: [[TMP63:%.*]] = fptosi float [[TMP60]] to i32 ; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.fabs.f32(float [[TMP62]]) @@ -1528,7 +1528,7 @@ define amdgpu_kernel void @sdiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; CHECK-NEXT: [[TMP82:%.*]] = fdiv fast float 1.000000e+00, [[TMP81]] ; CHECK-NEXT: [[TMP83:%.*]] = fmul fast float [[TMP80]], [[TMP82]] ; CHECK-NEXT: [[TMP84:%.*]] = call fast float @llvm.trunc.f32(float [[TMP83]]) -; CHECK-NEXT: [[TMP85:%.*]] = fneg fast float [[TMP84]] +; CHECK-NEXT: [[TMP85:%.*]] = fsub fast float -0.000000e+00, [[TMP84]] ; CHECK-NEXT: [[TMP86:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP85]], float [[TMP81]], float [[TMP80]]) ; CHECK-NEXT: [[TMP87:%.*]] = fptosi float [[TMP84]] to i32 ; CHECK-NEXT: [[TMP88:%.*]] = call fast float @llvm.fabs.f32(float [[TMP86]]) @@ -1562,7 +1562,7 @@ define amdgpu_kernel void @srem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; CHECK-NEXT: [[TMP10:%.*]] = fdiv fast float 1.000000e+00, [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]]) -; CHECK-NEXT: [[TMP13:%.*]] = fneg fast float [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = fsub fast float -0.000000e+00, [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]]) ; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32 ; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]]) @@ -1588,7 +1588,7 @@ define amdgpu_kernel void @srem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; CHECK-NEXT: [[TMP36:%.*]] = fdiv fast float 1.000000e+00, [[TMP35]] ; CHECK-NEXT: [[TMP37:%.*]] = fmul fast float [[TMP34]], [[TMP36]] ; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.trunc.f32(float [[TMP37]]) -; CHECK-NEXT: [[TMP39:%.*]] = fneg fast float [[TMP38]] +; CHECK-NEXT: [[TMP39:%.*]] = fsub fast float -0.000000e+00, [[TMP38]] ; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP39]], float [[TMP35]], float [[TMP34]]) ; CHECK-NEXT: [[TMP41:%.*]] = fptosi float [[TMP38]] to i32 ; CHECK-NEXT: [[TMP42:%.*]] = call fast float @llvm.fabs.f32(float [[TMP40]]) @@ -1614,7 +1614,7 @@ define amdgpu_kernel void @srem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; CHECK-NEXT: [[TMP62:%.*]] = fdiv fast float 1.000000e+00, [[TMP61]] ; CHECK-NEXT: [[TMP63:%.*]] = fmul fast float [[TMP60]], [[TMP62]] ; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.trunc.f32(float [[TMP63]]) -; CHECK-NEXT: [[TMP65:%.*]] = fneg fast float [[TMP64]] +; CHECK-NEXT: [[TMP65:%.*]] = fsub fast float -0.000000e+00, [[TMP64]] ; CHECK-NEXT: [[TMP66:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP65]], float [[TMP61]], float [[TMP60]]) ; CHECK-NEXT: [[TMP67:%.*]] = fptosi float [[TMP64]] to i32 ; CHECK-NEXT: [[TMP68:%.*]] = call fast float @llvm.fabs.f32(float [[TMP66]]) @@ -1640,7 +1640,7 @@ define amdgpu_kernel void @srem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; CHECK-NEXT: [[TMP88:%.*]] = fdiv fast float 1.000000e+00, [[TMP87]] ; CHECK-NEXT: [[TMP89:%.*]] = fmul fast float [[TMP86]], [[TMP88]] ; CHECK-NEXT: [[TMP90:%.*]] = call fast float @llvm.trunc.f32(float [[TMP89]]) -; CHECK-NEXT: [[TMP91:%.*]] = fneg fast float [[TMP90]] +; CHECK-NEXT: [[TMP91:%.*]] = fsub fast float -0.000000e+00, [[TMP90]] ; CHECK-NEXT: [[TMP92:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP91]], float [[TMP87]], float [[TMP86]]) ; CHECK-NEXT: [[TMP93:%.*]] = fptosi float [[TMP90]] to i32 ; CHECK-NEXT: [[TMP94:%.*]] = call fast float @llvm.fabs.f32(float [[TMP92]]) @@ -1671,7 +1671,7 @@ define amdgpu_kernel void @udiv_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; CHECK-NEXT: [[TMP5:%.*]] = fdiv fast float 1.000000e+00, [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = fmul fast float [[TMP3]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]]) -; CHECK-NEXT: [[TMP8:%.*]] = fneg fast float [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = fsub fast float -0.000000e+00, [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]]) ; CHECK-NEXT: [[TMP10:%.*]] = fptoui float [[TMP7]] to i32 ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) @@ -1698,7 +1698,7 @@ define amdgpu_kernel void @urem_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; CHECK-NEXT: [[TMP5:%.*]] = fdiv fast float 1.000000e+00, [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = fmul fast float [[TMP3]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]]) -; CHECK-NEXT: [[TMP8:%.*]] = fneg fast float [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = fsub fast float -0.000000e+00, [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]]) ; CHECK-NEXT: [[TMP10:%.*]] = fptoui float [[TMP7]] to i32 ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) @@ -1730,7 +1730,7 @@ define amdgpu_kernel void @sdiv_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; CHECK-NEXT: [[TMP8:%.*]] = fdiv fast float 1.000000e+00, [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]]) -; CHECK-NEXT: [[TMP11:%.*]] = fneg fast float [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = fsub fast float -0.000000e+00, [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]]) ; CHECK-NEXT: [[TMP13:%.*]] = fptosi float [[TMP10]] to i32 ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]]) @@ -1761,7 +1761,7 @@ define amdgpu_kernel void @srem_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; CHECK-NEXT: [[TMP8:%.*]] = fdiv fast float 1.000000e+00, [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]]) -; CHECK-NEXT: [[TMP11:%.*]] = fneg fast float [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = fsub fast float -0.000000e+00, [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]]) ; CHECK-NEXT: [[TMP13:%.*]] = fptosi float [[TMP10]] to i32 ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]]) @@ -1793,7 +1793,7 @@ define amdgpu_kernel void @udiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast float 1.000000e+00, [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[TMP5]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]]) -; CHECK-NEXT: [[TMP10:%.*]] = fneg fast float [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = fsub fast float -0.000000e+00, [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]]) ; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32 ; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]]) @@ -1813,7 +1813,7 @@ define amdgpu_kernel void @udiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; CHECK-NEXT: [[TMP27:%.*]] = fdiv fast float 1.000000e+00, [[TMP26]] ; CHECK-NEXT: [[TMP28:%.*]] = fmul fast float [[TMP25]], [[TMP27]] ; CHECK-NEXT: [[TMP29:%.*]] = call fast float @llvm.trunc.f32(float [[TMP28]]) -; CHECK-NEXT: [[TMP30:%.*]] = fneg fast float [[TMP29]] +; CHECK-NEXT: [[TMP30:%.*]] = fsub fast float -0.000000e+00, [[TMP29]] ; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP30]], float [[TMP26]], float [[TMP25]]) ; CHECK-NEXT: [[TMP32:%.*]] = fptoui float [[TMP29]] to i32 ; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.fabs.f32(float [[TMP31]]) @@ -1833,7 +1833,7 @@ define amdgpu_kernel void @udiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; CHECK-NEXT: [[TMP47:%.*]] = fdiv fast float 1.000000e+00, [[TMP46]] ; CHECK-NEXT: [[TMP48:%.*]] = fmul fast float [[TMP45]], [[TMP47]] ; CHECK-NEXT: [[TMP49:%.*]] = call fast float @llvm.trunc.f32(float [[TMP48]]) -; CHECK-NEXT: [[TMP50:%.*]] = fneg fast float [[TMP49]] +; CHECK-NEXT: [[TMP50:%.*]] = fsub fast float -0.000000e+00, [[TMP49]] ; CHECK-NEXT: [[TMP51:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP50]], float [[TMP46]], float [[TMP45]]) ; CHECK-NEXT: [[TMP52:%.*]] = fptoui float [[TMP49]] to i32 ; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.fabs.f32(float [[TMP51]]) @@ -1863,7 +1863,7 @@ define amdgpu_kernel void @urem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast float 1.000000e+00, [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[TMP5]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]]) -; CHECK-NEXT: [[TMP10:%.*]] = fneg fast float [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = fsub fast float -0.000000e+00, [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]]) ; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32 ; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]]) @@ -1885,7 +1885,7 @@ define amdgpu_kernel void @urem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; CHECK-NEXT: [[TMP29:%.*]] = fdiv fast float 1.000000e+00, [[TMP28]] ; CHECK-NEXT: [[TMP30:%.*]] = fmul fast float [[TMP27]], [[TMP29]] ; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.trunc.f32(float [[TMP30]]) -; CHECK-NEXT: [[TMP32:%.*]] = fneg fast float [[TMP31]] +; CHECK-NEXT: [[TMP32:%.*]] = fsub fast float -0.000000e+00, [[TMP31]] ; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP32]], float [[TMP28]], float [[TMP27]]) ; CHECK-NEXT: [[TMP34:%.*]] = fptoui float [[TMP31]] to i32 ; CHECK-NEXT: [[TMP35:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]]) @@ -1907,7 +1907,7 @@ define amdgpu_kernel void @urem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; CHECK-NEXT: [[TMP51:%.*]] = fdiv fast float 1.000000e+00, [[TMP50]] ; CHECK-NEXT: [[TMP52:%.*]] = fmul fast float [[TMP49]], [[TMP51]] ; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.trunc.f32(float [[TMP52]]) -; CHECK-NEXT: [[TMP54:%.*]] = fneg fast float [[TMP53]] +; CHECK-NEXT: [[TMP54:%.*]] = fsub fast float -0.000000e+00, [[TMP53]] ; CHECK-NEXT: [[TMP55:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP54]], float [[TMP50]], float [[TMP49]]) ; CHECK-NEXT: [[TMP56:%.*]] = fptoui float [[TMP53]] to i32 ; CHECK-NEXT: [[TMP57:%.*]] = call fast float @llvm.fabs.f32(float [[TMP55]]) @@ -1942,7 +1942,7 @@ define amdgpu_kernel void @sdiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; CHECK-NEXT: [[TMP10:%.*]] = fdiv fast float 1.000000e+00, [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]]) -; CHECK-NEXT: [[TMP13:%.*]] = fneg fast float [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = fsub fast float -0.000000e+00, [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]]) ; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32 ; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]]) @@ -1966,7 +1966,7 @@ define amdgpu_kernel void @sdiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; CHECK-NEXT: [[TMP34:%.*]] = fdiv fast float 1.000000e+00, [[TMP33]] ; CHECK-NEXT: [[TMP35:%.*]] = fmul fast float [[TMP32]], [[TMP34]] ; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.trunc.f32(float [[TMP35]]) -; CHECK-NEXT: [[TMP37:%.*]] = fneg fast float [[TMP36]] +; CHECK-NEXT: [[TMP37:%.*]] = fsub fast float -0.000000e+00, [[TMP36]] ; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP37]], float [[TMP33]], float [[TMP32]]) ; CHECK-NEXT: [[TMP39:%.*]] = fptosi float [[TMP36]] to i32 ; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.fabs.f32(float [[TMP38]]) @@ -1990,7 +1990,7 @@ define amdgpu_kernel void @sdiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; CHECK-NEXT: [[TMP58:%.*]] = fdiv fast float 1.000000e+00, [[TMP57]] ; CHECK-NEXT: [[TMP59:%.*]] = fmul fast float [[TMP56]], [[TMP58]] ; CHECK-NEXT: [[TMP60:%.*]] = call fast float @llvm.trunc.f32(float [[TMP59]]) -; CHECK-NEXT: [[TMP61:%.*]] = fneg fast float [[TMP60]] +; CHECK-NEXT: [[TMP61:%.*]] = fsub fast float -0.000000e+00, [[TMP60]] ; CHECK-NEXT: [[TMP62:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP61]], float [[TMP57]], float [[TMP56]]) ; CHECK-NEXT: [[TMP63:%.*]] = fptosi float [[TMP60]] to i32 ; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.fabs.f32(float [[TMP62]]) @@ -2024,7 +2024,7 @@ define amdgpu_kernel void @srem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; CHECK-NEXT: [[TMP10:%.*]] = fdiv fast float 1.000000e+00, [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]]) -; CHECK-NEXT: [[TMP13:%.*]] = fneg fast float [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = fsub fast float -0.000000e+00, [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]]) ; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32 ; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]]) @@ -2050,7 +2050,7 @@ define amdgpu_kernel void @srem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; CHECK-NEXT: [[TMP36:%.*]] = fdiv fast float 1.000000e+00, [[TMP35]] ; CHECK-NEXT: [[TMP37:%.*]] = fmul fast float [[TMP34]], [[TMP36]] ; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.trunc.f32(float [[TMP37]]) -; CHECK-NEXT: [[TMP39:%.*]] = fneg fast float [[TMP38]] +; CHECK-NEXT: [[TMP39:%.*]] = fsub fast float -0.000000e+00, [[TMP38]] ; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP39]], float [[TMP35]], float [[TMP34]]) ; CHECK-NEXT: [[TMP41:%.*]] = fptosi float [[TMP38]] to i32 ; CHECK-NEXT: [[TMP42:%.*]] = call fast float @llvm.fabs.f32(float [[TMP40]]) @@ -2076,7 +2076,7 @@ define amdgpu_kernel void @srem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; CHECK-NEXT: [[TMP62:%.*]] = fdiv fast float 1.000000e+00, [[TMP61]] ; CHECK-NEXT: [[TMP63:%.*]] = fmul fast float [[TMP60]], [[TMP62]] ; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.trunc.f32(float [[TMP63]]) -; CHECK-NEXT: [[TMP65:%.*]] = fneg fast float [[TMP64]] +; CHECK-NEXT: [[TMP65:%.*]] = fsub fast float -0.000000e+00, [[TMP64]] ; CHECK-NEXT: [[TMP66:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP65]], float [[TMP61]], float [[TMP60]]) ; CHECK-NEXT: [[TMP67:%.*]] = fptosi float [[TMP64]] to i32 ; CHECK-NEXT: [[TMP68:%.*]] = call fast float @llvm.fabs.f32(float [[TMP66]]) @@ -2109,7 +2109,7 @@ define amdgpu_kernel void @udiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast float 1.000000e+00, [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[TMP5]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]]) -; CHECK-NEXT: [[TMP10:%.*]] = fneg fast float [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = fsub fast float -0.000000e+00, [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]]) ; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32 ; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]]) @@ -2129,7 +2129,7 @@ define amdgpu_kernel void @udiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; CHECK-NEXT: [[TMP27:%.*]] = fdiv fast float 1.000000e+00, [[TMP26]] ; CHECK-NEXT: [[TMP28:%.*]] = fmul fast float [[TMP25]], [[TMP27]] ; CHECK-NEXT: [[TMP29:%.*]] = call fast float @llvm.trunc.f32(float [[TMP28]]) -; CHECK-NEXT: [[TMP30:%.*]] = fneg fast float [[TMP29]] +; CHECK-NEXT: [[TMP30:%.*]] = fsub fast float -0.000000e+00, [[TMP29]] ; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP30]], float [[TMP26]], float [[TMP25]]) ; CHECK-NEXT: [[TMP32:%.*]] = fptoui float [[TMP29]] to i32 ; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.fabs.f32(float [[TMP31]]) @@ -2149,7 +2149,7 @@ define amdgpu_kernel void @udiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; CHECK-NEXT: [[TMP47:%.*]] = fdiv fast float 1.000000e+00, [[TMP46]] ; CHECK-NEXT: [[TMP48:%.*]] = fmul fast float [[TMP45]], [[TMP47]] ; CHECK-NEXT: [[TMP49:%.*]] = call fast float @llvm.trunc.f32(float [[TMP48]]) -; CHECK-NEXT: [[TMP50:%.*]] = fneg fast float [[TMP49]] +; CHECK-NEXT: [[TMP50:%.*]] = fsub fast float -0.000000e+00, [[TMP49]] ; CHECK-NEXT: [[TMP51:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP50]], float [[TMP46]], float [[TMP45]]) ; CHECK-NEXT: [[TMP52:%.*]] = fptoui float [[TMP49]] to i32 ; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.fabs.f32(float [[TMP51]]) @@ -2179,7 +2179,7 @@ define amdgpu_kernel void @urem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast float 1.000000e+00, [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[TMP5]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]]) -; CHECK-NEXT: [[TMP10:%.*]] = fneg fast float [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = fsub fast float -0.000000e+00, [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]]) ; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32 ; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]]) @@ -2201,7 +2201,7 @@ define amdgpu_kernel void @urem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; CHECK-NEXT: [[TMP29:%.*]] = fdiv fast float 1.000000e+00, [[TMP28]] ; CHECK-NEXT: [[TMP30:%.*]] = fmul fast float [[TMP27]], [[TMP29]] ; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.trunc.f32(float [[TMP30]]) -; CHECK-NEXT: [[TMP32:%.*]] = fneg fast float [[TMP31]] +; CHECK-NEXT: [[TMP32:%.*]] = fsub fast float -0.000000e+00, [[TMP31]] ; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP32]], float [[TMP28]], float [[TMP27]]) ; CHECK-NEXT: [[TMP34:%.*]] = fptoui float [[TMP31]] to i32 ; CHECK-NEXT: [[TMP35:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]]) @@ -2223,7 +2223,7 @@ define amdgpu_kernel void @urem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; CHECK-NEXT: [[TMP51:%.*]] = fdiv fast float 1.000000e+00, [[TMP50]] ; CHECK-NEXT: [[TMP52:%.*]] = fmul fast float [[TMP49]], [[TMP51]] ; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.trunc.f32(float [[TMP52]]) -; CHECK-NEXT: [[TMP54:%.*]] = fneg fast float [[TMP53]] +; CHECK-NEXT: [[TMP54:%.*]] = fsub fast float -0.000000e+00, [[TMP53]] ; CHECK-NEXT: [[TMP55:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP54]], float [[TMP50]], float [[TMP49]]) ; CHECK-NEXT: [[TMP56:%.*]] = fptoui float [[TMP53]] to i32 ; CHECK-NEXT: [[TMP57:%.*]] = call fast float @llvm.fabs.f32(float [[TMP55]]) @@ -2258,7 +2258,7 @@ define amdgpu_kernel void @sdiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; CHECK-NEXT: [[TMP10:%.*]] = fdiv fast float 1.000000e+00, [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]]) -; CHECK-NEXT: [[TMP13:%.*]] = fneg fast float [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = fsub fast float -0.000000e+00, [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]]) ; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32 ; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]]) @@ -2282,7 +2282,7 @@ define amdgpu_kernel void @sdiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; CHECK-NEXT: [[TMP34:%.*]] = fdiv fast float 1.000000e+00, [[TMP33]] ; CHECK-NEXT: [[TMP35:%.*]] = fmul fast float [[TMP32]], [[TMP34]] ; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.trunc.f32(float [[TMP35]]) -; CHECK-NEXT: [[TMP37:%.*]] = fneg fast float [[TMP36]] +; CHECK-NEXT: [[TMP37:%.*]] = fsub fast float -0.000000e+00, [[TMP36]] ; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP37]], float [[TMP33]], float [[TMP32]]) ; CHECK-NEXT: [[TMP39:%.*]] = fptosi float [[TMP36]] to i32 ; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.fabs.f32(float [[TMP38]]) @@ -2306,7 +2306,7 @@ define amdgpu_kernel void @sdiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; CHECK-NEXT: [[TMP58:%.*]] = fdiv fast float 1.000000e+00, [[TMP57]] ; CHECK-NEXT: [[TMP59:%.*]] = fmul fast float [[TMP56]], [[TMP58]] ; CHECK-NEXT: [[TMP60:%.*]] = call fast float @llvm.trunc.f32(float [[TMP59]]) -; CHECK-NEXT: [[TMP61:%.*]] = fneg fast float [[TMP60]] +; CHECK-NEXT: [[TMP61:%.*]] = fsub fast float -0.000000e+00, [[TMP60]] ; CHECK-NEXT: [[TMP62:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP61]], float [[TMP57]], float [[TMP56]]) ; CHECK-NEXT: [[TMP63:%.*]] = fptosi float [[TMP60]] to i32 ; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.fabs.f32(float [[TMP62]]) @@ -2340,7 +2340,7 @@ define amdgpu_kernel void @srem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; CHECK-NEXT: [[TMP10:%.*]] = fdiv fast float 1.000000e+00, [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]]) -; CHECK-NEXT: [[TMP13:%.*]] = fneg fast float [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = fsub fast float -0.000000e+00, [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]]) ; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32 ; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]]) @@ -2366,7 +2366,7 @@ define amdgpu_kernel void @srem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; CHECK-NEXT: [[TMP36:%.*]] = fdiv fast float 1.000000e+00, [[TMP35]] ; CHECK-NEXT: [[TMP37:%.*]] = fmul fast float [[TMP34]], [[TMP36]] ; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.trunc.f32(float [[TMP37]]) -; CHECK-NEXT: [[TMP39:%.*]] = fneg fast float [[TMP38]] +; CHECK-NEXT: [[TMP39:%.*]] = fsub fast float -0.000000e+00, [[TMP38]] ; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP39]], float [[TMP35]], float [[TMP34]]) ; CHECK-NEXT: [[TMP41:%.*]] = fptosi float [[TMP38]] to i32 ; CHECK-NEXT: [[TMP42:%.*]] = call fast float @llvm.fabs.f32(float [[TMP40]]) @@ -2392,7 +2392,7 @@ define amdgpu_kernel void @srem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; CHECK-NEXT: [[TMP62:%.*]] = fdiv fast float 1.000000e+00, [[TMP61]] ; CHECK-NEXT: [[TMP63:%.*]] = fmul fast float [[TMP60]], [[TMP62]] ; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.trunc.f32(float [[TMP63]]) -; CHECK-NEXT: [[TMP65:%.*]] = fneg fast float [[TMP64]] +; CHECK-NEXT: [[TMP65:%.*]] = fsub fast float -0.000000e+00, [[TMP64]] ; CHECK-NEXT: [[TMP66:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP65]], float [[TMP61]], float [[TMP60]]) ; CHECK-NEXT: [[TMP67:%.*]] = fptosi float [[TMP64]] to i32 ; CHECK-NEXT: [[TMP68:%.*]] = call fast float @llvm.fabs.f32(float [[TMP66]]) |