From e14df4b2365948f67069b9ec378852baf6c9da88 Mon Sep 17 00:00:00 2001 From: Konstantin Zhuravlyov Date: Wed, 28 Sep 2016 20:05:39 +0000 Subject: [AMDGPU] Promote uniform i16 ops to i32 ops for targets that have 16 bit instructions Differential Revision: https://reviews.llvm.org/D24125 llvm-svn: 282624 --- .../CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll | 246 ++++++ .../AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll | 856 +++++++++++++++++++++ llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll | 246 ------ llvm/test/CodeGen/AMDGPU/ctlz.ll | 158 ++-- llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll | 212 +++++ llvm/test/CodeGen/AMDGPU/mul_uint24-r600.ll | 83 ++ llvm/test/CodeGen/AMDGPU/mul_uint24.ll | 197 ----- 7 files changed, 1476 insertions(+), 522 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll create mode 100644 llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll create mode 100644 llvm/test/CodeGen/AMDGPU/mul_uint24-r600.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/mul_uint24.ll (limited to 'llvm/test/CodeGen/AMDGPU') diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll new file mode 100644 index 00000000000..d78c75165be --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll @@ -0,0 +1,246 @@ +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck %s +; RUN: opt -S -amdgpu-codegenprepare %s | FileCheck -check-prefix=NOOP %s +; Make sure this doesn't crash with no triple + +; NOOP-LABEL: @noop_fdiv_fpmath( +; NOOP: %md.25ulp = fdiv float %a, %b, !fpmath !0 +define void @noop_fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #3 { + %md.25ulp = fdiv float %a, %b, !fpmath !0 + store volatile float %md.25ulp, float addrspace(1)* %out + ret void +} + +; CHECK-LABEL: @fdiv_fpmath( 
+; CHECK: %no.md = fdiv float %a, %b{{$}} +; CHECK: %md.half.ulp = fdiv float %a, %b, !fpmath !1 +; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2 +; CHECK: %md.25ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0 +; CHECK: %md.3ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !3 +; CHECK: %fast.md.25ulp = call fast float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0 +; CHECK: arcp.md.25ulp = call arcp float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0 +define void @fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #1 { + %no.md = fdiv float %a, %b + store volatile float %no.md, float addrspace(1)* %out + + %md.half.ulp = fdiv float %a, %b, !fpmath !1 + store volatile float %md.half.ulp, float addrspace(1)* %out + + %md.1ulp = fdiv float %a, %b, !fpmath !2 + store volatile float %md.1ulp, float addrspace(1)* %out + + %md.25ulp = fdiv float %a, %b, !fpmath !0 + store volatile float %md.25ulp, float addrspace(1)* %out + + %md.3ulp = fdiv float %a, %b, !fpmath !3 + store volatile float %md.3ulp, float addrspace(1)* %out + + %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0 + store volatile float %fast.md.25ulp, float addrspace(1)* %out + + %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0 + store volatile float %arcp.md.25ulp, float addrspace(1)* %out + + ret void +} + +; CHECK-LABEL: @rcp_fdiv_fpmath( +; CHECK: %no.md = fdiv float 1.000000e+00, %x{{$}} +; CHECK: %md.25ulp = fdiv float 1.000000e+00, %x, !fpmath !0 +; CHECK: %md.half.ulp = fdiv float 1.000000e+00, %x, !fpmath !1 +; CHECK: %arcp.no.md = fdiv arcp float 1.000000e+00, %x{{$}} +; CHECK: %arcp.25ulp = fdiv arcp float 1.000000e+00, %x, !fpmath !0 +; CHECK: %fast.no.md = fdiv fast float 1.000000e+00, %x{{$}} +; CHECK: %fast.25ulp = fdiv fast float 1.000000e+00, %x, !fpmath !0 +define void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #1 { + %no.md = fdiv float 1.0, %x + store volatile float %no.md, float addrspace(1)* %out + + 
%md.25ulp = fdiv float 1.0, %x, !fpmath !0 + store volatile float %md.25ulp, float addrspace(1)* %out + + %md.half.ulp = fdiv float 1.0, %x, !fpmath !1 + store volatile float %md.half.ulp, float addrspace(1)* %out + + %arcp.no.md = fdiv arcp float 1.0, %x + store volatile float %arcp.no.md, float addrspace(1)* %out + + %arcp.25ulp = fdiv arcp float 1.0, %x, !fpmath !0 + store volatile float %arcp.25ulp, float addrspace(1)* %out + + %fast.no.md = fdiv fast float 1.0, %x + store volatile float %fast.no.md, float addrspace(1)* %out + + %fast.25ulp = fdiv fast float 1.0, %x, !fpmath !0 + store volatile float %fast.25ulp, float addrspace(1)* %out + + ret void +} + +; CHECK-LABEL: @fdiv_fpmath_vector( +; CHECK: %no.md = fdiv <2 x float> %a, %b{{$}} +; CHECK: %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1 +; CHECK: %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2 + +; CHECK: %[[A0:[0-9]+]] = extractelement <2 x float> %a, i64 0 +; CHECK: %[[B0:[0-9]+]] = extractelement <2 x float> %b, i64 0 +; CHECK: %[[FDIV0:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A0]], float %[[B0]]), !fpmath !0 +; CHECK: %[[INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FDIV0]], i64 0 +; CHECK: %[[A1:[0-9]+]] = extractelement <2 x float> %a, i64 1 +; CHECK: %[[B1:[0-9]+]] = extractelement <2 x float> %b, i64 1 +; CHECK: %[[FDIV1:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A1]], float %[[B1]]), !fpmath !0 +; CHECK: %md.25ulp = insertelement <2 x float> %[[INS0]], float %[[FDIV1]], i64 1 +define void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #1 { + %no.md = fdiv <2 x float> %a, %b + store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out + + %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1 + store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out + + %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2 + store volatile <2 x float> %md.1ulp, <2 x float> addrspace(1)* %out + + %md.25ulp = fdiv <2 x float> 
%a, %b, !fpmath !0 + store volatile <2 x float> %md.25ulp, <2 x float> addrspace(1)* %out + + ret void +} + +; CHECK-LABEL: @rcp_fdiv_fpmath_vector( +; CHECK: %no.md = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}} +; CHECK: %md.half.ulp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !1 +; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}} +; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}} + +; CHECK: extractelement <2 x float> %x +; CHECK: fdiv arcp float 1.000000e+00, %{{[0-9]+}}, !fpmath !0 +; CHECK: extractelement <2 x float> %x +; CHECK: fdiv arcp float 1.000000e+00, %{{[0-9]+}}, !fpmath !0 +; CHECK: store volatile <2 x float> %arcp.25ulp + +; CHECK: fdiv fast float 1.000000e+00, %{{[0-9]+}}, !fpmath !0 +; CHECK: fdiv fast float 1.000000e+00, %{{[0-9]+}}, !fpmath !0 +; CHECK: store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out +define void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 { + %no.md = fdiv <2 x float> <float 1.0, float 1.0>, %x + store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out + + %md.half.ulp = fdiv <2 x float> <float 1.0, float 1.0>, %x, !fpmath !1 + store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out + + %arcp.no.md = fdiv arcp <2 x float> <float 1.0, float 1.0>, %x + store volatile <2 x float> %arcp.no.md, <2 x float> addrspace(1)* %out + + %fast.no.md = fdiv fast <2 x float> <float 1.0, float 1.0>, %x + store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out + + %arcp.25ulp = fdiv arcp <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0 + store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out + + %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0 + store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out + + ret void +} + +; CHECK-LABEL: @rcp_fdiv_fpmath_vector_nonsplat( +; CHECK: %no.md = fdiv <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x +; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x +; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x{{$}} + +; CHECK: %[[X0:[0-9]+]] = extractelement <2 x float> %x, i64 0 +; CHECK: fdiv arcp float 1.000000e+00, %[[X0]], !fpmath !0 +;
CHECK: %[[X1:[0-9]+]] = extractelement <2 x float> %x, i64 1 +; CHECK: fdiv arcp float 2.000000e+00, %[[X1]], !fpmath !0 +; CHECK: store volatile <2 x float> %arcp.25ulp + +; CHECK: %[[X0:[0-9]+]] = extractelement <2 x float> %x, i64 0 +; CHECK: fdiv fast float 1.000000e+00, %[[X0]], !fpmath !0 +; CHECK: %[[X1:[0-9]+]] = extractelement <2 x float> %x, i64 1 +; CHECK: fdiv fast float 2.000000e+00, %[[X1]], !fpmath !0 +; CHECK: store volatile <2 x float> %fast.25ulp +define void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 { + %no.md = fdiv <2 x float> <float 1.0, float 2.0>, %x + store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out + + %arcp.no.md = fdiv arcp <2 x float> <float 1.0, float 2.0>, %x + store volatile <2 x float> %arcp.no.md, <2 x float> addrspace(1)* %out + + %fast.no.md = fdiv fast <2 x float> <float 1.0, float 2.0>, %x + store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out + + %arcp.25ulp = fdiv arcp <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0 + store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out + + %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0 + store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out + + ret void +} + +; FIXME: Should be able to get fdiv for 1.0 component +; CHECK-LABEL: @rcp_fdiv_fpmath_vector_partial_constant( +; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0 +; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0 +; CHECK: store volatile <2 x float> %arcp.25ulp + +; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0 +; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0 +; CHECK: store volatile <2 x float> %fast.25ulp +define void @rcp_fdiv_fpmath_vector_partial_constant(<2 x float> addrspace(1)* %out, <2 x float> %x, <2 x float> %y) #1 { + %x.insert = insertelement <2 x float> %x, float 1.0, i32 0 + + %arcp.25ulp = fdiv
arcp <2 x float> %x.insert, %y, !fpmath !0 + store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out + + %fast.25ulp = fdiv fast <2 x float> %x.insert, %y, !fpmath !0 + store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out + + ret void +} + +; CHECK-LABEL: @fdiv_fpmath_f32_denormals( +; CHECK: %no.md = fdiv float %a, %b{{$}} +; CHECK: %md.half.ulp = fdiv float %a, %b, !fpmath !1 +; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2 +; CHECK: %md.25ulp = fdiv float %a, %b, !fpmath !0 +; CHECK: %md.3ulp = fdiv float %a, %b, !fpmath !3 +; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0 +; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0 +define void @fdiv_fpmath_f32_denormals(float addrspace(1)* %out, float %a, float %b) #2 { + %no.md = fdiv float %a, %b + store volatile float %no.md, float addrspace(1)* %out + + %md.half.ulp = fdiv float %a, %b, !fpmath !1 + store volatile float %md.half.ulp, float addrspace(1)* %out + + %md.1ulp = fdiv float %a, %b, !fpmath !2 + store volatile float %md.1ulp, float addrspace(1)* %out + + %md.25ulp = fdiv float %a, %b, !fpmath !0 + store volatile float %md.25ulp, float addrspace(1)* %out + + %md.3ulp = fdiv float %a, %b, !fpmath !3 + store volatile float %md.3ulp, float addrspace(1)* %out + + %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0 + store volatile float %fast.md.25ulp, float addrspace(1)* %out + + %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0 + store volatile float %arcp.md.25ulp, float addrspace(1)* %out + + ret void +} + +attributes #0 = { nounwind optnone noinline } +attributes #1 = { nounwind } +attributes #2 = { nounwind "target-features"="+fp32-denormals" } + +; CHECK: !0 = !{float 2.500000e+00} +; CHECK: !1 = !{float 5.000000e-01} +; CHECK: !2 = !{float 1.000000e+00} +; CHECK: !3 = !{float 3.000000e+00} + +!0 = !{float 2.500000e+00} +!1 = !{float 5.000000e-01} +!2 = !{float 1.000000e+00} +!3 = !{float 3.000000e+00} diff 
--git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll new file mode 100644 index 00000000000..ed512b4ddd9 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll @@ -0,0 +1,856 @@ +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck -check-prefix=SI %s +; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -amdgpu-codegenprepare %s | FileCheck -check-prefix=VI %s + +; SI-NOT: zext +; SI-NOT: sext +; SI-NOT: trunc + +; VI-LABEL: @add_i16( +; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI: %[[R_32:[0-9]+]] = add i32 %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI: ret i16 %[[R_16]] +define i16 @add_i16(i16 %a, i16 %b) { + %r = add i16 %a, %b + ret i16 %r +} + +; VI-LABEL: @add_nsw_i16( +; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI: %[[R_32:[0-9]+]] = add nsw i32 %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI: ret i16 %[[R_16]] +define i16 @add_nsw_i16(i16 %a, i16 %b) { + %r = add nsw i16 %a, %b + ret i16 %r +} + +; VI-LABEL: @add_nuw_i16( +; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI: %[[R_32:[0-9]+]] = add nuw i32 %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI: ret i16 %[[R_16]] +define i16 @add_nuw_i16(i16 %a, i16 %b) { + %r = add nuw i16 %a, %b + ret i16 %r +} + +; VI-LABEL: @add_nuw_nsw_i16( +; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI: ret i16 %[[R_16]] +define i16 @add_nuw_nsw_i16(i16 %a, i16 %b) { + %r = add nuw nsw i16 %a, %b + ret i16 %r +} + +; VI-LABEL: @sub_i16( +; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32:[0-9]+]] = zext i16 %b 
to i32 +; VI: %[[R_32:[0-9]+]] = sub i32 %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI: ret i16 %[[R_16]] +define i16 @sub_i16(i16 %a, i16 %b) { + %r = sub i16 %a, %b + ret i16 %r +} + +; VI-LABEL: @sub_nsw_i16( +; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI: ret i16 %[[R_16]] +define i16 @sub_nsw_i16(i16 %a, i16 %b) { + %r = sub nsw i16 %a, %b + ret i16 %r +} + +; VI-LABEL: @sub_nuw_i16( +; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI: %[[R_32:[0-9]+]] = sub nuw i32 %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI: ret i16 %[[R_16]] +define i16 @sub_nuw_i16(i16 %a, i16 %b) { + %r = sub nuw i16 %a, %b + ret i16 %r +} + +; VI-LABEL: @sub_nuw_nsw_i16( +; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI: ret i16 %[[R_16]] +define i16 @sub_nuw_nsw_i16(i16 %a, i16 %b) { + %r = sub nuw nsw i16 %a, %b + ret i16 %r +} + +; VI-LABEL: @mul_i16( +; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI: %[[R_32:[0-9]+]] = mul i32 %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI: ret i16 %[[R_16]] +define i16 @mul_i16(i16 %a, i16 %b) { + %r = mul i16 %a, %b + ret i16 %r +} + +; VI-LABEL: @mul_nsw_i16( +; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI: %[[R_32:[0-9]+]] = mul nsw i32 %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI: ret i16 %[[R_16]] +define i16 @mul_nsw_i16(i16 %a, i16 %b) { + %r = mul nsw i16 %a, %b + ret i16 %r +} + +; VI-LABEL: @mul_nuw_i16( +; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 +; 
VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI: ret i16 %[[R_16]] +define i16 @mul_nuw_i16(i16 %a, i16 %b) { + %r = mul nuw i16 %a, %b + ret i16 %r +} + +; VI-LABEL: @mul_nuw_nsw_i16( +; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI: ret i16 %[[R_16]] +define i16 @mul_nuw_nsw_i16(i16 %a, i16 %b) { + %r = mul nuw nsw i16 %a, %b + ret i16 %r +} + +; VI-LABEL: @urem_i16( +; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI: %[[R_32:[0-9]+]] = urem i32 %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI: ret i16 %[[R_16]] +define i16 @urem_i16(i16 %a, i16 %b) { + %r = urem i16 %a, %b + ret i16 %r +} + +; VI-LABEL: @srem_i16( +; VI: %[[A_32:[0-9]+]] = sext i16 %a to i32 +; VI: %[[B_32:[0-9]+]] = sext i16 %b to i32 +; VI: %[[R_32:[0-9]+]] = srem i32 %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI: ret i16 %[[R_16]] +define i16 @srem_i16(i16 %a, i16 %b) { + %r = srem i16 %a, %b + ret i16 %r +} + +; VI-LABEL: @shl_i16( +; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI: %[[R_32:[0-9]+]] = shl i32 %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI: ret i16 %[[R_16]] +define i16 @shl_i16(i16 %a, i16 %b) { + %r = shl i16 %a, %b + ret i16 %r +} + +; VI-LABEL: @shl_nsw_i16( +; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI: %[[R_32:[0-9]+]] = shl nsw i32 %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI: ret i16 %[[R_16]] +define i16 @shl_nsw_i16(i16 %a, i16 %b) { + %r = shl nsw i16 %a, %b + ret i16 %r +} + +; VI-LABEL: @shl_nuw_i16( +; VI: %[[A_32:[0-9]+]] = 
zext i16 %a to i32 +; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI: %[[R_32:[0-9]+]] = shl nuw i32 %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI: ret i16 %[[R_16]] +define i16 @shl_nuw_i16(i16 %a, i16 %b) { + %r = shl nuw i16 %a, %b + ret i16 %r +} + +; VI-LABEL: @shl_nuw_nsw_i16( +; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI: ret i16 %[[R_16]] +define i16 @shl_nuw_nsw_i16(i16 %a, i16 %b) { + %r = shl nuw nsw i16 %a, %b + ret i16 %r +} + +; VI-LABEL: @lshr_i16( +; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI: %[[R_32:[0-9]+]] = lshr i32 %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI: ret i16 %[[R_16]] +define i16 @lshr_i16(i16 %a, i16 %b) { + %r = lshr i16 %a, %b + ret i16 %r +} + +; VI-LABEL: @lshr_exact_i16( +; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI: %[[R_32:[0-9]+]] = lshr exact i32 %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI: ret i16 %[[R_16]] +define i16 @lshr_exact_i16(i16 %a, i16 %b) { + %r = lshr exact i16 %a, %b + ret i16 %r +} + +; VI-LABEL: @ashr_i16( +; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI: %[[R_32:[0-9]+]] = ashr i32 %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI: ret i16 %[[R_16]] +define i16 @ashr_i16(i16 %a, i16 %b) { + %r = ashr i16 %a, %b + ret i16 %r +} + +; VI-LABEL: @ashr_exact_i16( +; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI: %[[R_32:[0-9]+]] = ashr exact i32 %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI: ret i16 %[[R_16]] +define i16 @ashr_exact_i16(i16 %a, i16 %b) { + %r = ashr exact i16 %a, %b + ret 
i16 %r +} + +; VI-LABEL: @and_i16( +; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI: %[[R_32:[0-9]+]] = and i32 %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI: ret i16 %[[R_16]] +define i16 @and_i16(i16 %a, i16 %b) { + %r = and i16 %a, %b + ret i16 %r +} + +; VI-LABEL: @or_i16( +; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI: %[[R_32:[0-9]+]] = or i32 %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI: ret i16 %[[R_16]] +define i16 @or_i16(i16 %a, i16 %b) { + %r = or i16 %a, %b + ret i16 %r +} + +; VI-LABEL: @xor_i16( +; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32:[0-9]+]] = zext i16 %b to i32 +; VI: %[[R_32:[0-9]+]] = xor i32 %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 +; VI: ret i16 %[[R_16]] +define i16 @xor_i16(i16 %a, i16 %b) { + %r = xor i16 %a, %b + ret i16 %r +} + +; VI-LABEL: @select_eq_i16( +; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 +; VI: %[[CMP:[0-9]+]] = icmp eq i32 %[[A_32_0]], %[[B_32_0]] +; VI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 +; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] +; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 +; VI: ret i16 %[[SEL_16]] +define i16 @select_eq_i16(i16 %a, i16 %b) { + %cmp = icmp eq i16 %a, %b + %sel = select i1 %cmp, i16 %a, i16 %b + ret i16 %sel +} + +; VI-LABEL: @select_ne_i16( +; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 +; VI: %[[CMP:[0-9]+]] = icmp ne i32 %[[A_32_0]], %[[B_32_0]] +; VI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 +; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] +; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 +; VI: ret i16 
%[[SEL_16]] +define i16 @select_ne_i16(i16 %a, i16 %b) { + %cmp = icmp ne i16 %a, %b + %sel = select i1 %cmp, i16 %a, i16 %b + ret i16 %sel +} + +; VI-LABEL: @select_ugt_i16( +; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 +; VI: %[[CMP:[0-9]+]] = icmp ugt i32 %[[A_32_0]], %[[B_32_0]] +; VI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 +; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] +; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 +; VI: ret i16 %[[SEL_16]] +define i16 @select_ugt_i16(i16 %a, i16 %b) { + %cmp = icmp ugt i16 %a, %b + %sel = select i1 %cmp, i16 %a, i16 %b + ret i16 %sel +} + +; VI-LABEL: @select_uge_i16( +; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 +; VI: %[[CMP:[0-9]+]] = icmp uge i32 %[[A_32_0]], %[[B_32_0]] +; VI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 +; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] +; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 +; VI: ret i16 %[[SEL_16]] +define i16 @select_uge_i16(i16 %a, i16 %b) { + %cmp = icmp uge i16 %a, %b + %sel = select i1 %cmp, i16 %a, i16 %b + ret i16 %sel +} + +; VI-LABEL: @select_ult_i16( +; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 +; VI: %[[CMP:[0-9]+]] = icmp ult i32 %[[A_32_0]], %[[B_32_0]] +; VI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 +; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] +; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 +; VI: ret i16 %[[SEL_16]] +define i16 @select_ult_i16(i16 %a, i16 %b) { + %cmp = icmp ult i16 %a, %b + %sel = select i1 %cmp, i16 %a, i16 %b + ret i16 %sel +} + +; VI-LABEL: @select_ule_i16( +; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32_0:[0-9]+]] = zext 
i16 %b to i32 +; VI: %[[CMP:[0-9]+]] = icmp ule i32 %[[A_32_0]], %[[B_32_0]] +; VI: %[[A_32_1:[0-9]+]] = zext i16 %a to i32 +; VI: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 +; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] +; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 +; VI: ret i16 %[[SEL_16]] +define i16 @select_ule_i16(i16 %a, i16 %b) { + %cmp = icmp ule i16 %a, %b + %sel = select i1 %cmp, i16 %a, i16 %b + ret i16 %sel +} + +; VI-LABEL: @select_sgt_i16( +; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32 +; VI: %[[B_32_0:[0-9]+]] = sext i16 %b to i32 +; VI: %[[CMP:[0-9]+]] = icmp sgt i32 %[[A_32_0]], %[[B_32_0]] +; VI: %[[A_32_1:[0-9]+]] = sext i16 %a to i32 +; VI: %[[B_32_1:[0-9]+]] = sext i16 %b to i32 +; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] +; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 +; VI: ret i16 %[[SEL_16]] +define i16 @select_sgt_i16(i16 %a, i16 %b) { + %cmp = icmp sgt i16 %a, %b + %sel = select i1 %cmp, i16 %a, i16 %b + ret i16 %sel +} + +; VI-LABEL: @select_sge_i16( +; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32 +; VI: %[[B_32_0:[0-9]+]] = sext i16 %b to i32 +; VI: %[[CMP:[0-9]+]] = icmp sge i32 %[[A_32_0]], %[[B_32_0]] +; VI: %[[A_32_1:[0-9]+]] = sext i16 %a to i32 +; VI: %[[B_32_1:[0-9]+]] = sext i16 %b to i32 +; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] +; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 +; VI: ret i16 %[[SEL_16]] +define i16 @select_sge_i16(i16 %a, i16 %b) { + %cmp = icmp sge i16 %a, %b + %sel = select i1 %cmp, i16 %a, i16 %b + ret i16 %sel +} + +; VI-LABEL: @select_slt_i16( +; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32 +; VI: %[[B_32_0:[0-9]+]] = sext i16 %b to i32 +; VI: %[[CMP:[0-9]+]] = icmp slt i32 %[[A_32_0]], %[[B_32_0]] +; VI: %[[A_32_1:[0-9]+]] = sext i16 %a to i32 +; VI: %[[B_32_1:[0-9]+]] = sext i16 %b to i32 +; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] +; 
VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 +; VI: ret i16 %[[SEL_16]] +define i16 @select_slt_i16(i16 %a, i16 %b) { + %cmp = icmp slt i16 %a, %b + %sel = select i1 %cmp, i16 %a, i16 %b + ret i16 %sel +} + +; VI-LABEL: @select_sle_i16( +; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32 +; VI: %[[B_32_0:[0-9]+]] = sext i16 %b to i32 +; VI: %[[CMP:[0-9]+]] = icmp sle i32 %[[A_32_0]], %[[B_32_0]] +; VI: %[[A_32_1:[0-9]+]] = sext i16 %a to i32 +; VI: %[[B_32_1:[0-9]+]] = sext i16 %b to i32 +; VI: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] +; VI: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 +; VI: ret i16 %[[SEL_16]] +define i16 @select_sle_i16(i16 %a, i16 %b) { + %cmp = icmp sle i16 %a, %b + %sel = select i1 %cmp, i16 %a, i16 %b + ret i16 %sel +} + +; VI-LABEL: @add_3xi16( +; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[R_32:[0-9]+]] = add <3 x i32> %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI: ret <3 x i16> %[[R_16]] +define <3 x i16> @add_3xi16(<3 x i16> %a, <3 x i16> %b) { + %r = add <3 x i16> %a, %b + ret <3 x i16> %r +} + +; VI-LABEL: @add_nsw_3xi16( +; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[R_32:[0-9]+]] = add nsw <3 x i32> %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI: ret <3 x i16> %[[R_16]] +define <3 x i16> @add_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { + %r = add nsw <3 x i16> %a, %b + ret <3 x i16> %r +} + +; VI-LABEL: @add_nuw_3xi16( +; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[R_32:[0-9]+]] = add nuw <3 x i32> %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI: ret <3 x i16> %[[R_16]] +define <3 x i16> @add_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) { + %r 
= add nuw <3 x i16> %a, %b + ret <3 x i16> %r +} + +; VI-LABEL: @add_nuw_nsw_3xi16( +; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI: ret <3 x i16> %[[R_16]] +define <3 x i16> @add_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { + %r = add nuw nsw <3 x i16> %a, %b + ret <3 x i16> %r +} + +; VI-LABEL: @sub_3xi16( +; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[R_32:[0-9]+]] = sub <3 x i32> %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI: ret <3 x i16> %[[R_16]] +define <3 x i16> @sub_3xi16(<3 x i16> %a, <3 x i16> %b) { + %r = sub <3 x i16> %a, %b + ret <3 x i16> %r +} + +; VI-LABEL: @sub_nsw_3xi16( +; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI: ret <3 x i16> %[[R_16]] +define <3 x i16> @sub_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { + %r = sub nsw <3 x i16> %a, %b + ret <3 x i16> %r +} + +; VI-LABEL: @sub_nuw_3xi16( +; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[R_32:[0-9]+]] = sub nuw <3 x i32> %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI: ret <3 x i16> %[[R_16]] +define <3 x i16> @sub_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) { + %r = sub nuw <3 x i16> %a, %b + ret <3 x i16> %r +} + +; VI-LABEL: @sub_nuw_nsw_3xi16( +; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc <3 x 
i32> %[[R_32]] to <3 x i16> +; VI: ret <3 x i16> %[[R_16]] +define <3 x i16> @sub_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { + %r = sub nuw nsw <3 x i16> %a, %b + ret <3 x i16> %r +} + +; VI-LABEL: @mul_3xi16( +; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[R_32:[0-9]+]] = mul <3 x i32> %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI: ret <3 x i16> %[[R_16]] +define <3 x i16> @mul_3xi16(<3 x i16> %a, <3 x i16> %b) { + %r = mul <3 x i16> %a, %b + ret <3 x i16> %r +} + +; VI-LABEL: @mul_nsw_3xi16( +; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[R_32:[0-9]+]] = mul nsw <3 x i32> %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI: ret <3 x i16> %[[R_16]] +define <3 x i16> @mul_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { + %r = mul nsw <3 x i16> %a, %b + ret <3 x i16> %r +} + +; VI-LABEL: @mul_nuw_3xi16( +; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI: ret <3 x i16> %[[R_16]] +define <3 x i16> @mul_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) { + %r = mul nuw <3 x i16> %a, %b + ret <3 x i16> %r +} + +; VI-LABEL: @mul_nuw_nsw_3xi16( +; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI: ret <3 x i16> %[[R_16]] +define <3 x i16> @mul_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { + %r = mul nuw nsw <3 x i16> %a, %b + ret <3 x i16> %r +} + +; VI-LABEL: @urem_3xi16( +; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32:[0-9]+]] = zext <3 x 
i16> %b to <3 x i32> +; VI: %[[R_32:[0-9]+]] = urem <3 x i32> %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI: ret <3 x i16> %[[R_16]] +define <3 x i16> @urem_3xi16(<3 x i16> %a, <3 x i16> %b) { + %r = urem <3 x i16> %a, %b + ret <3 x i16> %r +} + +; VI-LABEL: @srem_3xi16( +; VI: %[[A_32:[0-9]+]] = sext <3 x i16> %a to <3 x i32> +; VI: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32> +; VI: %[[R_32:[0-9]+]] = srem <3 x i32> %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI: ret <3 x i16> %[[R_16]] +define <3 x i16> @srem_3xi16(<3 x i16> %a, <3 x i16> %b) { + %r = srem <3 x i16> %a, %b + ret <3 x i16> %r +} + +; VI-LABEL: @shl_3xi16( +; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[R_32:[0-9]+]] = shl <3 x i32> %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI: ret <3 x i16> %[[R_16]] +define <3 x i16> @shl_3xi16(<3 x i16> %a, <3 x i16> %b) { + %r = shl <3 x i16> %a, %b + ret <3 x i16> %r +} + +; VI-LABEL: @shl_nsw_3xi16( +; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[R_32:[0-9]+]] = shl nsw <3 x i32> %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI: ret <3 x i16> %[[R_16]] +define <3 x i16> @shl_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { + %r = shl nsw <3 x i16> %a, %b + ret <3 x i16> %r +} + +; VI-LABEL: @shl_nuw_3xi16( +; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[R_32:[0-9]+]] = shl nuw <3 x i32> %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI: ret <3 x i16> %[[R_16]] +define <3 x i16> @shl_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) { + %r = shl nuw <3 x i16> %a, %b + ret <3 x i16> %r +} + +; VI-LABEL: @shl_nuw_nsw_3xi16( +; VI: 
%[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI: ret <3 x i16> %[[R_16]] +define <3 x i16> @shl_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { + %r = shl nuw nsw <3 x i16> %a, %b + ret <3 x i16> %r +} + +; VI-LABEL: @lshr_3xi16( +; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[R_32:[0-9]+]] = lshr <3 x i32> %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI: ret <3 x i16> %[[R_16]] +define <3 x i16> @lshr_3xi16(<3 x i16> %a, <3 x i16> %b) { + %r = lshr <3 x i16> %a, %b + ret <3 x i16> %r +} + +; VI-LABEL: @lshr_exact_3xi16( +; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[R_32:[0-9]+]] = lshr exact <3 x i32> %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI: ret <3 x i16> %[[R_16]] +define <3 x i16> @lshr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) { + %r = lshr exact <3 x i16> %a, %b + ret <3 x i16> %r +} + +; VI-LABEL: @ashr_3xi16( +; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[R_32:[0-9]+]] = ashr <3 x i32> %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI: ret <3 x i16> %[[R_16]] +define <3 x i16> @ashr_3xi16(<3 x i16> %a, <3 x i16> %b) { + %r = ashr <3 x i16> %a, %b + ret <3 x i16> %r +} + +; VI-LABEL: @ashr_exact_3xi16( +; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[R_32:[0-9]+]] = ashr exact <3 x i32> %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI: ret <3 x i16> %[[R_16]] +define <3 x i16> 
@ashr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) { + %r = ashr exact <3 x i16> %a, %b + ret <3 x i16> %r +} + +; VI-LABEL: @and_3xi16( +; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[R_32:[0-9]+]] = and <3 x i32> %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI: ret <3 x i16> %[[R_16]] +define <3 x i16> @and_3xi16(<3 x i16> %a, <3 x i16> %b) { + %r = and <3 x i16> %a, %b + ret <3 x i16> %r +} + +; VI-LABEL: @or_3xi16( +; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[R_32:[0-9]+]] = or <3 x i32> %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI: ret <3 x i16> %[[R_16]] +define <3 x i16> @or_3xi16(<3 x i16> %a, <3 x i16> %b) { + %r = or <3 x i16> %a, %b + ret <3 x i16> %r +} + +; VI-LABEL: @xor_3xi16( +; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[R_32:[0-9]+]] = xor <3 x i32> %[[A_32]], %[[B_32]] +; VI: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> +; VI: ret <3 x i16> %[[R_16]] +define <3 x i16> @xor_3xi16(<3 x i16> %a, <3 x i16> %b) { + %r = xor <3 x i16> %a, %b + ret <3 x i16> %r +} + +; VI-LABEL: @select_eq_3xi16( +; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[CMP:[0-9]+]] = icmp eq <3 x i32> %[[A_32_0]], %[[B_32_0]] +; VI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] +; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> +; VI: ret <3 x i16> %[[SEL_16]] +define <3 x i16> @select_eq_3xi16(<3 x i16> %a, <3 x i16> %b) { + %cmp = icmp eq <3 x i16> %a, %b + %sel = select <3 x i1> %cmp, <3 x i16> 
%a, <3 x i16> %b + ret <3 x i16> %sel +} + +; VI-LABEL: @select_ne_3xi16( +; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[CMP:[0-9]+]] = icmp ne <3 x i32> %[[A_32_0]], %[[B_32_0]] +; VI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] +; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> +; VI: ret <3 x i16> %[[SEL_16]] +define <3 x i16> @select_ne_3xi16(<3 x i16> %a, <3 x i16> %b) { + %cmp = icmp ne <3 x i16> %a, %b + %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b + ret <3 x i16> %sel +} + +; VI-LABEL: @select_ugt_3xi16( +; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[CMP:[0-9]+]] = icmp ugt <3 x i32> %[[A_32_0]], %[[B_32_0]] +; VI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] +; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> +; VI: ret <3 x i16> %[[SEL_16]] +define <3 x i16> @select_ugt_3xi16(<3 x i16> %a, <3 x i16> %b) { + %cmp = icmp ugt <3 x i16> %a, %b + %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b + ret <3 x i16> %sel +} + +; VI-LABEL: @select_uge_3xi16( +; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[CMP:[0-9]+]] = icmp uge <3 x i32> %[[A_32_0]], %[[B_32_0]] +; VI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] +; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> +; VI: 
ret <3 x i16> %[[SEL_16]] +define <3 x i16> @select_uge_3xi16(<3 x i16> %a, <3 x i16> %b) { + %cmp = icmp uge <3 x i16> %a, %b + %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b + ret <3 x i16> %sel +} + +; VI-LABEL: @select_ult_3xi16( +; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[CMP:[0-9]+]] = icmp ult <3 x i32> %[[A_32_0]], %[[B_32_0]] +; VI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] +; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> +; VI: ret <3 x i16> %[[SEL_16]] +define <3 x i16> @select_ult_3xi16(<3 x i16> %a, <3 x i16> %b) { + %cmp = icmp ult <3 x i16> %a, %b + %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b + ret <3 x i16> %sel +} + +; VI-LABEL: @select_ule_3xi16( +; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[CMP:[0-9]+]] = icmp ule <3 x i32> %[[A_32_0]], %[[B_32_0]] +; VI: %[[A_32_1:[0-9]+]] = zext <3 x i16> %a to <3 x i32> +; VI: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> +; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] +; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> +; VI: ret <3 x i16> %[[SEL_16]] +define <3 x i16> @select_ule_3xi16(<3 x i16> %a, <3 x i16> %b) { + %cmp = icmp ule <3 x i16> %a, %b + %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b + ret <3 x i16> %sel +} + +; VI-LABEL: @select_sgt_3xi16( +; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32> +; VI: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32> +; VI: %[[CMP:[0-9]+]] = icmp sgt <3 x i32> %[[A_32_0]], %[[B_32_0]] +; VI: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32> +; VI: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32> +; 
VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] +; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> +; VI: ret <3 x i16> %[[SEL_16]] +define <3 x i16> @select_sgt_3xi16(<3 x i16> %a, <3 x i16> %b) { + %cmp = icmp sgt <3 x i16> %a, %b + %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b + ret <3 x i16> %sel +} + +; VI-LABEL: @select_sge_3xi16( +; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32> +; VI: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32> +; VI: %[[CMP:[0-9]+]] = icmp sge <3 x i32> %[[A_32_0]], %[[B_32_0]] +; VI: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32> +; VI: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32> +; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] +; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> +; VI: ret <3 x i16> %[[SEL_16]] +define <3 x i16> @select_sge_3xi16(<3 x i16> %a, <3 x i16> %b) { + %cmp = icmp sge <3 x i16> %a, %b + %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b + ret <3 x i16> %sel +} + +; VI-LABEL: @select_slt_3xi16( +; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32> +; VI: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32> +; VI: %[[CMP:[0-9]+]] = icmp slt <3 x i32> %[[A_32_0]], %[[B_32_0]] +; VI: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32> +; VI: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32> +; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] +; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> +; VI: ret <3 x i16> %[[SEL_16]] +define <3 x i16> @select_slt_3xi16(<3 x i16> %a, <3 x i16> %b) { + %cmp = icmp slt <3 x i16> %a, %b + %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b + ret <3 x i16> %sel +} + +; VI-LABEL: @select_sle_3xi16( +; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32> +; VI: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32> +; VI: 
%[[CMP:[0-9]+]] = icmp sle <3 x i32> %[[A_32_0]], %[[B_32_0]] +; VI: %[[A_32_1:[0-9]+]] = sext <3 x i16> %a to <3 x i32> +; VI: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32> +; VI: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] +; VI: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> +; VI: ret <3 x i16> %[[SEL_16]] +define <3 x i16> @select_sle_3xi16(<3 x i16> %a, <3 x i16> %b) { + %cmp = icmp sle <3 x i16> %a, %b + %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b + ret <3 x i16> %sel +} diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll deleted file mode 100644 index d78c75165be..00000000000 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll +++ /dev/null @@ -1,246 +0,0 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck %s -; RUN: opt -S -amdgpu-codegenprepare %s | FileCheck -check-prefix=NOOP %s -; Make sure this doesn't crash with no triple - -; NOOP-LABEL: @noop_fdiv_fpmath( -; NOOP: %md.25ulp = fdiv float %a, %b, !fpmath !0 -define void @noop_fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #3 { - %md.25ulp = fdiv float %a, %b, !fpmath !0 - store volatile float %md.25ulp, float addrspace(1)* %out - ret void -} - -; CHECK-LABEL: @fdiv_fpmath( -; CHECK: %no.md = fdiv float %a, %b{{$}} -; CHECK: %md.half.ulp = fdiv float %a, %b, !fpmath !1 -; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2 -; CHECK: %md.25ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0 -; CHECK: %md.3ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !3 -; CHECK: %fast.md.25ulp = call fast float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0 -; CHECK: arcp.md.25ulp = call arcp float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0 -define void @fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #1 { - %no.md = fdiv float %a, %b - store volatile float %no.md, float 
addrspace(1)* %out - - %md.half.ulp = fdiv float %a, %b, !fpmath !1 - store volatile float %md.half.ulp, float addrspace(1)* %out - - %md.1ulp = fdiv float %a, %b, !fpmath !2 - store volatile float %md.1ulp, float addrspace(1)* %out - - %md.25ulp = fdiv float %a, %b, !fpmath !0 - store volatile float %md.25ulp, float addrspace(1)* %out - - %md.3ulp = fdiv float %a, %b, !fpmath !3 - store volatile float %md.3ulp, float addrspace(1)* %out - - %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0 - store volatile float %fast.md.25ulp, float addrspace(1)* %out - - %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0 - store volatile float %arcp.md.25ulp, float addrspace(1)* %out - - ret void -} - -; CHECK-LABEL: @rcp_fdiv_fpmath( -; CHECK: %no.md = fdiv float 1.000000e+00, %x{{$}} -; CHECK: %md.25ulp = fdiv float 1.000000e+00, %x, !fpmath !0 -; CHECK: %md.half.ulp = fdiv float 1.000000e+00, %x, !fpmath !1 -; CHECK: %arcp.no.md = fdiv arcp float 1.000000e+00, %x{{$}} -; CHECK: %arcp.25ulp = fdiv arcp float 1.000000e+00, %x, !fpmath !0 -; CHECK: %fast.no.md = fdiv fast float 1.000000e+00, %x{{$}} -; CHECK: %fast.25ulp = fdiv fast float 1.000000e+00, %x, !fpmath !0 -define void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #1 { - %no.md = fdiv float 1.0, %x - store volatile float %no.md, float addrspace(1)* %out - - %md.25ulp = fdiv float 1.0, %x, !fpmath !0 - store volatile float %md.25ulp, float addrspace(1)* %out - - %md.half.ulp = fdiv float 1.0, %x, !fpmath !1 - store volatile float %md.half.ulp, float addrspace(1)* %out - - %arcp.no.md = fdiv arcp float 1.0, %x - store volatile float %arcp.no.md, float addrspace(1)* %out - - %arcp.25ulp = fdiv arcp float 1.0, %x, !fpmath !0 - store volatile float %arcp.25ulp, float addrspace(1)* %out - - %fast.no.md = fdiv fast float 1.0, %x - store volatile float %fast.no.md, float addrspace(1)* %out - - %fast.25ulp = fdiv fast float 1.0, %x, !fpmath !0 - store volatile float %fast.25ulp, float addrspace(1)* %out - - ret void 
-} - -; CHECK-LABEL: @fdiv_fpmath_vector( -; CHECK: %no.md = fdiv <2 x float> %a, %b{{$}} -; CHECK: %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1 -; CHECK: %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2 - -; CHECK: %[[A0:[0-9]+]] = extractelement <2 x float> %a, i64 0 -; CHECK: %[[B0:[0-9]+]] = extractelement <2 x float> %b, i64 0 -; CHECK: %[[FDIV0:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A0]], float %[[B0]]), !fpmath !0 -; CHECK: %[[INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FDIV0]], i64 0 -; CHECK: %[[A1:[0-9]+]] = extractelement <2 x float> %a, i64 1 -; CHECK: %[[B1:[0-9]+]] = extractelement <2 x float> %b, i64 1 -; CHECK: %[[FDIV1:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A1]], float %[[B1]]), !fpmath !0 -; CHECK: %md.25ulp = insertelement <2 x float> %[[INS0]], float %[[FDIV1]], i64 1 -define void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #1 { - %no.md = fdiv <2 x float> %a, %b - store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out - - %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1 - store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out - - %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2 - store volatile <2 x float> %md.1ulp, <2 x float> addrspace(1)* %out - - %md.25ulp = fdiv <2 x float> %a, %b, !fpmath !0 - store volatile <2 x float> %md.25ulp, <2 x float> addrspace(1)* %out - - ret void -} - -; CHECK-LABEL: @rcp_fdiv_fpmath_vector( -; CHECK: %no.md = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}} -; CHECK: %md.half.ulp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !1 -; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}} -; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}} - -; CHECK: extractelement <2 x float> %x -; CHECK: fdiv arcp float 1.000000e+00, %{{[0-9]+}}, !fpmath !0 -; CHECK: extractelement <2 x float> %x -; CHECK: fdiv arcp float 1.000000e+00, %{{[0-9]+}}, !fpmath !0 -; CHECK: store volatile <2 x float> %arcp.25ulp - -; CHECK: fdiv fast float 
1.000000e+00, %{{[0-9]+}}, !fpmath !0 -; CHECK: fdiv fast float 1.000000e+00, %{{[0-9]+}}, !fpmath !0 -; CHECK: store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out -define void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 { - %no.md = fdiv <2 x float> <float 1.0, float 1.0>, %x - store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out - - %md.half.ulp = fdiv <2 x float> <float 1.0, float 1.0>, %x, !fpmath !1 - store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out - - %arcp.no.md = fdiv arcp <2 x float> <float 1.0, float 1.0>, %x - store volatile <2 x float> %arcp.no.md, <2 x float> addrspace(1)* %out - - %fast.no.md = fdiv fast <2 x float> <float 1.0, float 1.0>, %x - store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out - - %arcp.25ulp = fdiv arcp <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0 - store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out - - %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0 - store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out - - ret void -} - -; CHECK-LABEL: @rcp_fdiv_fpmath_vector_nonsplat( -; CHECK: %no.md = fdiv <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x -; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x -; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x{{$}} - -; CHECK: %[[X0:[0-9]+]] = extractelement <2 x float> %x, i64 0 -; CHECK: fdiv arcp float 1.000000e+00, %[[X0]], !fpmath !0 -; CHECK: %[[X1:[0-9]+]] = extractelement <2 x float> %x, i64 1 -; CHECK: fdiv arcp float 2.000000e+00, %[[X1]], !fpmath !0 -; CHECK: store volatile <2 x float> %arcp.25ulp - -; CHECK: %[[X0:[0-9]+]] = extractelement <2 x float> %x, i64 0 -; CHECK: fdiv fast float 1.000000e+00, %[[X0]], !fpmath !0 -; CHECK: %[[X1:[0-9]+]] = extractelement <2 x float> %x, i64 1 -; CHECK: fdiv fast float 2.000000e+00, %[[X1]], !fpmath !0 -; CHECK: store volatile <2 x float> %fast.25ulp -define void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 { - %no.md = fdiv <2 x float> <float 1.0, float 2.0>, %x - store volatile <2 x float> %no.md, <2 x float> 
addrspace(1)* %out - - %arcp.no.md = fdiv arcp <2 x float> <float 1.0, float 2.0>, %x - store volatile <2 x float> %arcp.no.md, <2 x float> addrspace(1)* %out - - %fast.no.md = fdiv fast <2 x float> <float 1.0, float 2.0>, %x - store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out - - %arcp.25ulp = fdiv arcp <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0 - store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out - - %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0 - store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out - - ret void -} - -; FIXME: Should be able to get fdiv for 1.0 component -; CHECK-LABEL: @rcp_fdiv_fpmath_vector_partial_constant( -; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0 -; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0 -; CHECK: store volatile <2 x float> %arcp.25ulp - -; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0 -; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0 -; CHECK: store volatile <2 x float> %fast.25ulp -define void @rcp_fdiv_fpmath_vector_partial_constant(<2 x float> addrspace(1)* %out, <2 x float> %x, <2 x float> %y) #1 { - %x.insert = insertelement <2 x float> %x, float 1.0, i32 0 - - %arcp.25ulp = fdiv arcp <2 x float> %x.insert, %y, !fpmath !0 - store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out - - %fast.25ulp = fdiv fast <2 x float> %x.insert, %y, !fpmath !0 - store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out - - ret void -} - -; CHECK-LABEL: @fdiv_fpmath_f32_denormals( -; CHECK: %no.md = fdiv float %a, %b{{$}} -; CHECK: %md.half.ulp = fdiv float %a, %b, !fpmath !1 -; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2 -; CHECK: %md.25ulp = fdiv float %a, %b, !fpmath !0 -; CHECK: %md.3ulp = fdiv float %a, %b, !fpmath !3 -; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0 
-; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0 -define void @fdiv_fpmath_f32_denormals(float addrspace(1)* %out, float %a, float %b) #2 { - %no.md = fdiv float %a, %b - store volatile float %no.md, float addrspace(1)* %out - - %md.half.ulp = fdiv float %a, %b, !fpmath !1 - store volatile float %md.half.ulp, float addrspace(1)* %out - - %md.1ulp = fdiv float %a, %b, !fpmath !2 - store volatile float %md.1ulp, float addrspace(1)* %out - - %md.25ulp = fdiv float %a, %b, !fpmath !0 - store volatile float %md.25ulp, float addrspace(1)* %out - - %md.3ulp = fdiv float %a, %b, !fpmath !3 - store volatile float %md.3ulp, float addrspace(1)* %out - - %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0 - store volatile float %fast.md.25ulp, float addrspace(1)* %out - - %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0 - store volatile float %arcp.md.25ulp, float addrspace(1)* %out - - ret void -} - -attributes #0 = { nounwind optnone noinline } -attributes #1 = { nounwind } -attributes #2 = { nounwind "target-features"="+fp32-denormals" } - -; CHECK: !0 = !{float 2.500000e+00} -; CHECK: !1 = !{float 5.000000e-01} -; CHECK: !2 = !{float 1.000000e+00} -; CHECK: !3 = !{float 3.000000e+00} - -!0 = !{float 2.500000e+00} -!1 = !{float 5.000000e-01} -!2 = !{float 1.000000e+00} -!3 = !{float 3.000000e+00} diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll index e9d26a225e3..6a8666f9ad4 100644 --- a/llvm/test/CodeGen/AMDGPU/ctlz.ll +++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN 
-check-prefix=VI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i7 @llvm.ctlz.i7(i7, i1) nounwind readnone @@ -17,13 +17,13 @@ declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) nounwind readnone declare i32 @llvm.r600.read.tidig.x() nounwind readnone ; FUNC-LABEL: {{^}}s_ctlz_i32: -; SI: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}} -; SI-DAG: s_flbit_i32_b32 [[CTLZ:s[0-9]+]], [[VAL]] -; SI-DAG: v_cmp_eq_i32_e64 [[CMPZ:s\[[0-9]+:[0-9]+\]]], [[VAL]], 0{{$}} -; SI-DAG: v_mov_b32_e32 [[VCTLZ:v[0-9]+]], [[CTLZ]] -; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], [[VCTLZ]], 32, [[CMPZ]] -; SI: buffer_store_dword [[RESULT]] -; SI: s_endpgm +; GCN: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}} +; GCN-DAG: s_flbit_i32_b32 [[CTLZ:s[0-9]+]], [[VAL]] +; GCN-DAG: v_cmp_eq_i32_e64 [[CMPZ:s\[[0-9]+:[0-9]+\]]], [[VAL]], 0{{$}} +; GCN-DAG: v_mov_b32_e32 [[VCTLZ:v[0-9]+]], [[CTLZ]] +; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], [[VCTLZ]], 32, [[CMPZ]] +; GCN: buffer_store_dword [[RESULT]] +; GCN: s_endpgm ; EG: FFBH_UINT ; EG: CNDE_INT @@ -34,12 +34,12 @@ define void @s_ctlz_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { } ; FUNC-LABEL: {{^}}v_ctlz_i32: -; SI: buffer_load_dword [[VAL:v[0-9]+]], -; SI-DAG: v_ffbh_u32_e32 [[CTLZ:v[0-9]+]], [[VAL]] -; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[CTLZ]] -; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], [[CTLZ]], 32, vcc -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; GCN: buffer_load_dword [[VAL:v[0-9]+]], +; GCN-DAG: v_ffbh_u32_e32 [[CTLZ:v[0-9]+]], [[VAL]] +; GCN-DAG: v_cmp_eq_i32_e32 vcc, 0, [[CTLZ]] +; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], [[CTLZ]], 32, vcc +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm ; EG: FFBH_UINT ; EG: CNDE_INT @@ -51,11 +51,11 @@ define void @v_ctlz_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalia } ; FUNC-LABEL: {{^}}v_ctlz_v2i32: -; SI: 
buffer_load_dwordx2 -; SI: v_ffbh_u32_e32 -; SI: v_ffbh_u32_e32 -; SI: buffer_store_dwordx2 -; SI: s_endpgm +; GCN: buffer_load_dwordx2 +; GCN: v_ffbh_u32_e32 +; GCN: v_ffbh_u32_e32 +; GCN: buffer_store_dwordx2 +; GCN: s_endpgm ; EG: FFBH_UINT ; EG: CNDE_INT @@ -69,13 +69,13 @@ define void @v_ctlz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrsp } ; FUNC-LABEL: {{^}}v_ctlz_v4i32: -; SI: buffer_load_dwordx4 -; SI: v_ffbh_u32_e32 -; SI: v_ffbh_u32_e32 -; SI: v_ffbh_u32_e32 -; SI: v_ffbh_u32_e32 -; SI: buffer_store_dwordx4 -; SI: s_endpgm +; GCN: buffer_load_dwordx4 +; GCN: v_ffbh_u32_e32 +; GCN: v_ffbh_u32_e32 +; GCN: v_ffbh_u32_e32 +; GCN: v_ffbh_u32_e32 +; GCN: buffer_store_dwordx4 +; GCN: s_endpgm ; EG-DAG: FFBH_UINT @@ -97,12 +97,12 @@ define void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrsp } ; FUNC-LABEL: {{^}}v_ctlz_i8: -; SI: buffer_load_ubyte [[VAL:v[0-9]+]], -; SI-DAG: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]] -; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[CTLZ]] -; SI-DAG: v_cndmask_b32_e64 [[CORRECTED_FFBH:v[0-9]+]], [[FFBH]], 32, vcc -; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xffffffe8, [[CORRECTED_FFBH]] -; SI: buffer_store_byte [[RESULT]], +; GCN: buffer_load_ubyte [[VAL:v[0-9]+]], +; GCN-DAG: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]] +; GCN-DAG: v_cmp_eq_i32_e32 vcc, 0, [[CTLZ]] +; GCN-DAG: v_cndmask_b32_e64 [[CORRECTED_FFBH:v[0-9]+]], [[FFBH]], 32, vcc +; GCN: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xffffffe8, [[CORRECTED_FFBH]] +; GCN: buffer_store_byte [[RESULT]], define void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind { %val = load i8, i8 addrspace(1)* %valptr %ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 false) nounwind readnone @@ -111,16 +111,16 @@ define void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias % } ; FUNC-LABEL: {{^}}s_ctlz_i64: -; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}} -; SI-DAG: 
v_cmp_eq_i32_e64 vcc, s[[HI]], 0{{$}} -; SI-DAG: s_flbit_i32_b32 [[FFBH_LO:s[0-9]+]], s[[LO]] -; SI-DAG: s_add_i32 [[ADD:s[0-9]+]], [[FFBH_LO]], 32 -; SI-DAG: s_flbit_i32_b32 [[FFBH_HI:s[0-9]+]], s[[HI]] -; SI-DAG: v_mov_b32_e32 [[VFFBH_LO:v[0-9]+]], [[ADD]] -; SI-DAG: v_mov_b32_e32 [[VFFBH_HI:v[0-9]+]], [[FFBH_HI]] -; SI-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[VFFBH_HI]], [[VFFBH_LO]] -; SI-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}} -; SI: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}} +; GCN: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}} +; GCN-DAG: v_cmp_eq_i32_e64 vcc, s[[HI]], 0{{$}} +; GCN-DAG: s_flbit_i32_b32 [[FFBH_LO:s[0-9]+]], s[[LO]] +; GCN-DAG: s_add_i32 [[ADD:s[0-9]+]], [[FFBH_LO]], 32 +; GCN-DAG: s_flbit_i32_b32 [[FFBH_HI:s[0-9]+]], s[[HI]] +; GCN-DAG: v_mov_b32_e32 [[VFFBH_LO:v[0-9]+]], [[ADD]] +; GCN-DAG: v_mov_b32_e32 [[VFFBH_HI:v[0-9]+]], [[FFBH_HI]] +; GCN-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[VFFBH_HI]], [[VFFBH_LO]] +; GCN-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}} +; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}} define void @s_ctlz_i64(i64 addrspace(1)* noalias %out, i64 %val) nounwind { %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false) store i64 %ctlz, i64 addrspace(1)* %out @@ -136,17 +136,17 @@ define void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind } ; FUNC-LABEL: {{^}}v_ctlz_i64: -; SI-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}} -; SI-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}} -; SI-DAG: v_cmp_eq_i32_e64 [[CMPHI:s\[[0-9]+:[0-9]+\]]], 0, v[[HI]] -; SI-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]] -; SI-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]] -; SI-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]] -; SI-DAG: v_cndmask_b32_e64 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[ADD]], [[CMPHI]] -; SI-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[HI]], v[[LO]] -; SI-DAG: 
v_cmp_eq_i32_e32 vcc, 0, [[OR]] -; SI-DAG: v_cndmask_b32_e64 v[[CLTZ_LO:[0-9]+]], v[[CTLZ:[0-9]+]], 64, vcc -; SI: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI]]{{\]}} +; GCN-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}} +; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}} +; GCN-DAG: v_cmp_eq_i32_e64 [[CMPHI:s\[[0-9]+:[0-9]+\]]], 0, v[[HI]] +; GCN-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]] +; GCN-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]] +; GCN-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]] +; GCN-DAG: v_cndmask_b32_e64 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[ADD]], [[CMPHI]] +; GCN-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[HI]], v[[LO]] +; GCN-DAG: v_cmp_eq_i32_e32 vcc, 0, [[OR]] +; GCN-DAG: v_cndmask_b32_e64 v[[CLTZ_LO:[0-9]+]], v[[CTLZ:[0-9]+]], 64, vcc +; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI]]{{\]}} define void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.r600.read.tidig.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid @@ -170,10 +170,10 @@ define void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)* } ; FUNC-LABEL: {{^}}v_ctlz_i32_sel_eq_neg1: -; SI: buffer_load_dword [[VAL:v[0-9]+]], -; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; GCN: buffer_load_dword [[VAL:v[0-9]+]], +; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm define void @v_ctlz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { %val = load i32, i32 addrspace(1)* %valptr %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone @@ -184,10 +184,10 @@ define void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)* } ; FUNC-LABEL: {{^}}v_ctlz_i32_sel_ne_neg1: -; SI: buffer_load_dword [[VAL:v[0-9]+]], -; SI: v_ffbh_u32_e32 
[[RESULT:v[0-9]+]], [[VAL]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm +; GCN: buffer_load_dword [[VAL:v[0-9]+]], +; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm define void @v_ctlz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { %val = load i32, i32 addrspace(1)* %valptr %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone @@ -199,11 +199,11 @@ define void @v_ctlz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspac ; TODO: Should be able to eliminate select here as well. ; FUNC-LABEL: {{^}}v_ctlz_i32_sel_eq_bitwidth: -; SI: buffer_load_dword -; SI: v_ffbh_u32_e32 -; SI: v_cmp -; SI: v_cndmask -; SI: s_endpgm +; GCN: buffer_load_dword +; GCN: v_ffbh_u32_e32 +; GCN: v_cmp +; GCN: v_cndmask +; GCN: s_endpgm define void @v_ctlz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { %val = load i32, i32 addrspace(1)* %valptr %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone @@ -214,11 +214,11 @@ define void @v_ctlz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias %out, i32 addr } ; FUNC-LABEL: {{^}}v_ctlz_i32_sel_ne_bitwidth: -; SI: buffer_load_dword -; SI: v_ffbh_u32_e32 -; SI: v_cmp -; SI: v_cndmask -; SI: s_endpgm +; GCN: buffer_load_dword +; GCN: v_ffbh_u32_e32 +; GCN: v_cmp +; GCN: v_cndmask +; GCN: s_endpgm define void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { %val = load i32, i32 addrspace(1)* %valptr %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone @@ -229,9 +229,9 @@ define void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addr } ; FUNC-LABEL: {{^}}v_ctlz_i8_sel_eq_neg1: -; SI: buffer_load_ubyte [[VAL:v[0-9]+]], -; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]] -; SI: buffer_store_byte [[FFBH]], +; GCN: buffer_load_ubyte [[VAL:v[0-9]+]], +; GCN: v_ffbh_u32_e32 
[[FFBH:v[0-9]+]], [[VAL]] +; GCN: buffer_store_byte [[FFBH]], define void @v_ctlz_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind { %val = load i8, i8 addrspace(1)* %valptr %ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 false) nounwind readnone @@ -255,10 +255,10 @@ define void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addr } ; FUNC-LABEL: {{^}}v_ctlz_i7_sel_eq_neg1: -; SI: buffer_load_ubyte [[VAL:v[0-9]+]], -; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]] -; SI: v_and_b32_e32 [[TRUNC:v[0-9]+]], 0x7f, [[FFBH]] -; SI: buffer_store_byte [[TRUNC]], +; GCN: buffer_load_ubyte [[VAL:v[0-9]+]], +; GCN: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]] +; GCN: v_and_b32_e32 [[TRUNC:v[0-9]+]], 0x7f, [[FFBH]] +; GCN: buffer_store_byte [[TRUNC]], define void @v_ctlz_i7_sel_eq_neg1(i7 addrspace(1)* noalias %out, i7 addrspace(1)* noalias %valptr) nounwind { %val = load i7, i7 addrspace(1)* %valptr %ctlz = call i7 @llvm.ctlz.i7(i7 %val, i1 false) nounwind readnone diff --git a/llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll b/llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll new file mode 100644 index 00000000000..05c4dfa5acf --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll @@ -0,0 +1,212 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s + +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone + +; FUNC-LABEL: {{^}}test_umul24_i32: +; GCN: v_mul_u32_u24 +define void @test_umul24_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) { +entry: + %0 = shl i32 %a, 8 + %a_24 = lshr i32 %0, 8 + %1 = shl i32 %b, 8 + %b_24 = lshr i32 %1, 8 + %2 = mul i32 %a_24, %b_24 + store i32 %2, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_umul24_i16_sext: +; SI: 
v_mul_u32_u24_e{{(32|64)}} [[VI_MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}} +; SI: v_bfe_i32 v{{[0-9]}}, [[VI_MUL]], 0, 16 +; VI: s_mul_i32 [[SI_MUL:s[0-9]]], s{{[0-9]}}, s{{[0-9]}} +; VI: s_sext_i32_i16 s{{[0-9]}}, [[SI_MUL]] +define void @test_umul24_i16_sext(i32 addrspace(1)* %out, i16 %a, i16 %b) { +entry: + %mul = mul i16 %a, %b + %ext = sext i16 %mul to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_umul24_i16_vgpr_sext: +; GCN: v_mul_u32_u24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}} +; GCN: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 16 +define void @test_umul24_i16_vgpr_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) { + %tid.x = call i32 @llvm.amdgcn.workitem.id.x() + %tid.y = call i32 @llvm.amdgcn.workitem.id.y() + %ptr_a = getelementptr i16, i16 addrspace(1)* %in, i32 %tid.x + %ptr_b = getelementptr i16, i16 addrspace(1)* %in, i32 %tid.y + %a = load i16, i16 addrspace(1)* %ptr_a + %b = load i16, i16 addrspace(1)* %ptr_b + %mul = mul i16 %a, %b + %val = sext i16 %mul to i32 + store i32 %val, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_umul24_i16: +; SI: s_and_b32 +; SI: v_mul_u32_u24_e32 +; SI: v_and_b32_e32 +; VI: s_mul_i32 +; VI: s_and_b32 +; VI: v_mov_b32_e32 +define void @test_umul24_i16(i32 addrspace(1)* %out, i16 %a, i16 %b) { +entry: + %mul = mul i16 %a, %b + %ext = zext i16 %mul to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_umul24_i16_vgpr: +; GCN: v_mul_u32_u24_e32 +; GCN: v_and_b32_e32 +define void @test_umul24_i16_vgpr(i32 addrspace(1)* %out, i16 addrspace(1)* %in) { + %tid.x = call i32 @llvm.amdgcn.workitem.id.x() + %tid.y = call i32 @llvm.amdgcn.workitem.id.y() + %ptr_a = getelementptr i16, i16 addrspace(1)* %in, i32 %tid.x + %ptr_b = getelementptr i16, i16 addrspace(1)* %in, i32 %tid.y + %a = load i16, i16 addrspace(1)* %ptr_a + %b = load i16, i16 addrspace(1)* %ptr_b + %mul = mul i16 %a, %b + %val = zext i16 %mul to i32 + store i32 %val, 
i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_umul24_i8: +; GCN: v_mul_u32_u24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}} +; GCN: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 8 +define void @test_umul24_i8(i32 addrspace(1)* %out, i8 %a, i8 %b) { +entry: + %mul = mul i8 %a, %b + %ext = sext i8 %mul to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_umulhi24_i32_i64: +; GCN-NOT: and +; GCN: v_mul_hi_u32_u24_e32 [[RESULT:v[0-9]+]], +; GCN-NEXT: buffer_store_dword [[RESULT]] +define void @test_umulhi24_i32_i64(i32 addrspace(1)* %out, i32 %a, i32 %b) { +entry: + %a.24 = and i32 %a, 16777215 + %b.24 = and i32 %b, 16777215 + %a.24.i64 = zext i32 %a.24 to i64 + %b.24.i64 = zext i32 %b.24 to i64 + %mul48 = mul i64 %a.24.i64, %b.24.i64 + %mul48.hi = lshr i64 %mul48, 32 + %mul24hi = trunc i64 %mul48.hi to i32 + store i32 %mul24hi, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_umulhi24: +; GCN-NOT: and +; GCN: v_mul_hi_u32_u24_e32 [[RESULT:v[0-9]+]], +; GCN-NEXT: buffer_store_dword [[RESULT]] +define void @test_umulhi24(i32 addrspace(1)* %out, i64 %a, i64 %b) { +entry: + %a.24 = and i64 %a, 16777215 + %b.24 = and i64 %b, 16777215 + %mul48 = mul i64 %a.24, %b.24 + %mul48.hi = lshr i64 %mul48, 32 + %mul24.hi = trunc i64 %mul48.hi to i32 + store i32 %mul24.hi, i32 addrspace(1)* %out + ret void +} + +; Multiply with 24-bit inputs and 64-bit output. +; FUNC-LABEL: {{^}}test_umul24_i64: +; GCN-NOT: and +; GCN-NOT: lshr +; GCN-DAG: v_mul_u32_u24_e32 +; GCN-DAG: v_mul_hi_u32_u24_e32 +; GCN: buffer_store_dwordx2 +define void @test_umul24_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) { +entry: + %tmp0 = shl i64 %a, 40 + %a_24 = lshr i64 %tmp0, 40 + %tmp1 = shl i64 %b, 40 + %b_24 = lshr i64 %tmp1, 40 + %tmp2 = mul i64 %a_24, %b_24 + store i64 %tmp2, i64 addrspace(1)* %out + ret void +} + +; FIXME: Should be able to eliminate the and. 
+; FUNC-LABEL: {{^}}test_umul24_i64_square: +; GCN: s_load_dword [[A:s[0-9]+]] +; GCN: s_and_b32 [[TRUNC:s[0-9]+]], [[A]], 0xffffff{{$}} +; GCN-DAG: v_mul_hi_u32_u24_e64 v{{[0-9]+}}, [[TRUNC]], [[TRUNC]] +; GCN-DAG: v_mul_u32_u24_e64 v{{[0-9]+}}, [[TRUNC]], [[TRUNC]] +define void @test_umul24_i64_square(i64 addrspace(1)* %out, i64 %a) { +entry: + %tmp0 = shl i64 %a, 40 + %a.24 = lshr i64 %tmp0, 40 + %tmp2 = mul i64 %a.24, %a.24 + store i64 %tmp2, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_umulhi16_i32: +; GCN: s_and_b32 +; GCN: s_and_b32 +; GCN: v_mul_u32_u24_e32 [[MUL24:v[0-9]+]] +; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, [[MUL24]] +define void @test_umulhi16_i32(i16 addrspace(1)* %out, i32 %a, i32 %b) { +entry: + %a.16 = and i32 %a, 65535 + %b.16 = and i32 %b, 65535 + %mul = mul i32 %a.16, %b.16 + %hi = lshr i32 %mul, 16 + %mulhi = trunc i32 %hi to i16 + store i16 %mulhi, i16 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_umul24_i33: +; GCN: s_load_dword s +; GCN: s_load_dword s +; GCN-NOT: and +; GCN-NOT: lshr +; GCN-DAG: v_mul_u32_u24_e32 v[[MUL_LO:[0-9]+]], +; GCN-DAG: v_mul_hi_u32_u24_e32 v[[MUL_HI:[0-9]+]], +; GCN-DAG: v_and_b32_e32 v[[HI:[0-9]+]], 1, v[[MUL_HI]] +; GCN: buffer_store_dwordx2 v{{\[}}[[MUL_LO]]:[[HI]]{{\]}} +define void @test_umul24_i33(i64 addrspace(1)* %out, i33 %a, i33 %b) { +entry: + %tmp0 = shl i33 %a, 9 + %a_24 = lshr i33 %tmp0, 9 + %tmp1 = shl i33 %b, 9 + %b_24 = lshr i33 %tmp1, 9 + %tmp2 = mul i33 %a_24, %b_24 + %ext = zext i33 %tmp2 to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_umulhi24_i33: +; GCN: s_load_dword s +; GCN: s_load_dword s +; GCN-NOT: and +; GCN-NOT: lshr +; GCN: v_mul_hi_u32_u24_e32 v[[MUL_HI:[0-9]+]], +; GCN-NEXT: v_and_b32_e32 v[[HI:[0-9]+]], 1, v[[MUL_HI]] +; GCN-NEXT: buffer_store_dword v[[HI]] +define void @test_umulhi24_i33(i32 addrspace(1)* %out, i33 %a, i33 %b) { +entry: + %tmp0 = shl i33 %a, 9 + %a_24 = lshr i33 %tmp0, 9 + %tmp1 = shl 
i33 %b, 9 + %b_24 = lshr i33 %tmp1, 9 + %tmp2 = mul i33 %a_24, %b_24 + %hi = lshr i33 %tmp2, 32 + %trunc = trunc i33 %hi to i32 + store i32 %trunc, i32 addrspace(1)* %out + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/mul_uint24-r600.ll b/llvm/test/CodeGen/AMDGPU/mul_uint24-r600.ll new file mode 100644 index 00000000000..da1c111fa5c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/mul_uint24-r600.ll @@ -0,0 +1,83 @@ +; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}test_umul24_i32: +; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W +define void @test_umul24_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) { +entry: + %0 = shl i32 %a, 8 + %a_24 = lshr i32 %0, 8 + %1 = shl i32 %b, 8 + %b_24 = lshr i32 %1, 8 + %2 = mul i32 %a_24, %b_24 + store i32 %2, i32 addrspace(1)* %out + ret void +} + +; The result must be sign-extended. +; FUNC-LABEL: {{^}}test_umul24_i16_sext: +; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]] +; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x +; EG: 16 +define void @test_umul24_i16_sext(i32 addrspace(1)* %out, i16 %a, i16 %b) { +entry: + %mul = mul i16 %a, %b + %ext = sext i16 %mul to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; The result must be sign-extended. 
+; FUNC-LABEL: {{^}}test_umul24_i8: +; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]] +; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x +define void @test_umul24_i8(i32 addrspace(1)* %out, i8 %a, i8 %b) { +entry: + %mul = mul i8 %a, %b + %ext = sext i8 %mul to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_umulhi24_i32_i64: +; EG: MULHI_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W +define void @test_umulhi24_i32_i64(i32 addrspace(1)* %out, i32 %a, i32 %b) { +entry: + %a.24 = and i32 %a, 16777215 + %b.24 = and i32 %b, 16777215 + %a.24.i64 = zext i32 %a.24 to i64 + %b.24.i64 = zext i32 %b.24 to i64 + %mul48 = mul i64 %a.24.i64, %b.24.i64 + %mul48.hi = lshr i64 %mul48, 32 + %mul24hi = trunc i64 %mul48.hi to i32 + store i32 %mul24hi, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_umulhi24: +; EG: MULHI_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y +define void @test_umulhi24(i32 addrspace(1)* %out, i64 %a, i64 %b) { +entry: + %a.24 = and i64 %a, 16777215 + %b.24 = and i64 %b, 16777215 + %mul48 = mul i64 %a.24, %b.24 + %mul48.hi = lshr i64 %mul48, 32 + %mul24.hi = trunc i64 %mul48.hi to i32 + store i32 %mul24.hi, i32 addrspace(1)* %out + ret void +} + +; Multiply with 24-bit inputs and 64-bit output. 
+; FUNC-LABEL: {{^}}test_umul24_i64: +; EG-DAG: MUL_UINT24 +; EG-DAG: MULHI +define void @test_umul24_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) { +entry: + %tmp0 = shl i64 %a, 40 + %a_24 = lshr i64 %tmp0, 40 + %tmp1 = shl i64 %b, 40 + %b_24 = lshr i64 %tmp1, 40 + %tmp2 = mul i64 %a_24, %b_24 + store i64 %tmp2, i64 addrspace(1)* %out + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/mul_uint24.ll b/llvm/test/CodeGen/AMDGPU/mul_uint24.ll deleted file mode 100644 index b882a4dd634..00000000000 --- a/llvm/test/CodeGen/AMDGPU/mul_uint24.ll +++ /dev/null @@ -1,197 +0,0 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s - -; FUNC-LABEL: {{^}}test_umul24_i32: -; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W -; SI: v_mul_u32_u24 -define void @test_umul24_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) { -entry: - %0 = shl i32 %a, 8 - %a_24 = lshr i32 %0, 8 - %1 = shl i32 %b, 8 - %b_24 = lshr i32 %1, 8 - %2 = mul i32 %a_24, %b_24 - store i32 %2, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}test_umul24_i16_sext: -; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]] -; The result must be sign-extended -; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x -; EG: 16 - -; SI: v_mul_u32_u24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}} -; SI: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 16 -define void @test_umul24_i16_sext(i32 addrspace(1)* %out, i16 %a, i16 %b) { -entry: - %mul = mul i16 %a, %b - %ext = sext i16 %mul to i32 - store i32 %ext, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}test_umul24_i16: -; SI: s_and_b32 -; SI: v_mul_u32_u24_e32 -; SI: v_and_b32_e32 
-define void @test_umul24_i16(i32 addrspace(1)* %out, i16 %a, i16 %b) { -entry: - %mul = mul i16 %a, %b - %ext = zext i16 %mul to i32 - store i32 %ext, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}test_umul24_i8: -; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]] -; The result must be sign-extended -; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x -; SI: v_mul_u32_u24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}} -; SI: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 8 - -define void @test_umul24_i8(i32 addrspace(1)* %out, i8 %a, i8 %b) { -entry: - %mul = mul i8 %a, %b - %ext = sext i8 %mul to i32 - store i32 %ext, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}test_umulhi24_i32_i64: -; SI-NOT: and -; SI: v_mul_hi_u32_u24_e32 [[RESULT:v[0-9]+]], -; SI-NEXT: buffer_store_dword [[RESULT]] - -; EG: MULHI_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W -define void @test_umulhi24_i32_i64(i32 addrspace(1)* %out, i32 %a, i32 %b) { -entry: - %a.24 = and i32 %a, 16777215 - %b.24 = and i32 %b, 16777215 - %a.24.i64 = zext i32 %a.24 to i64 - %b.24.i64 = zext i32 %b.24 to i64 - %mul48 = mul i64 %a.24.i64, %b.24.i64 - %mul48.hi = lshr i64 %mul48, 32 - %mul24hi = trunc i64 %mul48.hi to i32 - store i32 %mul24hi, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}test_umulhi24: -; SI-NOT: and -; SI: v_mul_hi_u32_u24_e32 [[RESULT:v[0-9]+]], -; SI-NEXT: buffer_store_dword [[RESULT]] - -; EG: MULHI_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y -define void @test_umulhi24(i32 addrspace(1)* %out, i64 %a, i64 %b) { -entry: - %a.24 = and i64 %a, 16777215 - %b.24 = and i64 %b, 16777215 - %mul48 = mul i64 %a.24, %b.24 - %mul48.hi = lshr i64 %mul48, 32 - %mul24.hi = trunc i64 %mul48.hi to i32 - store i32 %mul24.hi, i32 addrspace(1)* %out - ret void -} - -; Multiply with 24-bit inputs and 64-bit output -; FUNC-LABEL: {{^}}test_umul24_i64: -; EG; MUL_UINT24 -; EG: MULHI - -; SI-NOT: and -; SI-NOT: lshr - -; 
SI-DAG: v_mul_u32_u24_e32 -; SI-DAG: v_mul_hi_u32_u24_e32 - -; SI: buffer_store_dwordx2 -define void @test_umul24_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) { -entry: - %tmp0 = shl i64 %a, 40 - %a_24 = lshr i64 %tmp0, 40 - %tmp1 = shl i64 %b, 40 - %b_24 = lshr i64 %tmp1, 40 - %tmp2 = mul i64 %a_24, %b_24 - store i64 %tmp2, i64 addrspace(1)* %out - ret void -} - -; FIXME: Should be able to eliminate the and -; FUNC-LABEL: {{^}}test_umul24_i64_square: -; SI: s_load_dword [[A:s[0-9]+]] -; SI: s_and_b32 [[TRUNC:s[0-9]+]], [[A]], 0xffffff{{$}} -; SI-DAG: v_mul_hi_u32_u24_e64 v{{[0-9]+}}, [[TRUNC]], [[TRUNC]] -; SI-DAG: v_mul_u32_u24_e64 v{{[0-9]+}}, [[TRUNC]], [[TRUNC]] -define void @test_umul24_i64_square(i64 addrspace(1)* %out, i64 %a) { -entry: - %tmp0 = shl i64 %a, 40 - %a.24 = lshr i64 %tmp0, 40 - %tmp2 = mul i64 %a.24, %a.24 - store i64 %tmp2, i64 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}test_umulhi16_i32: -; SI: s_and_b32 -; SI: s_and_b32 -; SI: v_mul_u32_u24_e32 [[MUL24:v[0-9]+]] -; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, [[MUL24]] -define void @test_umulhi16_i32(i16 addrspace(1)* %out, i32 %a, i32 %b) { -entry: - %a.16 = and i32 %a, 65535 - %b.16 = and i32 %b, 65535 - %mul = mul i32 %a.16, %b.16 - %hi = lshr i32 %mul, 16 - %mulhi = trunc i32 %hi to i16 - store i16 %mulhi, i16 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}test_umul24_i33: -; SI: s_load_dword s -; SI: s_load_dword s - -; SI-NOT: and -; SI-NOT: lshr - -; SI-DAG: v_mul_u32_u24_e32 v[[MUL_LO:[0-9]+]], -; SI-DAG: v_mul_hi_u32_u24_e32 v[[MUL_HI:[0-9]+]], -; SI-DAG: v_and_b32_e32 v[[HI:[0-9]+]], 1, v[[MUL_HI]] -; SI: buffer_store_dwordx2 v{{\[}}[[MUL_LO]]:[[HI]]{{\]}} -define void @test_umul24_i33(i64 addrspace(1)* %out, i33 %a, i33 %b) { -entry: - %tmp0 = shl i33 %a, 9 - %a_24 = lshr i33 %tmp0, 9 - %tmp1 = shl i33 %b, 9 - %b_24 = lshr i33 %tmp1, 9 - %tmp2 = mul i33 %a_24, %b_24 - %ext = zext i33 %tmp2 to i64 - store i64 %ext, i64 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: 
{{^}}test_umulhi24_i33: -; SI: s_load_dword s -; SI: s_load_dword s - -; SI-NOT: and -; SI-NOT: lshr - -; SI: v_mul_hi_u32_u24_e32 v[[MUL_HI:[0-9]+]], -; SI-NEXT: v_and_b32_e32 v[[HI:[0-9]+]], 1, v[[MUL_HI]] -; SI-NEXT: buffer_store_dword v[[HI]] -define void @test_umulhi24_i33(i32 addrspace(1)* %out, i33 %a, i33 %b) { -entry: - %tmp0 = shl i33 %a, 9 - %a_24 = lshr i33 %tmp0, 9 - %tmp1 = shl i33 %b, 9 - %b_24 = lshr i33 %tmp1, 9 - %tmp2 = mul i33 %a_24, %b_24 - %hi = lshr i33 %tmp2, 32 - %trunc = trunc i33 %hi to i32 - store i32 %trunc, i32 addrspace(1)* %out - ret void -} -- cgit v1.2.3