-rw-r--r-- | llvm/test/CodeGen/X86/avx512-rotate.ll | 252 |
1 file changed, 252 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-rotate.ll b/llvm/test/CodeGen/X86/avx512-rotate.ll
new file mode 100644
index 00000000000..4de8dfd5d60
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512-rotate.ll
@@ -0,0 +1,252 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
+
+declare <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+declare <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+declare <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+declare <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+; Tests showing failure to replace variable rotates with immediate splat versions.
+
+define <16 x i32> @test_splat_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
+; KNL-LABEL: test_splat_rol_v16i32:
+; KNL: # BB#0:
+; KNL-NEXT: kmovw %edi, %k1
+; KNL-NEXT: vpbroadcastd {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
+; KNL-NEXT: vprolvd %zmm2, %zmm0, %zmm3
+; KNL-NEXT: vprolvd %zmm2, %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vprolvd %zmm2, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; KNL-NEXT: vpaddd %zmm3, %zmm0, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_splat_rol_v16i32:
+; SKX: # BB#0:
+; SKX-NEXT: kmovd %edi, %k1
+; SKX-NEXT: vpbroadcastd {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
+; SKX-NEXT: vprolvd %zmm2, %zmm0, %zmm3
+; SKX-NEXT: vprolvd %zmm2, %zmm0, %zmm1 {%k1}
+; SKX-NEXT: vprolvd %zmm2, %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; SKX-NEXT: vpaddd %zmm3, %zmm0, %zmm0
+; SKX-NEXT: retq
+  %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
+  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> zeroinitializer, i16 %x2)
+  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 -1)
+  %res3 = add <16 x i32> %res, %res1
+  %res4 = add <16 x i32> %res3, %res2
+  ret <16 x i32> %res4
+}
+
+define <8 x i64>@test_splat_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
+; KNL-LABEL: test_splat_rol_v8i64:
+; KNL: # BB#0:
+; KNL-NEXT: kmovw %edi, %k1
+; KNL-NEXT: vpbroadcastq {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5]
+; KNL-NEXT: vprolvq %zmm2, %zmm0, %zmm3
+; KNL-NEXT: vprolvq %zmm2, %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vprolvq %zmm2, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; KNL-NEXT: vpaddq %zmm3, %zmm0, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_splat_rol_v8i64:
+; SKX: # BB#0:
+; SKX-NEXT: kmovd %edi, %k1
+; SKX-NEXT: vpbroadcastq {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5]
+; SKX-NEXT: vprolvq %zmm2, %zmm0, %zmm3
+; SKX-NEXT: vprolvq %zmm2, %zmm0, %zmm1 {%k1}
+; SKX-NEXT: vprolvq %zmm2, %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; SKX-NEXT: vpaddq %zmm3, %zmm0, %zmm0
+; SKX-NEXT: retq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
+  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> zeroinitializer, i8 %x2)
+  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 -1)
+  %res3 = add <8 x i64> %res, %res1
+  %res4 = add <8 x i64> %res3, %res2
+  ret <8 x i64> %res4
+}
+
+define <16 x i32> @test_splat_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
+; KNL-LABEL: test_splat_ror_v16i32:
+; KNL: # BB#0:
+; KNL-NEXT: kmovw %edi, %k1
+; KNL-NEXT: vpbroadcastd {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
+; KNL-NEXT: vprorvd %zmm2, %zmm0, %zmm3
+; KNL-NEXT: vprorvd %zmm2, %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vprorvd %zmm2, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; KNL-NEXT: vpaddd %zmm3, %zmm0, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_splat_ror_v16i32:
+; SKX: # BB#0:
+; SKX-NEXT: kmovd %edi, %k1
+; SKX-NEXT: vpbroadcastd {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
+; SKX-NEXT: vprorvd %zmm2, %zmm0, %zmm3
+; SKX-NEXT: vprorvd %zmm2, %zmm0, %zmm1 {%k1}
+; SKX-NEXT: vprorvd %zmm2, %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; SKX-NEXT: vpaddd %zmm3, %zmm0, %zmm0
+; SKX-NEXT: retq
+  %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
+  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> zeroinitializer, i16 %x2)
+  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 -1)
+  %res3 = add <16 x i32> %res, %res1
+  %res4 = add <16 x i32> %res3, %res2
+  ret <16 x i32> %res4
+}
+
+define <8 x i64>@test_splat_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
+; KNL-LABEL: test_splat_ror_v8i64:
+; KNL: # BB#0:
+; KNL-NEXT: kmovw %edi, %k1
+; KNL-NEXT: vpbroadcastq {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5]
+; KNL-NEXT: vprorvq %zmm2, %zmm0, %zmm3
+; KNL-NEXT: vprorvq %zmm2, %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vprorvq %zmm2, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; KNL-NEXT: vpaddq %zmm3, %zmm0, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_splat_ror_v8i64:
+; SKX: # BB#0:
+; SKX-NEXT: kmovd %edi, %k1
+; SKX-NEXT: vpbroadcastq {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5]
+; SKX-NEXT: vprorvq %zmm2, %zmm0, %zmm3
+; SKX-NEXT: vprorvq %zmm2, %zmm0, %zmm1 {%k1}
+; SKX-NEXT: vprorvq %zmm2, %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; SKX-NEXT: vpaddq %zmm3, %zmm0, %zmm0
+; SKX-NEXT: retq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
+  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> zeroinitializer, i8 %x2)
+  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 -1)
+  %res3 = add <8 x i64> %res, %res1
+  %res4 = add <8 x i64> %res3, %res2
+  ret <8 x i64> %res4
+}
+
+; Tests showing failure to replace out-of-bounds variable rotates with in-bounds immediate splat versions.
+
+define <16 x i32> @test_splat_bounds_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
+; KNL-LABEL: test_splat_bounds_rol_v16i32:
+; KNL: # BB#0:
+; KNL-NEXT: kmovw %edi, %k1
+; KNL-NEXT: vprolvd {{.*}}(%rip){1to16}, %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
+; KNL-NEXT: vprolvd %zmm2, %zmm0, %zmm2 {%k1} {z}
+; KNL-NEXT: vpaddd %zmm2, %zmm1, %zmm1
+; KNL-NEXT: vprolvd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; KNL-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_splat_bounds_rol_v16i32:
+; SKX: # BB#0:
+; SKX-NEXT: kmovd %edi, %k1
+; SKX-NEXT: vprolvd {{.*}}(%rip){1to16}, %zmm0, %zmm1 {%k1}
+; SKX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
+; SKX-NEXT: vprolvd %zmm2, %zmm0, %zmm2 {%k1} {z}
+; SKX-NEXT: vpaddd %zmm2, %zmm1, %zmm1
+; SKX-NEXT: vprolvd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; SKX-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; SKX-NEXT: retq
+  %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
+  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> zeroinitializer, i16 %x2)
+  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534>, <16 x i32> %x1, i16 -1)
+  %res3 = add <16 x i32> %res, %res1
+  %res4 = add <16 x i32> %res3, %res2
+  ret <16 x i32> %res4
+}
+
+define <8 x i64>@test_splat_bounds_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
+; KNL-LABEL: test_splat_bounds_rol_v8i64:
+; KNL: # BB#0:
+; KNL-NEXT: kmovw %edi, %k1
+; KNL-NEXT: vprolvq {{.*}}(%rip){1to8}, %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vprolvq {{.*}}(%rip){1to8}, %zmm0, %zmm2 {%k1} {z}
+; KNL-NEXT: vpaddq %zmm2, %zmm1, %zmm1
+; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
+; KNL-NEXT: vprolvq %zmm2, %zmm0, %zmm0
+; KNL-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_splat_bounds_rol_v8i64:
+; SKX: # BB#0:
+; SKX-NEXT: kmovd %edi, %k1
+; SKX-NEXT: vprolvq {{.*}}(%rip){1to8}, %zmm0, %zmm1 {%k1}
+; SKX-NEXT: vprolvq {{.*}}(%rip){1to8}, %zmm0, %zmm2 {%k1} {z}
+; SKX-NEXT: vpaddq %zmm2, %zmm1, %zmm1
+; SKX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
+; SKX-NEXT: vprolvq %zmm2, %zmm0, %zmm0
+; SKX-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; SKX-NEXT: retq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
+  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65>, <8 x i64> zeroinitializer, i8 %x2)
+  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x1, i8 -1)
+  %res3 = add <8 x i64> %res, %res1
+  %res4 = add <8 x i64> %res3, %res2
+  ret <8 x i64> %res4
+}
+
+define <16 x i32> @test_splat_bounds_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
+; KNL-LABEL: test_splat_bounds_ror_v16i32:
+; KNL: # BB#0:
+; KNL-NEXT: kmovw %edi, %k1
+; KNL-NEXT: vprorvd {{.*}}(%rip){1to16}, %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
+; KNL-NEXT: vprorvd %zmm2, %zmm0, %zmm2 {%k1} {z}
+; KNL-NEXT: vpaddd %zmm2, %zmm1, %zmm1
+; KNL-NEXT: vprorvd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; KNL-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_splat_bounds_ror_v16i32:
+; SKX: # BB#0:
+; SKX-NEXT: kmovd %edi, %k1
+; SKX-NEXT: vprorvd {{.*}}(%rip){1to16}, %zmm0, %zmm1 {%k1}
+; SKX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
+; SKX-NEXT: vprorvd %zmm2, %zmm0, %zmm2 {%k1} {z}
+; SKX-NEXT: vpaddd %zmm2, %zmm1, %zmm1
+; SKX-NEXT: vprorvd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; SKX-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; SKX-NEXT: retq
+  %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
+  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> zeroinitializer, i16 %x2)
+  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534>, <16 x i32> %x1, i16 -1)
+  %res3 = add <16 x i32> %res, %res1
+  %res4 = add <16 x i32> %res3, %res2
+  ret <16 x i32> %res4
+}
+
+define <8 x i64>@test_splat_bounds_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
+; KNL-LABEL: test_splat_bounds_ror_v8i64:
+; KNL: # BB#0:
+; KNL-NEXT: kmovw %edi, %k1
+; KNL-NEXT: vprorvq {{.*}}(%rip){1to8}, %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vprorvq {{.*}}(%rip){1to8}, %zmm0, %zmm2 {%k1} {z}
+; KNL-NEXT: vpaddq %zmm2, %zmm1, %zmm1
+; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
+; KNL-NEXT: vprorvq %zmm2, %zmm0, %zmm0
+; KNL-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_splat_bounds_ror_v8i64:
+; SKX: # BB#0:
+; SKX-NEXT: kmovd %edi, %k1
+; SKX-NEXT: vprorvq {{.*}}(%rip){1to8}, %zmm0, %zmm1 {%k1}
+; SKX-NEXT: vprorvq {{.*}}(%rip){1to8}, %zmm0, %zmm2 {%k1} {z}
+; SKX-NEXT: vpaddq %zmm2, %zmm1, %zmm1
+; SKX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
+; SKX-NEXT: vprorvq %zmm2, %zmm0, %zmm0
+; SKX-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; SKX-NEXT: retq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
+  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65>, <8 x i64> zeroinitializer, i8 %x2)
+  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x1, i8 -1)
+  %res3 = add <8 x i64> %res, %res1
+  %res4 = add <8 x i64> %res3, %res2
+  ret <8 x i64> %res4
+}
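
For reference, the missed optimization in the first group of tests: a splat-constant rotate amount can in principle be folded into the immediate forms of the AVX-512F rotate instructions (vprold/vprolq/vprord/vprorq and their masked variants), which would also eliminate the vpbroadcastd/vpbroadcastq of the constant. A minimal sketch of what folded CHECK lines for test_splat_rol_v16i32 could look like if such a combine existed; the register assignments here are illustrative, not actual llc output:

; KNL: kmovw %edi, %k1
; KNL-NEXT: vprold $5, %zmm0, %zmm3
; KNL-NEXT: vprold $5, %zmm0, %zmm1 {%k1}
; KNL-NEXT: vprold $5, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT: vpaddd %zmm3, %zmm0, %zmm0
; KNL-NEXT: retq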
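
The second group relies on the documented behavior that the hardware reduces rotate counts modulo the element width (32 for doublewords, 64 for quadwords), so every out-of-bounds splat above has an equivalent in-bounds immediate: for the v16i32 tests, 33 mod 32 = 1, -1 is congruent to 31, and 65534 mod 32 = 30; for the v8i64 tests, 65 mod 64 = 1, 65534 mod 64 = 62, and -1 is congruent to 63. A sketch of folded CHECK lines for test_splat_bounds_rol_v16i32 under that assumption, again with illustrative registers:

; KNL: kmovw %edi, %k1
; KNL-NEXT: vprold $1, %zmm0, %zmm1 {%k1}
; KNL-NEXT: vprold $31, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT: vpaddd %zmm2, %zmm1, %zmm1
; KNL-NEXT: vprold $30, %zmm0, %zmm0
; KNL-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT: retq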