author    Simon Pilgrim <llvm-dev@redking.me.uk>    2019-10-29 17:53:03 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk>    2019-10-29 17:57:55 +0000
commit    55cfaec9fb7b00ca1c013193b731406590c8a56b (patch)
tree      3740eae1db83c095034feb156d7afb59030d9343
parent    3c9063f5d2df3affba655711c2031020e6819510 (diff)
[X86][VBMI2] Add vector funnel shift tests
Demonstrates a missed opportunity to combine to the VBMI2 SHLDV/SHRDV ops - combineOrShiftToFunnelShift should handle vector ops (and we should eventually move this to DAGCombine).
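
For reference, a minimal sketch (not part of this patch) of the form such a combine would produce: the non-splat 512-bit i64 pattern expressed with the generic llvm.fshl funnel-shift intrinsic, which the X86 backend can select as VBMI2 VPSHLDVQ (or the immediate VPSHLDQ form for constant amounts) instead of the vpsllvq/vpsrlvq/vporq sequence checked in the tests below. The function name is illustrative only.

; Illustrative funnel-shift intrinsic form of avx512_funnel_shift_q_512.
; fshl(%a0, %a1, %amt) computes (%a0 << %amt) | (%a1 >> (64 - %amt)) per lane,
; which matches the shl-by-31 / lshr-by-33 (and 33/31) pairs in the test.
declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)

define <8 x i64> @avx512_funnel_shift_q_512_fshl(<8 x i64> %a0, <8 x i64> %a1) {
  %r = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> <i64 31, i64 33, i64 31, i64 33, i64 31, i64 33, i64 31, i64 33>)
  ret <8 x i64> %r
}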
-rw-r--r--  llvm/test/CodeGen/X86/avx512vbmi2-funnel-shifts.ll    104
-rw-r--r--  llvm/test/CodeGen/X86/avx512vbmi2vl-funnel-shifts.ll  203
2 files changed, 307 insertions(+), 0 deletions(-)
diff --git a/llvm/test/CodeGen/X86/avx512vbmi2-funnel-shifts.ll b/llvm/test/CodeGen/X86/avx512vbmi2-funnel-shifts.ll
new file mode 100644
index 00000000000..6a65d941c97
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512vbmi2-funnel-shifts.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X64
+
+define <8 x i64> @avx512_funnel_shift_q_512(<8 x i64> %a0, <8 x i64> %a1) {
+; X86-LABEL: avx512_funnel_shift_q_512:
+; X86: # %bb.0:
+; X86-NEXT: vpsllvq {{\.LCPI.*}}, %zmm0, %zmm0
+; X86-NEXT: vpsrlvq {{\.LCPI.*}}, %zmm1, %zmm1
+; X86-NEXT: vporq %zmm1, %zmm0, %zmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: avx512_funnel_shift_q_512:
+; X64: # %bb.0:
+; X64-NEXT: vpsllvq {{.*}}(%rip), %zmm0, %zmm0
+; X64-NEXT: vpsrlvq {{.*}}(%rip), %zmm1, %zmm1
+; X64-NEXT: vporq %zmm1, %zmm0, %zmm0
+; X64-NEXT: retq
+ %1 = shl <8 x i64> %a0, <i64 31, i64 33, i64 31, i64 33, i64 31, i64 33, i64 31, i64 33>
+ %2 = lshr <8 x i64> %a1, <i64 33, i64 31, i64 33, i64 31, i64 33, i64 31, i64 33, i64 31>
+ %3 = or <8 x i64> %1, %2
+ ret <8 x i64> %3
+}
+
+define <8 x i64> @avx512_funnel_shift_q_512_splat(<8 x i64> %a0, <8 x i64> %a1) {
+; CHECK-LABEL: avx512_funnel_shift_q_512_splat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpsllq $31, %zmm0, %zmm0
+; CHECK-NEXT: vpsrlq $33, %zmm1, %zmm1
+; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %1 = shl <8 x i64> %a0, <i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31>
+ %2 = lshr <8 x i64> %a1, <i64 33, i64 33, i64 33, i64 33, i64 33, i64 33, i64 33, i64 33>
+ %3 = or <8 x i64> %1, %2
+ ret <8 x i64> %3
+}
+
+define <16 x i32> @avx512_funnel_shift_d_512(<16 x i32> %a0, <16 x i32> %a1) {
+; X86-LABEL: avx512_funnel_shift_d_512:
+; X86: # %bb.0:
+; X86-NEXT: vpsllvd {{\.LCPI.*}}, %zmm0, %zmm0
+; X86-NEXT: vpsrlvd {{\.LCPI.*}}, %zmm1, %zmm1
+; X86-NEXT: vpord %zmm1, %zmm0, %zmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: avx512_funnel_shift_d_512:
+; X64: # %bb.0:
+; X64-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
+; X64-NEXT: vpsrlvd {{.*}}(%rip), %zmm1, %zmm1
+; X64-NEXT: vpord %zmm1, %zmm0, %zmm0
+; X64-NEXT: retq
+ %1 = shl <16 x i32> %a0, <i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17>
+ %2 = lshr <16 x i32> %a1, <i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15>
+ %3 = or <16 x i32> %1, %2
+ ret <16 x i32> %3
+}
+
+define <16 x i32> @avx512_funnel_shift_d_512_splat(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: avx512_funnel_shift_d_512_splat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpslld $15, %zmm0, %zmm0
+; CHECK-NEXT: vpsrld $17, %zmm1, %zmm1
+; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %1 = shl <16 x i32> %a0, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+ %2 = lshr <16 x i32> %a1, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
+ %3 = or <16 x i32> %1, %2
+ ret <16 x i32> %3
+}
+
+define <32 x i16> @avx512_funnel_shift_w_512(<32 x i16> %a0, <32 x i16> %a1) {
+; X86-LABEL: avx512_funnel_shift_w_512:
+; X86: # %bb.0:
+; X86-NEXT: vpsllvw {{\.LCPI.*}}, %zmm0, %zmm0
+; X86-NEXT: vpsrlvw {{\.LCPI.*}}, %zmm1, %zmm1
+; X86-NEXT: vporq %zmm1, %zmm0, %zmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: avx512_funnel_shift_w_512:
+; X64: # %bb.0:
+; X64-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
+; X64-NEXT: vpsrlvw {{.*}}(%rip), %zmm1, %zmm1
+; X64-NEXT: vporq %zmm1, %zmm0, %zmm0
+; X64-NEXT: retq
+ %1 = shl <32 x i16> %a0, <i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9>
+ %2 = lshr <32 x i16> %a1, <i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7>
+ %3 = or <32 x i16> %1, %2
+ ret <32 x i16> %3
+}
+
+define <32 x i16> @avx512_funnel_shift_w_512_splat(<32 x i16> %a0, <32 x i16> %a1) {
+; CHECK-LABEL: avx512_funnel_shift_w_512_splat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpsllw $7, %zmm0, %zmm0
+; CHECK-NEXT: vpsrlw $9, %zmm1, %zmm1
+; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %1 = shl <32 x i16> %a0, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+ %2 = lshr <32 x i16> %a1, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
+ %3 = or <32 x i16> %1, %2
+ ret <32 x i16> %3
+}
+
+
diff --git a/llvm/test/CodeGen/X86/avx512vbmi2vl-funnel-shifts.ll b/llvm/test/CodeGen/X86/avx512vbmi2vl-funnel-shifts.ll
new file mode 100644
index 00000000000..ed5fb6f5e81
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512vbmi2vl-funnel-shifts.ll
@@ -0,0 +1,203 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64
+
+define <2 x i64> @avx512_funnel_shift_q_128(<2 x i64> %a0, <2 x i64> %a1) {
+; X86-LABEL: avx512_funnel_shift_q_128:
+; X86: # %bb.0:
+; X86-NEXT: vpsllvq {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-NEXT: vpsrlvq {{\.LCPI.*}}, %xmm1, %xmm1
+; X86-NEXT: vpor %xmm1, %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: avx512_funnel_shift_q_128:
+; X64: # %bb.0:
+; X64-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vpsrlvq {{.*}}(%rip), %xmm1, %xmm1
+; X64-NEXT: vpor %xmm1, %xmm0, %xmm0
+; X64-NEXT: retq
+ %1 = shl <2 x i64> %a0, <i64 31, i64 33>
+ %2 = lshr <2 x i64> %a1, <i64 33, i64 31>
+ %3 = or <2 x i64> %1, %2
+ ret <2 x i64> %3
+}
+
+define <4 x i64> @avx512_funnel_shift_q_256(<4 x i64> %a0, <4 x i64> %a1) {
+; X86-LABEL: avx512_funnel_shift_q_256:
+; X86: # %bb.0:
+; X86-NEXT: vpsllvq {{\.LCPI.*}}, %ymm0, %ymm0
+; X86-NEXT: vpsrlvq {{\.LCPI.*}}, %ymm1, %ymm1
+; X86-NEXT: vpor %ymm1, %ymm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: avx512_funnel_shift_q_256:
+; X64: # %bb.0:
+; X64-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0
+; X64-NEXT: vpsrlvq {{.*}}(%rip), %ymm1, %ymm1
+; X64-NEXT: vpor %ymm1, %ymm0, %ymm0
+; X64-NEXT: retq
+ %1 = shl <4 x i64> %a0, <i64 31, i64 33, i64 31, i64 33>
+ %2 = lshr <4 x i64> %a1, <i64 33, i64 31, i64 33, i64 31>
+ %3 = or <4 x i64> %1, %2
+ ret <4 x i64> %3
+}
+
+define <2 x i64> @avx512_funnel_shift_q_128_splat(<2 x i64> %a0, <2 x i64> %a1) {
+; CHECK-LABEL: avx512_funnel_shift_q_128_splat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpsllq $31, %xmm0, %xmm0
+; CHECK-NEXT: vpsrlq $33, %xmm1, %xmm1
+; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %1 = shl <2 x i64> %a0, <i64 31, i64 31>
+ %2 = lshr <2 x i64> %a1, <i64 33, i64 33>
+ %3 = or <2 x i64> %1, %2
+ ret <2 x i64> %3
+}
+
+define <4 x i64> @avx512_funnel_shift_q_256_splat(<4 x i64> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: avx512_funnel_shift_q_256_splat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpsllq $31, %ymm0, %ymm0
+; CHECK-NEXT: vpsrlq $33, %ymm1, %ymm1
+; CHECK-NEXT: vpor %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %1 = shl <4 x i64> %a0, <i64 31, i64 31, i64 31, i64 31>
+ %2 = lshr <4 x i64> %a1, <i64 33, i64 33, i64 33, i64 33>
+ %3 = or <4 x i64> %1, %2
+ ret <4 x i64> %3
+}
+
+define <4 x i32> @avx512_funnel_shift_d_128(<4 x i32> %a0, <4 x i32> %a1) {
+; X86-LABEL: avx512_funnel_shift_d_128:
+; X86: # %bb.0:
+; X86-NEXT: vpsllvd {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-NEXT: vpsrlvd {{\.LCPI.*}}, %xmm1, %xmm1
+; X86-NEXT: vpor %xmm1, %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: avx512_funnel_shift_d_128:
+; X64: # %bb.0:
+; X64-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
+; X64-NEXT: vpor %xmm1, %xmm0, %xmm0
+; X64-NEXT: retq
+ %1 = shl <4 x i32> %a0, <i32 15, i32 17, i32 15, i32 17>
+ %2 = lshr <4 x i32> %a1, <i32 17, i32 15, i32 17, i32 15>
+ %3 = or <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <8 x i32> @avx512_funnel_shift_d_256(<8 x i32> %a0, <8 x i32> %a1) {
+; X86-LABEL: avx512_funnel_shift_d_256:
+; X86: # %bb.0:
+; X86-NEXT: vpsllvd {{\.LCPI.*}}, %ymm0, %ymm0
+; X86-NEXT: vpsrlvd {{\.LCPI.*}}, %ymm1, %ymm1
+; X86-NEXT: vpor %ymm1, %ymm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: avx512_funnel_shift_d_256:
+; X64: # %bb.0:
+; X64-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
+; X64-NEXT: vpsrlvd {{.*}}(%rip), %ymm1, %ymm1
+; X64-NEXT: vpor %ymm1, %ymm0, %ymm0
+; X64-NEXT: retq
+ %1 = shl <8 x i32> %a0, <i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17>
+ %2 = lshr <8 x i32> %a1, <i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15>
+ %3 = or <8 x i32> %1, %2
+ ret <8 x i32> %3
+}
+
+define <4 x i32> @avx512_funnel_shift_d_128_splat(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: avx512_funnel_shift_d_128_splat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpslld $15, %xmm0, %xmm0
+; CHECK-NEXT: vpsrld $17, %xmm1, %xmm1
+; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %1 = shl <4 x i32> %a0, <i32 15, i32 15, i32 15, i32 15>
+ %2 = lshr <4 x i32> %a1, <i32 17, i32 17, i32 17, i32 17>
+ %3 = or <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <8 x i32> @avx512_funnel_shift_d_256_splat(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: avx512_funnel_shift_d_256_splat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpslld $15, %ymm0, %ymm0
+; CHECK-NEXT: vpsrld $17, %ymm1, %ymm1
+; CHECK-NEXT: vpor %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %1 = shl <8 x i32> %a0, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+ %2 = lshr <8 x i32> %a1, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
+ %3 = or <8 x i32> %1, %2
+ ret <8 x i32> %3
+}
+
+define <8 x i16> @avx512_funnel_shift_w_128(<8 x i16> %a0, <8 x i16> %a1) {
+; X86-LABEL: avx512_funnel_shift_w_128:
+; X86: # %bb.0:
+; X86-NEXT: vpsllvw {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-NEXT: vpsrlvw {{\.LCPI.*}}, %xmm1, %xmm1
+; X86-NEXT: vpor %xmm1, %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: avx512_funnel_shift_w_128:
+; X64: # %bb.0:
+; X64-NEXT: vpsllvw {{.*}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vpsrlvw {{.*}}(%rip), %xmm1, %xmm1
+; X64-NEXT: vpor %xmm1, %xmm0, %xmm0
+; X64-NEXT: retq
+ %1 = shl <8 x i16> %a0, <i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9>
+ %2 = lshr <8 x i16> %a1, <i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7>
+ %3 = or <8 x i16> %1, %2
+ ret <8 x i16> %3
+}
+
+define <16 x i16> @avx512_funnel_shift_w_256(<16 x i16> %a0, <16 x i16> %a1) {
+; X86-LABEL: avx512_funnel_shift_w_256:
+; X86: # %bb.0:
+; X86-NEXT: vpsllvw {{\.LCPI.*}}, %ymm0, %ymm0
+; X86-NEXT: vpsrlvw {{\.LCPI.*}}, %ymm1, %ymm1
+; X86-NEXT: vpor %ymm1, %ymm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: avx512_funnel_shift_w_256:
+; X64: # %bb.0:
+; X64-NEXT: vpsllvw {{.*}}(%rip), %ymm0, %ymm0
+; X64-NEXT: vpsrlvw {{.*}}(%rip), %ymm1, %ymm1
+; X64-NEXT: vpor %ymm1, %ymm0, %ymm0
+; X64-NEXT: retq
+ %1 = shl <16 x i16> %a0, <i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9>
+ %2 = lshr <16 x i16> %a1, <i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7>
+ %3 = or <16 x i16> %1, %2
+ ret <16 x i16> %3
+}
+
+define <8 x i16> @avx512_funnel_shift_w_128_splat(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: avx512_funnel_shift_w_128_splat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0
+; CHECK-NEXT: vpsrlw $9, %xmm1, %xmm1
+; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %1 = shl <8 x i16> %a0, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+ %2 = lshr <8 x i16> %a1, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
+ %3 = or <8 x i16> %1, %2
+ ret <8 x i16> %3
+}
+
+define <16 x i16> @avx512_funnel_shift_w_256_splat(<16 x i16> %a0, <16 x i16> %a1) {
+; CHECK-LABEL: avx512_funnel_shift_w_256_splat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0
+; CHECK-NEXT: vpsrlw $9, %ymm1, %ymm1
+; CHECK-NEXT: vpor %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %1 = shl <16 x i16> %a0, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+ %2 = lshr <16 x i16> %a1, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
+ %3 = or <16 x i16> %1, %2
+ ret <16 x i16> %3
+}
+
+