summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp 15
-rw-r--r-- llvm/test/CodeGen/X86/avx512-vselect.ll 59
-rw-r--r-- llvm/test/CodeGen/X86/min-legal-vector-width.ll 20
-rw-r--r-- llvm/test/CodeGen/X86/pr34177.ll 121
4 files changed, 122 insertions, 93 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 560b5729e3d..5562f400b6e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -521,9 +521,18 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) {
GetSplitVector(Cond, CL, CH);
// It seems to improve code to generate two narrow SETCCs as opposed to
// splitting a wide result vector.
- else if (Cond.getOpcode() == ISD::SETCC)
- SplitVecRes_SETCC(Cond.getNode(), CL, CH);
- else
+ else if (Cond.getOpcode() == ISD::SETCC) {
+ // If the condition is a vXi1 vector, and the LHS of the setcc is a legal
+ // type and the setcc result type is the same vXi1, then leave the setcc
+ // alone.
+ EVT CondLHSVT = Cond.getOperand(0).getValueType();
+ if (Cond.getValueType().getVectorElementType() == MVT::i1 &&
+ isTypeLegal(CondLHSVT) &&
+ getSetCCResultType(CondLHSVT) == Cond.getValueType())
+ std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
+ else
+ SplitVecRes_SETCC(Cond.getNode(), CL, CH);
+ } else
std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
}
diff --git a/llvm/test/CodeGen/X86/avx512-vselect.ll b/llvm/test/CodeGen/X86/avx512-vselect.ll
index 7ee4e6674e0..07e5aeac015 100644
--- a/llvm/test/CodeGen/X86/avx512-vselect.ll
+++ b/llvm/test/CodeGen/X86/avx512-vselect.ll
@@ -51,10 +51,9 @@ entry:
define <16 x i64> @test3(<16 x i8> %x, <16 x i64> %a, <16 x i64> %b) {
; CHECK-SKX-LABEL: test3:
; CHECK-SKX: # %bb.0:
-; CHECK-SKX-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[2,3,0,1]
-; CHECK-SKX-NEXT: vptestnmb %xmm5, %xmm5, %k1
-; CHECK-SKX-NEXT: vptestnmb %xmm0, %xmm0, %k2
-; CHECK-SKX-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k2}
+; CHECK-SKX-NEXT: vptestnmb %xmm0, %xmm0, %k1
+; CHECK-SKX-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k1}
+; CHECK-SKX-NEXT: kshiftrw $8, %k1, %k1
; CHECK-SKX-NEXT: vpblendmq %zmm2, %zmm4, %zmm1 {%k1}
; CHECK-SKX-NEXT: retq
;
@@ -76,10 +75,9 @@ define <16 x i64> @test3(<16 x i8> %x, <16 x i64> %a, <16 x i64> %b) {
define <16 x i64> @test4(<16 x i16> %x, <16 x i64> %a, <16 x i64> %b) {
; CHECK-SKX-LABEL: test4:
; CHECK-SKX: # %bb.0:
-; CHECK-SKX-NEXT: vextracti128 $1, %ymm0, %xmm5
-; CHECK-SKX-NEXT: vptestnmw %xmm5, %xmm5, %k1
-; CHECK-SKX-NEXT: vptestnmw %xmm0, %xmm0, %k2
-; CHECK-SKX-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k2}
+; CHECK-SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1
+; CHECK-SKX-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k1}
+; CHECK-SKX-NEXT: kshiftrw $8, %k1, %k1
; CHECK-SKX-NEXT: vpblendmq %zmm2, %zmm4, %zmm1 {%k1}
; CHECK-SKX-NEXT: retq
;
@@ -99,23 +97,13 @@ define <16 x i64> @test4(<16 x i16> %x, <16 x i64> %a, <16 x i64> %b) {
}
define <16 x i64> @test5(<16 x i32> %x, <16 x i64> %a, <16 x i64> %b) {
-; CHECK-SKX-LABEL: test5:
-; CHECK-SKX: # %bb.0:
-; CHECK-SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm5
-; CHECK-SKX-NEXT: vptestnmd %ymm5, %ymm5, %k1
-; CHECK-SKX-NEXT: vptestnmd %ymm0, %ymm0, %k2
-; CHECK-SKX-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k2}
-; CHECK-SKX-NEXT: vpblendmq %zmm2, %zmm4, %zmm1 {%k1}
-; CHECK-SKX-NEXT: retq
-;
-; CHECK-KNL-LABEL: test5:
-; CHECK-KNL: # %bb.0:
-; CHECK-KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm5
-; CHECK-KNL-NEXT: vptestnmd %zmm5, %zmm5, %k1
-; CHECK-KNL-NEXT: vptestnmd %zmm0, %zmm0, %k2
-; CHECK-KNL-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k2}
-; CHECK-KNL-NEXT: vpblendmq %zmm2, %zmm4, %zmm1 {%k1}
-; CHECK-KNL-NEXT: retq
+; CHECK-LABEL: test5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
+; CHECK-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k1}
+; CHECK-NEXT: kshiftrw $8, %k1, %k1
+; CHECK-NEXT: vpblendmq %zmm2, %zmm4, %zmm1 {%k1}
+; CHECK-NEXT: retq
%c = icmp eq <16 x i32> %x, zeroinitializer
%ret = select <16 x i1> %c, <16 x i64> %a, <16 x i64> %b
ret <16 x i64> %ret
@@ -124,10 +112,9 @@ define <16 x i64> @test5(<16 x i32> %x, <16 x i64> %a, <16 x i64> %b) {
define <32 x i32> @test6(<32 x i8> %x, <32 x i32> %a, <32 x i32> %b) {
; CHECK-SKX-LABEL: test6:
; CHECK-SKX: # %bb.0:
-; CHECK-SKX-NEXT: vextracti128 $1, %ymm0, %xmm5
-; CHECK-SKX-NEXT: vptestnmb %xmm5, %xmm5, %k1
-; CHECK-SKX-NEXT: vptestnmb %xmm0, %xmm0, %k2
-; CHECK-SKX-NEXT: vpblendmd %zmm1, %zmm3, %zmm0 {%k2}
+; CHECK-SKX-NEXT: vptestnmb %ymm0, %ymm0, %k1
+; CHECK-SKX-NEXT: vpblendmd %zmm1, %zmm3, %zmm0 {%k1}
+; CHECK-SKX-NEXT: kshiftrd $16, %k1, %k1
; CHECK-SKX-NEXT: vpblendmd %zmm2, %zmm4, %zmm1 {%k1}
; CHECK-SKX-NEXT: retq
;
@@ -151,10 +138,9 @@ define <32 x i32> @test6(<32 x i8> %x, <32 x i32> %a, <32 x i32> %b) {
define <32 x i32> @test7(<32 x i16> %x, <32 x i32> %a, <32 x i32> %b) {
; CHECK-SKX-LABEL: test7:
; CHECK-SKX: # %bb.0:
-; CHECK-SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm5
-; CHECK-SKX-NEXT: vptestnmw %ymm5, %ymm5, %k1
-; CHECK-SKX-NEXT: vptestnmw %ymm0, %ymm0, %k2
-; CHECK-SKX-NEXT: vpblendmd %zmm1, %zmm3, %zmm0 {%k2}
+; CHECK-SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1
+; CHECK-SKX-NEXT: vpblendmd %zmm1, %zmm3, %zmm0 {%k1}
+; CHECK-SKX-NEXT: kshiftrd $16, %k1, %k1
; CHECK-SKX-NEXT: vpblendmd %zmm2, %zmm4, %zmm1 {%k1}
; CHECK-SKX-NEXT: retq
;
@@ -179,10 +165,9 @@ define <32 x i32> @test7(<32 x i16> %x, <32 x i32> %a, <32 x i32> %b) {
define <64 x i16> @test8(<64 x i8> %x, <64 x i16> %a, <64 x i16> %b) {
; CHECK-SKX-LABEL: test8:
; CHECK-SKX: # %bb.0:
-; CHECK-SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm5
-; CHECK-SKX-NEXT: vptestnmb %ymm5, %ymm5, %k1
-; CHECK-SKX-NEXT: vptestnmb %ymm0, %ymm0, %k2
-; CHECK-SKX-NEXT: vpblendmw %zmm1, %zmm3, %zmm0 {%k2}
+; CHECK-SKX-NEXT: vptestnmb %zmm0, %zmm0, %k1
+; CHECK-SKX-NEXT: vpblendmw %zmm1, %zmm3, %zmm0 {%k1}
+; CHECK-SKX-NEXT: kshiftrq $32, %k1, %k1
; CHECK-SKX-NEXT: vpblendmw %zmm2, %zmm4, %zmm1 {%k1}
; CHECK-SKX-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
index 46e73c1f854..88329600b23 100644
--- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll
+++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
@@ -1013,9 +1013,7 @@ define void @vselect_split_v8i16_setcc(<8 x i16> %s, <8 x i16> %t, <8 x i64>* %p
; CHECK-NEXT: vmovdqa (%rsi), %ymm2
; CHECK-NEXT: vmovdqa 32(%rsi), %ymm3
; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k1
-; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k2
+; CHECK-NEXT: kshiftrb $4, %k1, %k2
; CHECK-NEXT: vmovdqa64 32(%rdi), %ymm3 {%k2}
; CHECK-NEXT: vmovdqa64 (%rdi), %ymm2 {%k1}
; CHECK-NEXT: vmovdqa %ymm2, (%rdx)
@@ -1035,10 +1033,8 @@ define void @vselect_split_v8i32_setcc(<8 x i32> %s, <8 x i32> %t, <8 x i64>* %p
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa (%rsi), %ymm2
; CHECK-NEXT: vmovdqa 32(%rsi), %ymm3
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
-; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm1
-; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k2
+; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k1
+; CHECK-NEXT: kshiftrb $4, %k1, %k2
; CHECK-NEXT: vmovdqa64 32(%rdi), %ymm3 {%k2}
; CHECK-NEXT: vmovdqa64 (%rdi), %ymm2 {%k1}
; CHECK-NEXT: vmovdqa %ymm2, (%rdx)
@@ -1059,9 +1055,7 @@ define void @vselect_split_v16i8_setcc(<16 x i8> %s, <16 x i8> %t, <16 x i32>* %
; CHECK-NEXT: vmovdqa (%rsi), %ymm2
; CHECK-NEXT: vmovdqa 32(%rsi), %ymm3
; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k1
-; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k2
+; CHECK-NEXT: kshiftrw $8, %k1, %k2
; CHECK-NEXT: vmovdqa32 32(%rdi), %ymm3 {%k2}
; CHECK-NEXT: vmovdqa32 (%rdi), %ymm2 {%k1}
; CHECK-NEXT: vmovdqa %ymm2, (%rdx)
@@ -1081,10 +1075,8 @@ define void @vselect_split_v16i16_setcc(<16 x i16> %s, <16 x i16> %t, <16 x i32>
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa (%rsi), %ymm2
; CHECK-NEXT: vmovdqa 32(%rsi), %ymm3
-; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k1
-; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm1
-; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k2
+; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k1
+; CHECK-NEXT: kshiftrw $8, %k1, %k2
; CHECK-NEXT: vmovdqa32 32(%rdi), %ymm3 {%k2}
; CHECK-NEXT: vmovdqa32 (%rdi), %ymm2 {%k1}
; CHECK-NEXT: vmovdqa %ymm2, (%rdx)
diff --git a/llvm/test/CodeGen/X86/pr34177.ll b/llvm/test/CodeGen/X86/pr34177.ll
index 056682bb275..f8ead6352f1 100644
--- a/llvm/test/CodeGen/X86/pr34177.ll
+++ b/llvm/test/CodeGen/X86/pr34177.ll
@@ -6,45 +6,88 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define void @test(<4 x i64> %a, <4 x x86_fp80> %b, <8 x x86_fp80>* %c) local_unnamed_addr {
-; CHECK-LABEL: test:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq %xmm0, %rax
-; CHECK-NEXT: vpextrq $1, %xmm0, %rcx
-; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vmovq %xmm0, %rdx
-; CHECK-NEXT: vpextrq $1, %xmm0, %rsi
-; CHECK-NEXT: cmpq $3, %rsi
-; CHECK-NEXT: fld1
-; CHECK-NEXT: fldz
-; CHECK-NEXT: fld %st(0)
-; CHECK-NEXT: fcmove %st(2), %st
-; CHECK-NEXT: cmpq $2, %rdx
-; CHECK-NEXT: fld %st(1)
-; CHECK-NEXT: fcmove %st(3), %st
-; CHECK-NEXT: cmpq $1, %rcx
-; CHECK-NEXT: fld %st(2)
-; CHECK-NEXT: fcmove %st(4), %st
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: fxch %st(3)
-; CHECK-NEXT: fcmove %st(4), %st
-; CHECK-NEXT: fstp %st(4)
-; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
-; CHECK-NEXT: fstpt 70(%rdi)
-; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
-; CHECK-NEXT: fstpt 50(%rdi)
-; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
-; CHECK-NEXT: fstpt 30(%rdi)
-; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
-; CHECK-NEXT: fstpt 10(%rdi)
-; CHECK-NEXT: fxch %st(1)
-; CHECK-NEXT: fadd %st, %st(0)
-; CHECK-NEXT: fstpt 60(%rdi)
-; CHECK-NEXT: fadd %st, %st(0)
-; CHECK-NEXT: fstpt 40(%rdi)
-; CHECK-NEXT: fadd %st, %st(0)
-; CHECK-NEXT: fstpt 20(%rdi)
-; CHECK-NEXT: fadd %st, %st(0)
-; CHECK-NEXT: fstpt (%rdi)
+; AVX512F-LABEL: test:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vmovq %xmm0, %rax
+; AVX512F-NEXT: vpextrq $1, %xmm0, %rcx
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512F-NEXT: vmovq %xmm0, %rdx
+; AVX512F-NEXT: vpextrq $1, %xmm0, %rsi
+; AVX512F-NEXT: cmpq $3, %rsi
+; AVX512F-NEXT: fld1
+; AVX512F-NEXT: fldz
+; AVX512F-NEXT: fld %st(0)
+; AVX512F-NEXT: fcmove %st(2), %st
+; AVX512F-NEXT: cmpq $2, %rdx
+; AVX512F-NEXT: fld %st(1)
+; AVX512F-NEXT: fcmove %st(3), %st
+; AVX512F-NEXT: cmpq $1, %rcx
+; AVX512F-NEXT: fld %st(2)
+; AVX512F-NEXT: fcmove %st(4), %st
+; AVX512F-NEXT: testq %rax, %rax
+; AVX512F-NEXT: fxch %st(3)
+; AVX512F-NEXT: fcmove %st(4), %st
+; AVX512F-NEXT: fstp %st(4)
+; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: fstpt 70(%rdi)
+; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: fstpt 50(%rdi)
+; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: fstpt 30(%rdi)
+; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: fstpt 10(%rdi)
+; AVX512F-NEXT: fxch %st(1)
+; AVX512F-NEXT: fadd %st, %st(0)
+; AVX512F-NEXT: fstpt 60(%rdi)
+; AVX512F-NEXT: fadd %st, %st(0)
+; AVX512F-NEXT: fstpt 40(%rdi)
+; AVX512F-NEXT: fadd %st, %st(0)
+; AVX512F-NEXT: fstpt 20(%rdi)
+; AVX512F-NEXT: fadd %st, %st(0)
+; AVX512F-NEXT: fstpt (%rdi)
+;
+; AVX512VL-LABEL: test:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpcmpeqq {{.*}}(%rip), %ymm0, %k0
+; AVX512VL-NEXT: kshiftrb $2, %k0, %k1
+; AVX512VL-NEXT: kshiftrb $1, %k0, %k2
+; AVX512VL-NEXT: kmovd %k0, %eax
+; AVX512VL-NEXT: testb $1, %al
+; AVX512VL-NEXT: fld1
+; AVX512VL-NEXT: fldz
+; AVX512VL-NEXT: fld %st(0)
+; AVX512VL-NEXT: fcmovne %st(2), %st
+; AVX512VL-NEXT: kshiftrb $1, %k1, %k0
+; AVX512VL-NEXT: kmovd %k0, %eax
+; AVX512VL-NEXT: testb $1, %al
+; AVX512VL-NEXT: fld %st(1)
+; AVX512VL-NEXT: fcmovne %st(3), %st
+; AVX512VL-NEXT: kmovd %k1, %eax
+; AVX512VL-NEXT: testb $1, %al
+; AVX512VL-NEXT: fld %st(2)
+; AVX512VL-NEXT: fcmovne %st(4), %st
+; AVX512VL-NEXT: kmovd %k2, %eax
+; AVX512VL-NEXT: testb $1, %al
+; AVX512VL-NEXT: fxch %st(3)
+; AVX512VL-NEXT: fcmovne %st(4), %st
+; AVX512VL-NEXT: fstp %st(4)
+; AVX512VL-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: fstpt 70(%rdi)
+; AVX512VL-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: fstpt 50(%rdi)
+; AVX512VL-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: fstpt 30(%rdi)
+; AVX512VL-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: fstpt 10(%rdi)
+; AVX512VL-NEXT: fxch %st(1)
+; AVX512VL-NEXT: fadd %st, %st(0)
+; AVX512VL-NEXT: fstpt (%rdi)
+; AVX512VL-NEXT: fadd %st, %st(0)
+; AVX512VL-NEXT: fstpt 60(%rdi)
+; AVX512VL-NEXT: fadd %st, %st(0)
+; AVX512VL-NEXT: fstpt 40(%rdi)
+; AVX512VL-NEXT: fadd %st, %st(0)
+; AVX512VL-NEXT: fstpt 20(%rdi)
%1 = icmp eq <4 x i64> <i64 0, i64 1, i64 2, i64 3>, %a
%2 = select <4 x i1> %1, <4 x x86_fp80> <x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000>, <4 x x86_fp80> zeroinitializer
%3 = fadd <4 x x86_fp80> %2, %2
OpenPOWER on IntegriCloud