summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp34
-rw-r--r--llvm/test/CodeGen/X86/avx512-vselect.ll61
2 files changed, 90 insertions, 5 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9cac59b16d7..607ba9bc689 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1381,7 +1381,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::VSELECT, VT, Legal);
+ setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
@@ -1445,8 +1445,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
- setOperationAction(ISD::VSELECT, MVT::v32i16, Legal);
- setOperationAction(ISD::VSELECT, MVT::v64i8, Legal);
setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
@@ -1479,7 +1477,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::VSELECT, VT, Legal);
+ setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::ABS, VT, Legal);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
@@ -13817,6 +13815,11 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(2).getNode()))
return SDValue();
+  // If this VSELECT has a vector of i1 as a mask, it will be directly matched
+ // with patterns on the mask registers on AVX-512.
+ if (Op->getOperand(0).getValueType().getScalarSizeInBits() == 1)
+ return Op;
+
// Try to lower this to a blend-style vector shuffle. This can handle all
// constant condition cases.
if (SDValue BlendOp = lowerVSELECTtoVectorShuffle(Op, Subtarget, DAG))
@@ -13826,10 +13829,31 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
if (!Subtarget.hasSSE41())
return SDValue();
+ SDLoc dl(Op);
+ MVT VT = Op.getSimpleValueType();
+
+ // If the VSELECT is on a 512-bit type, we have to convert a non-i1 condition
+ // into an i1 condition so that we can use the mask-based 512-bit blend
+ // instructions.
+ if (VT.getSizeInBits() == 512) {
+ SDValue Cond = Op.getOperand(0);
+ // The vNi1 condition case should be handled above as it can be trivially
+ // lowered.
+ assert(Cond.getValueType().getScalarSizeInBits() ==
+ VT.getScalarSizeInBits() &&
+ "Should have a size-matched integer condition!");
+ // Build a mask by testing the condition against itself (tests for zero).
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ SDValue Mask = DAG.getNode(X86ISD::TESTM, dl, MaskVT, Cond, Cond);
+ // Now return a new VSELECT using the mask.
+ return DAG.getNode(ISD::VSELECT, dl, VT, Mask, Op.getOperand(1),
+ Op.getOperand(2));
+ }
+
// Only some types will be legal on some subtargets. If we can emit a legal
  // VSELECT-matching blend, return Op, but if we need to expand, return
// a null value.
- switch (Op.getSimpleValueType().SimpleTy) {
+ switch (VT.SimpleTy) {
default:
// Most of the vector types have blends past SSE4.1.
return Op;
diff --git a/llvm/test/CodeGen/X86/avx512-vselect.ll b/llvm/test/CodeGen/X86/avx512-vselect.ll
new file mode 100644
index 00000000000..1940864824f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512-vselect.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mcpu=skx | FileCheck %s --check-prefixes=CHECK,CHECK-SKX
+; RUN: llc < %s -mcpu=knl | FileCheck %s --check-prefixes=CHECK,CHECK-KNL
+
+target triple = "x86_64-unknown-unknown"
+
+define <8 x i64> @test1(<8 x i64> %m, <8 x i64> %a, <8 x i64> %b) {
+; CHECK-LABEL: test1:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vpsllq $63, %zmm0, %zmm0
+; CHECK-NEXT: vptestmq %zmm0, %zmm0, %k1
+; CHECK-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
+; CHECK-NEXT: retq
+entry:
+ %m.trunc = trunc <8 x i64> %m to <8 x i1>
+ %ret = select <8 x i1> %m.trunc, <8 x i64> %a, <8 x i64> %b
+ ret <8 x i64> %ret
+}
+
+; This is a very contrived test case to trick the legalizer into splitting the
+; v16i1 masks in the select during type legalization, and in so doing extend them
+; into two v8i64 types. This lets us ensure that the lowering code can handle
+; both formulations of vselect. All of this trickery is because we can't
+; directly form an SDAG input to the lowering.
+define <16 x double> @test2(<16 x float> %x, <16 x float> %y, <16 x double> %a, <16 x double> %b) {
+; CHECK-SKX-LABEL: test2:
+; CHECK-SKX: # BB#0: # %entry
+; CHECK-SKX-NEXT: vxorps %zmm6, %zmm6, %zmm6
+; CHECK-SKX-NEXT: vcmpltps %zmm0, %zmm6, %k0
+; CHECK-SKX-NEXT: vcmpltps %zmm6, %zmm1, %k1
+; CHECK-SKX-NEXT: korw %k1, %k0, %k0
+; CHECK-SKX-NEXT: kshiftrw $8, %k0, %k1
+; CHECK-SKX-NEXT: vpmovm2q %k1, %zmm1
+; CHECK-SKX-NEXT: vpmovm2q %k0, %zmm0
+; CHECK-SKX-NEXT: vptestmq %zmm0, %zmm0, %k1
+; CHECK-SKX-NEXT: vblendmpd %zmm2, %zmm4, %zmm0 {%k1}
+; CHECK-SKX-NEXT: vptestmq %zmm1, %zmm1, %k1
+; CHECK-SKX-NEXT: vblendmpd %zmm3, %zmm5, %zmm1 {%k1}
+; CHECK-SKX-NEXT: retq
+;
+; CHECK-KNL-LABEL: test2:
+; CHECK-KNL: # BB#0: # %entry
+; CHECK-KNL-NEXT: vpxord %zmm6, %zmm6, %zmm6
+; CHECK-KNL-NEXT: vcmpltps %zmm0, %zmm6, %k0
+; CHECK-KNL-NEXT: vcmpltps %zmm6, %zmm1, %k1
+; CHECK-KNL-NEXT: korw %k1, %k0, %k1
+; CHECK-KNL-NEXT: kshiftrw $8, %k1, %k2
+; CHECK-KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; CHECK-KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; CHECK-KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
+; CHECK-KNL-NEXT: vblendmpd %zmm2, %zmm4, %zmm0 {%k1}
+; CHECK-KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
+; CHECK-KNL-NEXT: vblendmpd %zmm3, %zmm5, %zmm1 {%k1}
+; CHECK-KNL-NEXT: retq
+entry:
+ %gt.m = fcmp ogt <16 x float> %x, zeroinitializer
+ %lt.m = fcmp olt <16 x float> %y, zeroinitializer
+ %m.or = or <16 x i1> %gt.m, %lt.m
+ %ret = select <16 x i1> %m.or, <16 x double> %a, <16 x double> %b
+ ret <16 x double> %ret
+}
OpenPOWER on IntegriCloud