author:    Craig Topper <craig.topper@gmail.com>  2017-03-19 17:11:09 +0000
committer: Craig Topper <craig.topper@gmail.com>  2017-03-19 17:11:09 +0000
commit:    5992c8d1dcc28361934b726afa5f59fbf4e53ace (patch)
tree:      561b5de819a29310702a6cb1dcbf7cfe101b6ae8 /llvm/lib/Target/X86/X86ISelLowering.cpp
parent:    ff2283ec0e98ead6c4e07566ea537ce9aee985d9 (diff)
[AVX-512] Handle kor/kand/kandn/kxor/kxnor/knot intrinsics at lowering time instead of isel
Summary:
Currently we handle these intrinsics at isel with special patterns. But since they just map to normal logic operations, we should handle them at lowering instead. This exposes them to DAG combine optimizations. Right now the kor-sequence test generates a bunch of register-class copies between GR16 and VK16 that the peephole optimizer and/or register coalescing have to remove to keep everything in the mask domain. By handling the logic-op intrinsics earlier, these copies become bitcasts in the DAG and are removed by DAG combine, which is more robust.
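For illustration only (not part of the committed patch), here is a minimal C++ sketch of the lowering shape described above, using a hypothetical helper name lowerMaskBinOp. The real change routes the operands through getMaskNode inside LowerINTRINSIC_WO_CHAIN (see the diff below), but the essential steps are the same: reinterpret the scalar mask operands as v16i1, emit a generic logic node, and bitcast back so DAG combine can see and fold the surrounding copies.

```cpp
// Minimal sketch under assumptions: the helper name and standalone form are
// illustrative, not the committed code, which uses getMaskNode and lives in
// the MASK_BINOP case of LowerINTRINSIC_WO_CHAIN.
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue lowerMaskBinOp(SDValue Op, unsigned LogicOpc, const SDLoc &dl,
                              SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();                            // e.g. MVT::i16
  MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits());  // e.g. v16i1

  // Reinterpret the scalar intrinsic operands as mask vectors.
  SDValue Src1 = DAG.getBitcast(MaskVT, Op.getOperand(1));
  SDValue Src2 = DAG.getBitcast(MaskVT, Op.getOperand(2));

  // Emit the generic logic op (ISD::AND / ISD::OR / ISD::XOR) on the mask
  // type; DAG combine can now optimize through these nodes.
  SDValue Res = DAG.getNode(LogicOpc, dl, MaskVT, Src1, Src2);

  // Bitcast back to the scalar type the intrinsic returns; redundant
  // bitcasts around it are cleaned up by DAG combine.
  return DAG.getBitcast(VT, Res);
}
```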
This should help enable my plan to stop copying between K registers and GR8/GR16. The peephole optimizer can't remove a chain of copies between K and GR32 when insert_subreg/extract_subreg are present in the chain, so the kor-sequence test breaks. But this patch should dodge that problem entirely.
Reviewers: zvi, delena, RKSimon, igorb
Reviewed By: igorb
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D31056
llvm-svn: 298228
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 36
1 file changed, 36 insertions, 0 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 207c46443ba..e07c63b4e24 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19648,6 +19648,15 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
                                 Src2, Src1);
       return DAG.getBitcast(VT, Res);
     }
+    case MASK_BINOP: {
+      MVT VT = Op.getSimpleValueType();
+      MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits());
+
+      SDValue Src1 = getMaskNode(Op.getOperand(1), MaskVT, Subtarget, DAG, dl);
+      SDValue Src2 = getMaskNode(Op.getOperand(2), MaskVT, Subtarget, DAG, dl);
+      SDValue Res = DAG.getNode(IntrData->Opc0, dl, MaskVT, Src1, Src2);
+      return DAG.getBitcast(VT, Res);
+    }
     case FIXUPIMMS:
     case FIXUPIMMS_MASKZ:
     case FIXUPIMM:
@@ -19820,6 +19829,33 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
     return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
   }
+  case Intrinsic::x86_avx512_knot_w: {
+    SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1));
+    SDValue RHS = DAG.getConstant(1, dl, MVT::v16i1);
+    SDValue Res = DAG.getNode(ISD::XOR, dl, MVT::v16i1, LHS, RHS);
+    return DAG.getBitcast(MVT::i16, Res);
+  }
+
+  case Intrinsic::x86_avx512_kandn_w: {
+    SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1));
+    // Invert LHS for the not.
+    LHS = DAG.getNode(ISD::XOR, dl, MVT::v16i1, LHS,
+                      DAG.getConstant(1, dl, MVT::v16i1));
+    SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2));
+    SDValue Res = DAG.getNode(ISD::AND, dl, MVT::v16i1, LHS, RHS);
+    return DAG.getBitcast(MVT::i16, Res);
+  }
+
+  case Intrinsic::x86_avx512_kxnor_w: {
+    SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1));
+    SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2));
+    SDValue Res = DAG.getNode(ISD::XOR, dl, MVT::v16i1, LHS, RHS);
+    // Invert result for the not.
+    Res = DAG.getNode(ISD::XOR, dl, MVT::v16i1, Res,
+                      DAG.getConstant(1, dl, MVT::v16i1));
+    return DAG.getBitcast(MVT::i16, Res);
+  }
+
   case Intrinsic::x86_sse42_pcmpistria128:
   case Intrinsic::x86_sse42_pcmpestria128:
   case Intrinsic::x86_sse42_pcmpistric128:
```