diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 53 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 150 |
2 files changed, 160 insertions, 43 deletions
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 83c1251265f..cb1c7d3c339 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -510,6 +510,7 @@ namespace {
     bool combineIncDecVector(SDNode *Node);
     bool tryShrinkShlLogicImm(SDNode *N);
     bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
+    bool tryMatchBitSelect(SDNode *N);
 
     MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
                                 const SDLoc &dl, MVT VT, SDNode *Node);
@@ -4275,6 +4276,55 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
   return true;
 }
 
+// Try to match the bitselect pattern (or (and A, B), (andn A, C)) and turn it
+// into a single VPTERNLOG with immediate 0xCA (per bit: B where A is set, else C).
+bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) {
+  assert(N->getOpcode() == ISD::OR && "Unexpected opcode!");
+
+  MVT NVT = N->getSimpleValueType(0);
+
+  // Make sure we support VPTERNLOG.
+  if (!NVT.isVector() || !Subtarget->hasAVX512())
+    return false;
+
+  // We need VLX for 128/256-bit.
+  if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
+    return false;
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  // Canonicalize AND to LHS.
+  if (N1.getOpcode() == ISD::AND)
+    std::swap(N0, N1);
+
+  if (N0.getOpcode() != ISD::AND ||
+      N1.getOpcode() != X86ISD::ANDNP ||
+      !N0.hasOneUse() || !N1.hasOneUse())
+    return false;
+
+  // ANDN is not commutative, so use its operand order to pin down A and C.
+  SDValue A = N1.getOperand(0);
+  SDValue C = N1.getOperand(1);
+
+  // AND is commutative: if one operand matches A, the other operand is B.
+  // Otherwise this isn't a match.
+  SDValue B;
+  if (N0.getOperand(0) == A)
+    B = N0.getOperand(1);
+  else if (N0.getOperand(1) == A)
+    B = N0.getOperand(0);
+  else
+    return false;
+
+  SDLoc dl(N);
+  SDValue Imm = CurDAG->getTargetConstant(0xCA, dl, MVT::i8);
+  SDValue Ternlog = CurDAG->getNode(X86ISD::VPTERNLOG, dl, NVT, A, B, C, Imm);
+  ReplaceNode(N, Ternlog.getNode());
+  SelectCode(Ternlog.getNode());
+  return true;
+}
+
 void X86DAGToDAGISel::Select(SDNode *Node) {
   MVT NVT = Node->getSimpleValueType(0);
   unsigned Opcode = Node->getOpcode();
@@ -4433,6 +4483,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
     if (tryShrinkShlLogicImm(Node))
       return;
 
+    if (Opcode == ISD::OR && tryMatchBitSelect(Node))
+      return;
+
     LLVM_FALLTHROUGH;
   case ISD::ADD:
   case ISD::SUB: {
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 60bcf3e2dfd..18c95a631c9 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -11436,6 +11436,113 @@ defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
                                         avx512vl_i64_info>, VEX_W;
 
+// Patterns to select X86vpternlog on vXi16/vXi8 vectors using VPTERNLOGQ.
+let Predicates = [HasVLX] in {
+  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
+                                 (i8 timm:$src4))),
+            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
+             timm:$src4)>;
+  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
+                                 (loadv16i8 addr:$src3), (i8 timm:$src4))),
+            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+             timm:$src4)>;
+  def : Pat<(v16i8 (X86vpternlog (loadv16i8 addr:$src3), VR128X:$src2,
+                                 VR128X:$src1, (i8 timm:$src4))),
+            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+             (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, (loadv16i8 addr:$src3),
+                                 VR128X:$src2, (i8 timm:$src4))),
+            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+             (VPTERNLOG132_imm8 timm:$src4))>;
+
+  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
+                                 (i8 timm:$src4))),
+            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
+             timm:$src4)>;
+  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
+                                 (loadv8i16 addr:$src3), (i8 timm:$src4))),
+            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+             timm:$src4)>;
+  def : Pat<(v8i16 (X86vpternlog (loadv8i16 addr:$src3), VR128X:$src2,
+                                 VR128X:$src1, (i8 timm:$src4))),
+            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+             (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, (loadv8i16 addr:$src3),
+                                 VR128X:$src2, (i8 timm:$src4))),
+            (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+             (VPTERNLOG132_imm8 timm:$src4))>;
+
+  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
+                                 (i8 timm:$src4))),
+            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
+             timm:$src4)>;
+  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
+                                 (loadv32i8 addr:$src3), (i8 timm:$src4))),
+            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+             timm:$src4)>;
+  def : Pat<(v32i8 (X86vpternlog (loadv32i8 addr:$src3), VR256X:$src2,
+                                 VR256X:$src1, (i8 timm:$src4))),
+            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+             (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, (loadv32i8 addr:$src3),
+                                 VR256X:$src2, (i8 timm:$src4))),
+            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+             (VPTERNLOG132_imm8 timm:$src4))>;
+
+  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
+                                  (i8 timm:$src4))),
+            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
+             timm:$src4)>;
+  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
+                                  (loadv16i16 addr:$src3), (i8 timm:$src4))),
+            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+             timm:$src4)>;
+  def : Pat<(v16i16 (X86vpternlog (loadv16i16 addr:$src3), VR256X:$src2,
+                                  VR256X:$src1, (i8 timm:$src4))),
+            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+             (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, (loadv16i16 addr:$src3),
+                                  VR256X:$src2, (i8 timm:$src4))),
+            (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+             (VPTERNLOG132_imm8 timm:$src4))>;
+}
+
+let Predicates = [HasAVX512] in {
+  def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
+                                 (i8 timm:$src4))),
+            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
+             timm:$src4)>;
+  def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
+                                 (loadv64i8 addr:$src3), (i8 timm:$src4))),
+            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+             timm:$src4)>;
+  def : Pat<(v64i8 (X86vpternlog (loadv64i8 addr:$src3), VR512:$src2,
+                                 VR512:$src1, (i8 timm:$src4))),
+            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+             (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v64i8 (X86vpternlog VR512:$src1, (loadv64i8 addr:$src3),
+                                 VR512:$src2, (i8 timm:$src4))),
+            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+             (VPTERNLOG132_imm8 timm:$src4))>;
+
+  def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
+                                  (i8 timm:$src4))),
+            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
+             timm:$src4)>;
+  def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
+                                  (loadv32i16 addr:$src3), (i8 timm:$src4))),
+            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+             timm:$src4)>;
+  def : Pat<(v32i16 (X86vpternlog (loadv32i16 addr:$src3), VR512:$src2,
+                                  VR512:$src1, (i8 timm:$src4))),
+            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+             (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3),
+                                  VR512:$src2, (i8 timm:$src4))),
+            (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+             (VPTERNLOG132_imm8 timm:$src4))>;
+}
+
 // Patterns to implement vnot using vpternlog instead of creating all ones
 // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
 // so that the result is only dependent on src0. But we use the same source
@@ -11533,49 +11640,6 @@ let Predicates = [HasVLX] in {
             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
 }
 
-let Predicates = [HasVLX] in {
-  def : Pat<(v16i8 (or (and VR128X:$src1, VR128X:$src2),
-                       (X86andnp VR128X:$src1, VR128X:$src3))),
-            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
-  def : Pat<(v8i16 (or (and VR128X:$src1, VR128X:$src2),
-                       (X86andnp VR128X:$src1, VR128X:$src3))),
-            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
-  def : Pat<(v4i32 (or (and VR128X:$src1, VR128X:$src2),
-                       (X86andnp VR128X:$src1, VR128X:$src3))),
-            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
-  def : Pat<(v2i64 (or (and VR128X:$src1, VR128X:$src2),
-                       (X86andnp VR128X:$src1, VR128X:$src3))),
-            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
-
-  def : Pat<(v32i8 (or (and VR256X:$src1, VR256X:$src2),
-                       (X86andnp VR256X:$src1, VR256X:$src3))),
-            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
-  def : Pat<(v16i16 (or (and VR256X:$src1, VR256X:$src2),
-                        (X86andnp VR256X:$src1, VR256X:$src3))),
-            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
-  def : Pat<(v8i32 (or (and VR256X:$src1, VR256X:$src2),
-                       (X86andnp VR256X:$src1, VR256X:$src3))),
-            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
-  def : Pat<(v4i64 (or (and VR256X:$src1, VR256X:$src2),
-                       (X86andnp VR256X:$src1, VR256X:$src3))),
-            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
-}
-
-let Predicates = [HasAVX512] in {
-  def : Pat<(v64i8 (or (and VR512:$src1, VR512:$src2),
-                       (X86andnp VR512:$src1, VR512:$src3))),
-            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
-  def : Pat<(v32i16 (or (and VR512:$src1, VR512:$src2),
-                        (X86andnp VR512:$src1, VR512:$src3))),
-            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
-  def : Pat<(v16i32 (or (and VR512:$src1, VR512:$src2),
-                        (X86andnp VR512:$src1, VR512:$src3))),
-            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
-  def : Pat<(v8i64 (or (and VR512:$src1, VR512:$src2),
-                       (X86andnp VR512:$src1, VR512:$src3))),
-            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
-}
-
 //===----------------------------------------------------------------------===//
 // AVX-512 - FixupImm
 //===----------------------------------------------------------------------===//
|