Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/IR/AutoUpgrade.cpp              | 38
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp  | 10
-rw-r--r--  llvm/lib/Target/X86/X86IntrinsicsInfo.h  | 26
3 files changed, 25 insertions, 49 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index c6c6f0055d6..a87b9bec1ed 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -342,6 +342,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
          Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
          Name.startswith("avx.vinsertf128.") || // Added in 3.7
          Name == "avx2.vinserti128" || // Added in 3.7
+         Name.startswith("avx512.mask.insert") || // Added in 4.0
          Name.startswith("avx.vextractf128.") || // Added in 3.7
          Name == "avx2.vextracti128" || // Added in 3.7
          Name.startswith("avx512.mask.vextract") || // Added in 4.0
@@ -1151,21 +1152,25 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
     } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
-                         Name == "avx2.vinserti128")) {
+                         Name == "avx2.vinserti128" ||
+                         Name.startswith("avx512.mask.insert"))) {
       Value *Op0 = CI->getArgOperand(0);
       Value *Op1 = CI->getArgOperand(1);
       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
-      VectorType *VecTy = cast<VectorType>(CI->getType());
-      unsigned NumElts = VecTy->getNumElements();
+      unsigned DstNumElts = CI->getType()->getVectorNumElements();
+      unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
+      unsigned Scale = DstNumElts / SrcNumElts;
 
       // Mask off the high bits of the immediate value; hardware ignores those.
-      Imm = Imm & 1;
+      Imm = Imm % Scale;
 
-      // Extend the second operand into a vector that is twice as big.
+      // Extend the second operand into a vector the size of the destination.
       Value *UndefV = UndefValue::get(Op1->getType());
-      SmallVector<uint32_t, 8> Idxs(NumElts);
-      for (unsigned i = 0; i != NumElts; ++i)
+      SmallVector<uint32_t, 8> Idxs(DstNumElts);
+      for (unsigned i = 0; i != SrcNumElts; ++i)
         Idxs[i] = i;
+      for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
+        Idxs[i] = SrcNumElts;
       Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
 
       // Insert the second operand into the first operand.
@@ -1179,15 +1184,18 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       // Imm = 1  <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
       // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
 
-      // The low half of the result is either the low half of the 1st operand
-      // or the low half of the 2nd operand (the inserted vector).
-      for (unsigned i = 0; i != NumElts / 2; ++i)
-        Idxs[i] = Imm ? i : (i + NumElts);
-      // The high half of the result is either the low half of the 2nd operand
-      // (the inserted vector) or the high half of the 1st operand.
-      for (unsigned i = NumElts / 2; i != NumElts; ++i)
-        Idxs[i] = Imm ? (i + NumElts / 2) : i;
+      // First fill with identity mask.
+      for (unsigned i = 0; i != DstNumElts; ++i)
+        Idxs[i] = i;
+      // Then replace the elements where we need to insert.
+      for (unsigned i = 0; i != SrcNumElts; ++i)
+        Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
       Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
+
+      // If the intrinsic has a mask operand, handle that.
+      if (CI->getNumArgOperands() == 5)
+        Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
+                            CI->getArgOperand(3));
     } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
                          Name == "avx2.vextracti128" ||
                          Name.startswith("avx512.mask.vextract"))) {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b293dfa98f8..66e71ab107f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18660,8 +18660,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
                                   Mask, PassThru, Subtarget, DAG);
     }
     case INTR_TYPE_3OP_IMM8_MASK:
-    case INTR_TYPE_3OP_MASK:
-    case INSERT_SUBVEC: {
+    case INTR_TYPE_3OP_MASK: {
       SDValue Src1 = Op.getOperand(1);
       SDValue Src2 = Op.getOperand(2);
       SDValue Src3 = Op.getOperand(3);
@@ -18670,13 +18669,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
       if (IntrData->Type == INTR_TYPE_3OP_IMM8_MASK)
         Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3);
-      else if (IntrData->Type == INSERT_SUBVEC) {
-        // imm should be adapted to ISD::INSERT_SUBVECTOR behavior
-        assert(isa<ConstantSDNode>(Src3) && "Expected a ConstantSDNode here!");
-        unsigned Imm = cast<ConstantSDNode>(Src3)->getZExtValue();
-        Imm *= Src2.getSimpleValueType().getVectorNumElements();
-        Src3 = DAG.getTargetConstant(Imm, dl, MVT::i32);
-      }
 
       // We specify 2 possible opcodes for intrinsics with rounding modes.
       // First, we check if the intrinsic may have non-default rounding mode,
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index df47b4ad583..63a02af02fa 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -34,7 +34,7 @@ enum IntrinsicType : uint16_t {
   INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM,
   COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC, BRCST32x2_TO_VEC,
   TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
-  EXPAND_FROM_MEM, INSERT_SUBVEC,
+  EXPAND_FROM_MEM,
   TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ,
   FIXUPIMMS, FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK
 };
@@ -795,30 +795,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
                      X86ISD::VGETMANTS, 0),
   X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK_RM,
                      X86ISD::VGETMANTS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_insertf32x4_256, INSERT_SUBVEC,
-                     ISD::INSERT_SUBVECTOR, 0),
-  X86_INTRINSIC_DATA(avx512_mask_insertf32x4_512, INSERT_SUBVEC,
-                     ISD::INSERT_SUBVECTOR, 0),
-  X86_INTRINSIC_DATA(avx512_mask_insertf32x8_512, INSERT_SUBVEC,
-                     ISD::INSERT_SUBVECTOR, 0),
-  X86_INTRINSIC_DATA(avx512_mask_insertf64x2_256, INSERT_SUBVEC,
-                     ISD::INSERT_SUBVECTOR, 0),
-  X86_INTRINSIC_DATA(avx512_mask_insertf64x2_512, INSERT_SUBVEC,
-                     ISD::INSERT_SUBVECTOR, 0),
-  X86_INTRINSIC_DATA(avx512_mask_insertf64x4_512, INSERT_SUBVEC,
-                     ISD::INSERT_SUBVECTOR, 0),
-  X86_INTRINSIC_DATA(avx512_mask_inserti32x4_256, INSERT_SUBVEC,
-                     ISD::INSERT_SUBVECTOR, 0),
-  X86_INTRINSIC_DATA(avx512_mask_inserti32x4_512, INSERT_SUBVEC,
-                     ISD::INSERT_SUBVECTOR, 0),
-  X86_INTRINSIC_DATA(avx512_mask_inserti32x8_512, INSERT_SUBVEC,
-                     ISD::INSERT_SUBVECTOR, 0),
-  X86_INTRINSIC_DATA(avx512_mask_inserti64x2_256, INSERT_SUBVEC,
-                     ISD::INSERT_SUBVECTOR, 0),
-  X86_INTRINSIC_DATA(avx512_mask_inserti64x2_512, INSERT_SUBVEC,
-                     ISD::INSERT_SUBVECTOR, 0),
-  X86_INTRINSIC_DATA(avx512_mask_inserti64x4_512, INSERT_SUBVEC,
-                     ISD::INSERT_SUBVECTOR, 0),
   X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_128, INTR_TYPE_1OP_MASK,
                      ISD::CTLZ, 0),
   X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_256,
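For readers tracing the new AutoUpgrade path: each avx512.mask.insert* call is now replaced by two shufflevectors (widen the subvector, then splice it in) plus a select for the mask operand. Below is a minimal standalone sketch of the index arithmetic, not taken from the commit; the names and example sizes (a 4-element subvector into a 16-element destination, matching insertf32x4 into a 512-bit vector) are illustrative only.

// Standalone illustration (not from this commit) of the shuffle-mask
// arithmetic used by the upgraded avx512.mask.insert* intrinsics.
#include <cstdio>
#include <vector>

int main() {
  unsigned DstNumElts = 16, SrcNumElts = 4; // e.g. v4f32 into v16f32
  unsigned Scale = DstNumElts / SrcNumElts; // 4 legal insert positions
  unsigned Imm = 6 % Scale;                 // high bits ignored -> position 2

  // First shuffle: widen the subvector to destination width. Index
  // SrcNumElts selects lane 0 of the second (undef) shuffle operand.
  std::vector<unsigned> Widen(DstNumElts);
  for (unsigned i = 0; i != SrcNumElts; ++i)
    Widen[i] = i;
  for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
    Widen[i] = SrcNumElts;

  // Second shuffle: identity over the first operand, then redirect the
  // Imm'th group of SrcNumElts lanes to the widened subvector, whose
  // lanes start at index DstNumElts.
  std::vector<unsigned> Idxs(DstNumElts);
  for (unsigned i = 0; i != DstNumElts; ++i)
    Idxs[i] = i;
  for (unsigned i = 0; i != SrcNumElts; ++i)
    Idxs[i + Imm * SrcNumElts] = i + DstNumElts;

  for (unsigned i : Widen)
    std::printf("%u ", i); // 0 1 2 3 4 4 4 4 4 4 4 4 4 4 4 4
  std::printf("\n");
  for (unsigned i : Idxs)
    std::printf("%u ", i); // 0 1 2 3 4 5 6 7 16 17 18 19 12 13 14 15
  std::printf("\n");
}

The old code's `Imm & 1` only distinguished the two positions of a 128-bit insert into a 256-bit vector; `Imm % Scale` generalizes this to any subvector-to-vector ratio, and when the call carries five operands the last two (passthru and mask) are folded in via EmitX86Select.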

