summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/IR/AutoUpgrade.cpp 64
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 38
-rw-r--r-- llvm/lib/Target/X86/X86IntrinsicsInfo.h 113
3 files changed, 65 insertions, 150 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index bd4638f147a..2455d3bb3d9 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -267,6 +267,9 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
+ Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
+ Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
+ Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
Name == "sse.cvtsi2ss" || // Added in 7.0
Name == "sse.cvtsi642ss" || // Added in 7.0
Name == "sse2.cvtsi2sd" || // Added in 7.0
@@ -2599,6 +2602,67 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
: CI->getArgOperand(0);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
+ } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
+ Name.startswith("avx512.mask.vpermt2var.") ||
+ Name.startswith("avx512.maskz.vpermt2var."))) {
+ bool ZeroMask = Name[11] == 'z';
+ bool IndexForm = Name[17] == 'i';
+ unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
+ unsigned EltWidth = CI->getType()->getScalarSizeInBits();
+ bool IsFloat = CI->getType()->isFPOrFPVectorTy();
+ Intrinsic::ID IID;
+ if (VecWidth == 128 && EltWidth == 32 && IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
+ else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_d_128;
+ else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
+ else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_q_128;
+ else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
+ else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_d_256;
+ else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
+ else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_q_256;
+ else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
+ else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_d_512;
+ else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
+ else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_q_512;
+ else if (VecWidth == 128 && EltWidth == 16)
+ IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
+ else if (VecWidth == 256 && EltWidth == 16)
+ IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
+ else if (VecWidth == 512 && EltWidth == 16)
+ IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
+ else if (VecWidth == 128 && EltWidth == 8)
+ IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
+ else if (VecWidth == 256 && EltWidth == 8)
+ IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
+ else if (VecWidth == 512 && EltWidth == 8)
+ IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
+ else
+ llvm_unreachable("Unexpected intrinsic");
+
+ Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
+ CI->getArgOperand(2) };
+
+ // If this isn't index form we need to swap operand 0 and 1.
+ if (!IndexForm)
+ std::swap(Args[0], Args[1]);
+
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
+ Args);
+ Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
+ : Builder.CreateBitCast(CI->getArgOperand(1),
+ CI->getType());
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
} else if (IsX86 && Name.startswith("avx512.mask.") &&
upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
// Rep will be updated by the call in the condition.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 09bd7bf274a..15eeddf0faa 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -20515,44 +20515,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// Swap Src1 and Src2 in the node creation
return DAG.getNode(IntrData->Opc0, dl, VT,Src2, Src1);
}
- case VPERM_3OP_MASKZ:
- case VPERM_3OP_MASK:{
- // Src2 is the PassThru
- SDValue Src1 = Op.getOperand(1);
- SDValue Src2 = Op.getOperand(2);
- SDValue Src3 = Op.getOperand(3);
- SDValue Mask = Op.getOperand(4);
- MVT VT = Op.getSimpleValueType();
-
- // set PassThru element
- SDValue PassThru;
- if (IntrData->Type == VPERM_3OP_MASKZ)
- PassThru = getZeroVector(VT, Subtarget, DAG, dl);
- else
- PassThru = DAG.getBitcast(VT, Src2);
-
- // Swap Src1 and Src2 in the node creation
- return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
- dl, Op.getValueType(),
- Src2, Src1, Src3),
- Mask, PassThru, Subtarget, DAG);
- }
- case VPERMI_3OP_MASK:{
- // Src2 is the PassThru
- SDValue Src1 = Op.getOperand(1);
- SDValue Src2 = Op.getOperand(2);
- SDValue Src3 = Op.getOperand(3);
- SDValue Mask = Op.getOperand(4);
- MVT VT = Op.getSimpleValueType();
-
- // set PassThru element
- SDValue PassThru = DAG.getBitcast(VT, Src2);
-
- return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
- dl, Op.getValueType(),
- Src1, Src2, Src3),
- Mask, PassThru, Subtarget, DAG);
- }
case FMA_OP_MASK3:
case FMA_OP_MASKZ:
case FMA_OP_MASK: {
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 3a79af9a1f5..7b5df0fd325 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -30,8 +30,7 @@ enum IntrinsicType : uint16_t {
INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_IMM8_MASK,
FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3,
FMA_OP_SCALAR_MASK, FMA_OP_SCALAR_MASKZ, FMA_OP_SCALAR_MASK3,
- IFMA_OP,
- VPERM_2OP, VPERMI_3OP_MASK, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK,
+ IFMA_OP, VPERM_2OP, INTR_TYPE_SCALAR_MASK,
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK,
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
@@ -1061,79 +1060,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_256, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0),
X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_512, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERMI_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERMI_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_512, VPERMI_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_128, VPERMI_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_256, VPERMI_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_512, VPERMI_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_128, VPERMI_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_256, VPERMI_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_512, VPERMI_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_128, VPERMI_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_256, VPERMI_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_512, VPERMI_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_128, VPERMI_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_256, VPERMI_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_512, VPERMI_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_128, VPERMI_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_256, VPERMI_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_512, VPERMI_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_256, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_512, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_128, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_256, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_512, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_128, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_256, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_512, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_128, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_256, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_512, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_128, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_256, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_512, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_qi_128, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_qi_256, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_qi_512, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
-
X86_INTRINSIC_DATA(avx512_mask_vpshld_d_128, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
X86_INTRINSIC_DATA(avx512_mask_vpshld_d_256, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
X86_INTRINSIC_DATA(avx512_mask_vpshld_d_512, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
@@ -1277,43 +1203,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_maskz_vpdpwssds_256, FMA_OP_MASKZ, X86ISD::VPDPWSSDS, 0),
X86_INTRINSIC_DATA(avx512_maskz_vpdpwssds_512, FMA_OP_MASKZ, X86ISD::VPDPWSSDS, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_128, VPERM_3OP_MASKZ,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_256, VPERM_3OP_MASKZ,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_512, VPERM_3OP_MASKZ,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_hi_128, VPERM_3OP_MASKZ,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_hi_256, VPERM_3OP_MASKZ,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_hi_512, VPERM_3OP_MASKZ,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_pd_128, VPERM_3OP_MASKZ,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_pd_256, VPERM_3OP_MASKZ,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_pd_512, VPERM_3OP_MASKZ,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_ps_128, VPERM_3OP_MASKZ,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_ps_256, VPERM_3OP_MASKZ,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_ps_512, VPERM_3OP_MASKZ,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_128, VPERM_3OP_MASKZ,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_256, VPERM_3OP_MASKZ,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_512, VPERM_3OP_MASKZ,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_qi_128, VPERM_3OP_MASKZ,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_qi_256, VPERM_3OP_MASKZ,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_qi_512, VPERM_3OP_MASKZ,
- X86ISD::VPERMV3, 0),
-
X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_128, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_256, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_512, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
OpenPOWER on IntegriCloud