summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp37
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h4
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td33
-rw-r--r--llvm/lib/Target/X86/X86InstrFragmentsSIMD.td7
-rw-r--r--llvm/lib/Target/X86/X86IntrinsicsInfo.h74
5 files changed, 73 insertions, 82 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5270e471959..09bd7bf274a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4372,7 +4372,6 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::VPPERM:
case X86ISD::VPERMV:
case X86ISD::VPERMV3:
- case X86ISD::VPERMIV3:
case X86ISD::VZEXT_MOVL:
return true;
}
@@ -4388,7 +4387,6 @@ static bool isTargetShuffleVariableMask(unsigned Opcode) {
case X86ISD::VPPERM:
case X86ISD::VPERMV:
case X86ISD::VPERMV3:
- case X86ISD::VPERMIV3:
return true;
// 'Faux' Target Shuffles.
case ISD::AND:
@@ -5977,21 +5975,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
}
return false;
}
- case X86ISD::VPERMIV3: {
- assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- assert(N->getOperand(2).getValueType() == VT && "Unexpected value type");
- IsUnary = IsFakeUnary = N->getOperand(1) == N->getOperand(2);
- // Unlike most shuffle nodes, VPERMIV3's mask operand is the first one.
- Ops.push_back(N->getOperand(1));
- Ops.push_back(N->getOperand(2));
- SDValue MaskNode = N->getOperand(0);
- unsigned MaskEltSize = VT.getScalarSizeInBits();
- if (auto *C = getTargetConstantFromNode(MaskNode)) {
- DecodeVPERMV3Mask(C, MaskEltSize, Mask);
- break;
- }
- return false;
- }
default: llvm_unreachable("unknown target shuffle node");
}
@@ -20540,9 +20523,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue Src3 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
MVT VT = Op.getSimpleValueType();
- SDValue PassThru = SDValue();
// set PassThru element
+ SDValue PassThru;
if (IntrData->Type == VPERM_3OP_MASKZ)
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
else
@@ -20554,6 +20537,22 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Src2, Src1, Src3),
Mask, PassThru, Subtarget, DAG);
}
+ case VPERMI_3OP_MASK:{
+ // Src2 is the PassThru
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue Src3 = Op.getOperand(3);
+ SDValue Mask = Op.getOperand(4);
+ MVT VT = Op.getSimpleValueType();
+
+ // set PassThru element
+ SDValue PassThru = DAG.getBitcast(VT, Src2);
+
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
+ dl, Op.getValueType(),
+ Src1, Src2, Src3),
+ Mask, PassThru, Subtarget, DAG);
+ }
case FMA_OP_MASK3:
case FMA_OP_MASKZ:
case FMA_OP_MASK: {
@@ -25873,7 +25872,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
case X86ISD::VPERMV: return "X86ISD::VPERMV";
case X86ISD::VPERMV3: return "X86ISD::VPERMV3";
- case X86ISD::VPERMIV3: return "X86ISD::VPERMIV3";
case X86ISD::VPERMI: return "X86ISD::VPERMI";
case X86ISD::VPTERNLOG: return "X86ISD::VPTERNLOG";
case X86ISD::VFIXUPIMM: return "X86ISD::VFIXUPIMM";
@@ -38861,7 +38859,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::VPERMI:
case X86ISD::VPERMV:
case X86ISD::VPERMV3:
- case X86ISD::VPERMIV3:
case X86ISD::VPERMIL2:
case X86ISD::VPERMILPI:
case X86ISD::VPERMILPV:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 58d0c9d92af..dd6be5b8e48 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -428,10 +428,6 @@ namespace llvm {
// Res = VPERMV3 V0, MaskV, V1
VPERMV3,
- // 3-op Variable Permute overwriting the index (VPERMI2).
- // Res = VPERMIV3 V0, MaskV, V1
- VPERMIV3,
-
// Bitwise ternary logic.
VPTERNLOG,
// Fix Up Special Packed Float32/64 values.
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 5fecf0a0a13..3bf097bd79c 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -334,6 +334,7 @@ multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
+// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
X86VectorVTInfo InVT,
dag Outs, dag NonTiedIns, string OpcodeStr,
@@ -343,7 +344,7 @@ multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
!con((ins InVT.RC:$src1), NonTiedIns),
!con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
!con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
- OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
+ OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
(vselect InVT.KRCWM:$mask, RHS,
(bitconvert InVT.RC:$src1)),
vselect, "", IsCommutable>;
@@ -1719,17 +1720,19 @@ defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
-let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
+let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
+ hasSideEffects = 0 in {
defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2, _.RC:$src3)), 1>,
+ (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
EVEX_4V, AVX5128IBase, Sched<[sched]>;
+ let mayLoad = 1 in
defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2,
+ (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
(_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>,
EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
}
@@ -1738,13 +1741,14 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
- let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
+ hasSideEffects = 0, mayLoad = 1 in
defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
- (_.VT (X86VPermi2X IdxVT.RC:$src1,
- _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
+ (_.VT (X86VPermt2 _.RC:$src2,
+ IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
AVX5128IBase, EVEX_4V, EVEX_B,
Sched<[sched.Folded, ReadAfterLd]>;
}
@@ -1806,21 +1810,22 @@ multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
X86VectorVTInfo IdxVT,
X86VectorVTInfo CastVT> {
def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86VPermi2X (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
- (_.VT _.RC:$src2), _.RC:$src3),
+ (X86VPermt2 (_.VT _.RC:$src2),
+ (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
(_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
(!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, _.RC:$src3)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86VPermi2X (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
- _.RC:$src2, (_.LdFrag addr:$src3)),
+ (X86VPermt2 _.RC:$src2,
+ (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
+ (_.LdFrag addr:$src3)),
(_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
(!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86VPermi2X (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
- _.RC:$src2,
- (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
+ (X86VPermt2 _.RC:$src2,
+ (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
+ (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
(_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
(!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3)>;
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index cbe37b64b12..5a8ca994235 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -417,13 +417,6 @@ def X86VPermt2 : SDNode<"X86ISD::VPERMV3",
SDTCisSameSizeAs<0,2>,
SDTCisSameAs<0,3>]>, []>;
-def X86VPermi2X : SDNode<"X86ISD::VPERMIV3",
- SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<1>,
- SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>,
- SDTCisSameSizeAs<0,1>,
- SDTCisSameAs<0,2>,
- SDTCisSameAs<0,3>]>, []>;
-
def X86vpternlog : SDNode<"X86ISD::VPTERNLOG", SDTTernlog>;
def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index d5263767db1..3e3a62ec82e 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -31,7 +31,7 @@ enum IntrinsicType : uint16_t {
FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3,
FMA_OP_SCALAR_MASK, FMA_OP_SCALAR_MASKZ, FMA_OP_SCALAR_MASK3,
IFMA_OP,
- VPERM_2OP, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK,
+ VPERM_2OP, VPERMI_3OP_MASK, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK,
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK,
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
@@ -1061,42 +1061,42 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_256, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0),
X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_512, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_512, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_128, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_256, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_512, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_128, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_256, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_512, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_128, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_256, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_512, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_128, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_256, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_512, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_128, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_256, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_512, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_512, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_128, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_256, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_512, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_128, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_256, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_512, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_128, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_256, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_512, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_128, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_256, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_512, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_128, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_256, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_512, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_256, VPERM_3OP_MASK,
OpenPOWER on IntegriCloud