author     Craig Topper <craig.topper@intel.com>   2018-05-28 19:33:11 +0000
committer  Craig Topper <craig.topper@intel.com>   2018-05-28 19:33:11 +0000
commit     dcfcfdb0d166fff8388bdd2edc5a2948054c9da1 (patch)
tree       ed53f4e61304ba4f384eb0cc2ee6716e91a4d921
parent     6b545182fbab96e6656a09e6d75d66935843d356 (diff)
[X86] Converge X86ISD::VPERMV3 and X86ISD::VPERMIV3 to a single opcode.
These do the same thing with the first and second sources swapped. They previously came from separate intrinsics that specified different masking behavior, but we can cover that with isel patterns and a single node. This is a step towards reducing the number of intrinsics needed.

A bunch of tests change because we are now biased toward choosing VPERMT over VPERMI when there is nothing to signal that commuting is beneficial.

llvm-svn: 333383
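For context, here is a minimal scalar model of the semantics the two opcodes share (illustrative C++, not part of the patch; the helper names are invented here, and reducing each index modulo twice the element count is an assumption about how out-of-range indices behave):

#include <array>
#include <cassert>
#include <cstdint>

using Vec4 = std::array<uint32_t, 4>;

// Two-source variable permute: each index selects an element from the
// eight-element concatenation A:B.
static Vec4 permutex2var(const Vec4 &A, const Vec4 &Idx, const Vec4 &B) {
  Vec4 R{};
  for (int I = 0; I != 4; ++I) {
    uint32_t Sel = Idx[I] % 8; // assume only the low index bits matter
    R[I] = Sel < 4 ? A[Sel] : B[Sel - 4];
  }
  return R;
}

// VPERMT2*: the tied destination register holds the first DATA source.
static Vec4 vpermt2(const Vec4 &DstData, const Vec4 &Idx, const Vec4 &Src2) {
  return permutex2var(DstData, Idx, Src2);
}

// VPERMI2*: the tied destination register holds the INDICES.
static Vec4 vpermi2(const Vec4 &Src1, const Vec4 &DstIdx, const Vec4 &Src2) {
  return permutex2var(Src1, DstIdx, Src2);
}

int main() {
  Vec4 A{0, 1, 2, 3}, B{4, 5, 6, 7}, Idx{7, 0, 5, 2};
  // Identical results; the forms differ only in which input shares a
  // register with (and is overwritten by) the result.
  assert(vpermt2(A, Idx, B) == vpermi2(A, Idx, B));
  return 0;
}

Because the two forms compute the same function, a single X86ISD::VPERMV3 node can represent both, and isel is free to commute operands to clobber whichever register is cheapest, which is why several tests below flip from vpermi2* to vpermt2*.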
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp          | 37
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.h            |  4
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td            | 33
-rw-r--r--  llvm/lib/Target/X86/X86InstrFragmentsSIMD.td     |  7
-rw-r--r--  llvm/lib/Target/X86/X86IntrinsicsInfo.h          | 74
-rw-r--r--  llvm/test/CodeGen/X86/avx512-intrinsics.ll       | 16
-rw-r--r--  llvm/test/CodeGen/X86/avx512bw-intrinsics.ll     |  8
-rw-r--r--  llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll   |  8
-rw-r--r--  llvm/test/CodeGen/X86/avx512vbmi-intrinsics.ll   | 10
-rw-r--r--  llvm/test/CodeGen/X86/avx512vbmivl-intrinsics.ll | 14
-rw-r--r--  llvm/test/CodeGen/X86/avx512vl-intrinsics.ll     | 16
11 files changed, 109 insertions, 118 deletions
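The differing masking behavior the message mentions, in the same sketch style (again illustrative, not from the patch): under merge masking, VPERMT2 keeps the old lanes of the data register while VPERMI2 keeps the old lanes of the index register, which is what the new VPERMI_3OP_MASK lowering in the diff below models by passing Src2 (the index operand) as the pass-through value.

#include <array>
#include <cstdint>
#include <cstdio>

using Vec4 = std::array<uint32_t, 4>;

static Vec4 permutex2var(const Vec4 &A, const Vec4 &Idx, const Vec4 &B) {
  Vec4 R{};
  for (int I = 0; I != 4; ++I) {
    uint32_t Sel = Idx[I] % 8;
    R[I] = Sel < 4 ? A[Sel] : B[Sel - 4];
  }
  return R;
}

// Merge masking: masked-off lanes keep the old contents of the tied
// destination register (zero masking would substitute zero instead).
static Vec4 merge(uint8_t Mask, const Vec4 &New, const Vec4 &Old) {
  Vec4 R{};
  for (int I = 0; I != 4; ++I)
    R[I] = ((Mask >> I) & 1) ? New[I] : Old[I];
  return R;
}

int main() {
  Vec4 A{0, 1, 2, 3}, B{4, 5, 6, 7}, Idx{7, 0, 5, 2};
  uint8_t K = 0b0101;
  Vec4 P = permutex2var(A, Idx, B);
  // vpermt2d %B, %Idx, %A {%k}: masked-off lanes keep A's old values.
  Vec4 T = merge(K, P, A);
  // vpermi2d %B, %A, %Idx {%k}: masked-off lanes keep Idx's old values,
  // matching PassThru = Src2 in the new VPERMI_3OP_MASK case.
  Vec4 I2 = merge(K, P, Idx);
  for (int I = 0; I != 4; ++I)
    std::printf("%u %u\n", (unsigned)T[I], (unsigned)I2[I]);
  return 0;
}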
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5270e471959..09bd7bf274a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4372,7 +4372,6 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::VPPERM:
case X86ISD::VPERMV:
case X86ISD::VPERMV3:
- case X86ISD::VPERMIV3:
case X86ISD::VZEXT_MOVL:
return true;
}
@@ -4388,7 +4387,6 @@ static bool isTargetShuffleVariableMask(unsigned Opcode) {
case X86ISD::VPPERM:
case X86ISD::VPERMV:
case X86ISD::VPERMV3:
- case X86ISD::VPERMIV3:
return true;
// 'Faux' Target Shuffles.
case ISD::AND:
@@ -5977,21 +5975,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
}
return false;
}
- case X86ISD::VPERMIV3: {
- assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- assert(N->getOperand(2).getValueType() == VT && "Unexpected value type");
- IsUnary = IsFakeUnary = N->getOperand(1) == N->getOperand(2);
- // Unlike most shuffle nodes, VPERMIV3's mask operand is the first one.
- Ops.push_back(N->getOperand(1));
- Ops.push_back(N->getOperand(2));
- SDValue MaskNode = N->getOperand(0);
- unsigned MaskEltSize = VT.getScalarSizeInBits();
- if (auto *C = getTargetConstantFromNode(MaskNode)) {
- DecodeVPERMV3Mask(C, MaskEltSize, Mask);
- break;
- }
- return false;
- }
default: llvm_unreachable("unknown target shuffle node");
}
@@ -20540,9 +20523,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue Src3 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
MVT VT = Op.getSimpleValueType();
- SDValue PassThru = SDValue();
// set PassThru element
+ SDValue PassThru;
if (IntrData->Type == VPERM_3OP_MASKZ)
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
else
@@ -20554,6 +20537,22 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Src2, Src1, Src3),
Mask, PassThru, Subtarget, DAG);
}
+ case VPERMI_3OP_MASK:{
+ // Src2 is the PassThru
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue Src3 = Op.getOperand(3);
+ SDValue Mask = Op.getOperand(4);
+ MVT VT = Op.getSimpleValueType();
+
+ // set PassThru element
+ SDValue PassThru = DAG.getBitcast(VT, Src2);
+
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
+ dl, Op.getValueType(),
+ Src1, Src2, Src3),
+ Mask, PassThru, Subtarget, DAG);
+ }
case FMA_OP_MASK3:
case FMA_OP_MASKZ:
case FMA_OP_MASK: {
@@ -25873,7 +25872,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
case X86ISD::VPERMV: return "X86ISD::VPERMV";
case X86ISD::VPERMV3: return "X86ISD::VPERMV3";
- case X86ISD::VPERMIV3: return "X86ISD::VPERMIV3";
case X86ISD::VPERMI: return "X86ISD::VPERMI";
case X86ISD::VPTERNLOG: return "X86ISD::VPTERNLOG";
case X86ISD::VFIXUPIMM: return "X86ISD::VFIXUPIMM";
@@ -38861,7 +38859,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::VPERMI:
case X86ISD::VPERMV:
case X86ISD::VPERMV3:
- case X86ISD::VPERMIV3:
case X86ISD::VPERMIL2:
case X86ISD::VPERMILPI:
case X86ISD::VPERMILPV:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 58d0c9d92af..dd6be5b8e48 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -428,10 +428,6 @@ namespace llvm {
// Res = VPERMV3 V0, MaskV, V1
VPERMV3,
- // 3-op Variable Permute overwriting the index (VPERMI2).
- // Res = VPERMIV3 V0, MaskV, V1
- VPERMIV3,
-
// Bitwise ternary logic.
VPTERNLOG,
// Fix Up Special Packed Float32/64 values.
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 5fecf0a0a13..3bf097bd79c 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -334,6 +334,7 @@ multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
+// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
X86VectorVTInfo InVT,
dag Outs, dag NonTiedIns, string OpcodeStr,
@@ -343,7 +344,7 @@ multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
!con((ins InVT.RC:$src1), NonTiedIns),
!con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
!con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
- OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
+ OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
(vselect InVT.KRCWM:$mask, RHS,
(bitconvert InVT.RC:$src1)),
vselect, "", IsCommutable>;
@@ -1719,17 +1720,19 @@ defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
-let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
+let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
+ hasSideEffects = 0 in {
defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2, _.RC:$src3)), 1>,
+ (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
EVEX_4V, AVX5128IBase, Sched<[sched]>;
+ let mayLoad = 1 in
defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2,
+ (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
(_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>,
EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
}
@@ -1738,13 +1741,14 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
- let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
+ hasSideEffects = 0, mayLoad = 1 in
defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
- (_.VT (X86VPermi2X IdxVT.RC:$src1,
- _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
+ (_.VT (X86VPermt2 _.RC:$src2,
+ IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
AVX5128IBase, EVEX_4V, EVEX_B,
Sched<[sched.Folded, ReadAfterLd]>;
}
@@ -1806,21 +1810,22 @@ multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
X86VectorVTInfo IdxVT,
X86VectorVTInfo CastVT> {
def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86VPermi2X (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
- (_.VT _.RC:$src2), _.RC:$src3),
+ (X86VPermt2 (_.VT _.RC:$src2),
+ (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
(_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
(!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, _.RC:$src3)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86VPermi2X (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
- _.RC:$src2, (_.LdFrag addr:$src3)),
+ (X86VPermt2 _.RC:$src2,
+ (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
+ (_.LdFrag addr:$src3)),
(_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
(!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86VPermi2X (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
- _.RC:$src2,
- (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
+ (X86VPermt2 _.RC:$src2,
+ (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
+ (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
(_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
(!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3)>;
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index cbe37b64b12..5a8ca994235 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -417,13 +417,6 @@ def X86VPermt2 : SDNode<"X86ISD::VPERMV3",
SDTCisSameSizeAs<0,2>,
SDTCisSameAs<0,3>]>, []>;
-def X86VPermi2X : SDNode<"X86ISD::VPERMIV3",
- SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<1>,
- SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>,
- SDTCisSameSizeAs<0,1>,
- SDTCisSameAs<0,2>,
- SDTCisSameAs<0,3>]>, []>;
-
def X86vpternlog : SDNode<"X86ISD::VPTERNLOG", SDTTernlog>;
def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index d5263767db1..3e3a62ec82e 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -31,7 +31,7 @@ enum IntrinsicType : uint16_t {
FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3,
FMA_OP_SCALAR_MASK, FMA_OP_SCALAR_MASKZ, FMA_OP_SCALAR_MASK3,
IFMA_OP,
- VPERM_2OP, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK,
+ VPERM_2OP, VPERMI_3OP_MASK, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK,
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK,
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
@@ -1061,42 +1061,42 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_256, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0),
X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_512, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_512, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_128, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_256, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_512, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_128, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_256, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_512, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_128, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_256, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_512, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_128, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_256, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_512, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_128, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_256, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_512, VPERM_3OP_MASK,
- X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_512, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_128, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_256, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_512, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_128, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_256, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_512, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_128, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_256, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_512, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_128, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_256, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_512, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_128, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_256, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_512, VPERMI_3OP_MASK,
+ X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_256, VPERM_3OP_MASK,
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index 74f7cebaa5e..aeeb5905ad8 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -1963,8 +1963,8 @@ define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
; CHECK-NEXT: vpermi2d (%rdi), %zmm0, %zmm3 {%k1}
-; CHECK-NEXT: vpermi2d %zmm2, %zmm0, %zmm1
-; CHECK-NEXT: vpaddd %zmm1, %zmm3, %zmm0
+; CHECK-NEXT: vpermt2d %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
%x2 = load <16 x i32>, <16 x i32>* %x2p
%res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
@@ -1979,8 +1979,8 @@ define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0,
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovapd %zmm1, %zmm3
-; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm3
+; CHECK-NEXT: vmovapd %zmm0, %zmm3
+; CHECK-NEXT: vpermt2pd %zmm2, %zmm1, %zmm3
; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vaddpd %zmm3, %zmm1, %zmm0
; CHECK-NEXT: retq
@@ -1996,8 +1996,8 @@ define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0,
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovaps %zmm1, %zmm3
-; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm3
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vpermt2ps %zmm2, %zmm1, %zmm3
; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vaddps %zmm3, %zmm1, %zmm0
; CHECK-NEXT: retq
@@ -2013,8 +2013,8 @@ define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
-; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm3
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT: vpermt2q %zmm2, %zmm1, %zmm3
; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vpaddq %zmm3, %zmm1, %zmm0
; CHECK-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
index 334565a6ddf..337d72b69bf 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -1055,8 +1055,8 @@ define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm3
; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
-; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm1
-; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0
+; AVX512BW-NEXT: vpermt2w %zmm2, %zmm1, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm3, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
@@ -1064,8 +1064,8 @@ define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm3
; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
-; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm1
-; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0
+; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm1, %zmm0
+; AVX512F-32-NEXT: vpaddw %zmm0, %zmm3, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
index b2cb157786d..02e69fea203 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
@@ -1872,8 +1872,8 @@ define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
-; CHECK-NEXT: vpermi2w %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x75,0xda]
+; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
+; CHECK-NEXT: vpermt2w %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0xf5,0x08,0x7d,0xda]
; CHECK-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca]
; CHECK-NEXT: vpaddw %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -1889,8 +1889,8 @@ define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
-; CHECK-NEXT: vpermi2w %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x75,0xda]
+; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
+; CHECK-NEXT: vpermt2w %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x7d,0xda]
; CHECK-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca]
; CHECK-NEXT: vpaddw %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
diff --git a/llvm/test/CodeGen/X86/avx512vbmi-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vbmi-intrinsics.ll
index 6c8a129974f..26ac03fd98c 100644
--- a/llvm/test/CodeGen/X86/avx512vbmi-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vbmi-intrinsics.ll
@@ -51,13 +51,13 @@ define <64 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_512(<64 x i8> %x0, <64 x
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovq %rdi, %k1
-; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
-; CHECK-NEXT: vpermi2b %zmm2, %zmm0, %zmm3 {%k1}
-; CHECK-NEXT: vpermi2b %zmm2, %zmm0, %zmm1
+; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT: vpermt2b %zmm2, %zmm1, %zmm3
+; CHECK-NEXT: vpermi2b %zmm2, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vpermi2b %zmm2, %zmm0, %zmm4 {%k1} {z}
-; CHECK-NEXT: vpaddb %zmm1, %zmm4, %zmm0
-; CHECK-NEXT: vpaddb %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: vpaddb %zmm3, %zmm4, %zmm0
+; CHECK-NEXT: vpaddb %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
%res1 = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> zeroinitializer, <64 x i8> %x2, i64 %x3)
diff --git a/llvm/test/CodeGen/X86/avx512vbmivl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vbmivl-intrinsics.ll
index 56769698b09..6bedb5d9069 100644
--- a/llvm/test/CodeGen/X86/avx512vbmivl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vbmivl-intrinsics.ll
@@ -95,8 +95,8 @@ define <16 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_128(<16 x i8> %x0, <16 x
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
-; CHECK-NEXT: vpermi2b %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x75,0xda]
+; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
+; CHECK-NEXT: vpermt2b %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0x75,0x08,0x7d,0xda]
; CHECK-NEXT: vpermi2b %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x75,0xca]
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
; CHECK-NEXT: vpermi2b %xmm2, %xmm0, %xmm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x75,0xe2]
@@ -117,13 +117,13 @@ define <32 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_256(<32 x i8> %x0, <32 x
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
-; CHECK-NEXT: vpermi2b %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x75,0xda]
-; CHECK-NEXT: vpermi2b %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x75,0xca]
+; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
+; CHECK-NEXT: vpermt2b %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0x75,0x28,0x7d,0xda]
+; CHECK-NEXT: vpermi2b %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x75,0xca]
; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
; CHECK-NEXT: vpermi2b %ymm2, %ymm0, %ymm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x75,0xe2]
-; CHECK-NEXT: vpaddb %ymm1, %ymm4, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc1]
-; CHECK-NEXT: vpaddb %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfc,0xc0]
+; CHECK-NEXT: vpaddb %ymm3, %ymm4, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc3]
+; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
%res1 = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> zeroinitializer, <32 x i8> %x2, i32 %x3)
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
index 332c505fec8..b6531a907a4 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -602,8 +602,8 @@ define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0,
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9]
-; CHECK-NEXT: vpermi2pd %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x77,0xda]
+; CHECK-NEXT: vmovapd %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
+; CHECK-NEXT: vpermt2pd %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0xf5,0x08,0x7f,0xda]
; CHECK-NEXT: vpermi2pd %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x77,0xca]
; CHECK-NEXT: vaddpd %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -619,8 +619,8 @@ define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0,
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9]
-; CHECK-NEXT: vpermi2pd %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x77,0xda]
+; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
+; CHECK-NEXT: vpermt2pd %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x7f,0xda]
; CHECK-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x77,0xca]
; CHECK-NEXT: vaddpd %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -636,8 +636,8 @@ define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9]
-; CHECK-NEXT: vpermi2ps %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x77,0xda]
+; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
+; CHECK-NEXT: vpermt2ps %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0x75,0x08,0x7f,0xda]
; CHECK-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
; CHECK-NEXT: vaddps %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -665,8 +665,8 @@ define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9]
-; CHECK-NEXT: vpermi2ps %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x77,0xda]
+; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
+; CHECK-NEXT: vpermt2ps %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0x75,0x28,0x7f,0xda]
; CHECK-NEXT: vpermi2ps %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x77,0xca]
; CHECK-NEXT: vaddps %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]