summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp12
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td38
-rw-r--r--llvm/lib/Target/X86/X86IntrinsicsInfo.h4
3 files changed, 35 insertions, 19 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 802988c1c62..2a11eddaf15 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3976,6 +3976,7 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
case X86ISD::VPERMILPI:
+ case X86ISD::VPERMILPV:
case X86ISD::VPERM2X128:
case X86ISD::VPERMI:
case X86ISD::VPERMV:
@@ -5008,6 +5009,16 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
DecodeZeroMoveLowMask(VT, Mask);
IsUnary = true;
break;
+ case X86ISD::VPERMILPV: {
+ IsUnary = true;
+ SDValue MaskNode = N->getOperand(1);
+ if (auto *C = getTargetShuffleMaskConstant(MaskNode)) {
+ unsigned MaskEltSize = VT.getScalarSizeInBits();
+ DecodeVPERMILPMask(C, MaskEltSize, Mask);
+ break;
+ }
+ return false;
+ }
case X86ISD::PSHUFB: {
IsUnary = true;
SDValue MaskNode = N->getOperand(1);
@@ -29107,6 +29118,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::MOVSS:
case X86ISD::MOVSD:
case X86ISD::VPERMILPI:
+ case X86ISD::VPERMILPV:
case X86ISD::VPERM2X128:
case ISD::VECTOR_SHUFFLE: return combineShuffle(N, DAG, DCI,Subtarget);
case ISD::FMA: return combineFMA(N, DAG, Subtarget);
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 0822f1ff0a9..70bcc2b2242 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -8093,45 +8093,45 @@ defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
RegisterClass RC, X86MemOperand x86memop_f,
X86MemOperand x86memop_i, PatFrag i_frag,
- Intrinsic IntVar, ValueType vt> {
- def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (IntVar RC:$src1, RC:$src2))]>, VEX_4V,
- Sched<[WriteFShuffle]>;
- def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop_i:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (IntVar RC:$src1,
- (bitconvert (i_frag addr:$src2))))]>, VEX_4V,
- Sched<[WriteFShuffleLd, ReadAfterLd]>;
-
+ ValueType f_vt, ValueType i_vt> {
let Predicates = [HasAVX, NoVLX] in {
+ def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, (i_vt RC:$src2))))]>, VEX_4V,
+ Sched<[WriteFShuffle]>;
+ def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop_i:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1,
+ (i_vt (bitconvert (i_frag addr:$src2))))))]>, VEX_4V,
+ Sched<[WriteFShuffleLd, ReadAfterLd]>;
+
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (vt (X86VPermilpi RC:$src1, (i8 imm:$src2))))]>, VEX,
+ [(set RC:$dst, (f_vt (X86VPermilpi RC:$src1, (i8 imm:$src2))))]>, VEX,
Sched<[WriteFShuffle]>;
def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
(ins x86memop_f:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
- (vt (X86VPermilpi (load addr:$src1), (i8 imm:$src2))))]>, VEX,
+ (f_vt (X86VPermilpi (load addr:$src1), (i8 imm:$src2))))]>, VEX,
Sched<[WriteFShuffleLd]>;
}// Predicates = [HasAVX, NoVLX]
}
let ExeDomain = SSEPackedSingle in {
defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
- loadv2i64, int_x86_avx_vpermilvar_ps, v4f32>;
+ loadv2i64, v4f32, v4i32>;
defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
- loadv4i64, int_x86_avx_vpermilvar_ps_256, v8f32>, VEX_L;
+ loadv4i64, v8f32, v8i32>, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
- loadv2i64, int_x86_avx_vpermilvar_pd, v2f64>;
+ loadv2i64, v2f64, v2i64>;
defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
- loadv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>, VEX_L;
+ loadv4i64, v4f64, v4i64>, VEX_L;
}
let Predicates = [HasAVX, NoVLX] in {
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 2d6660e2605..65aea3dfe15 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -328,6 +328,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx_vperm2f128_pd_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
X86_INTRINSIC_DATA(avx_vperm2f128_ps_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
X86_INTRINSIC_DATA(avx_vperm2f128_si_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
+ X86_INTRINSIC_DATA(avx_vpermilvar_pd, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
+ X86_INTRINSIC_DATA(avx_vpermilvar_pd_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
+ X86_INTRINSIC_DATA(avx_vpermilvar_ps, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
+ X86_INTRINSIC_DATA(avx_vpermilvar_ps_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
OpenPOWER on IntegriCloud