summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/X86
diff options
context:
space:
mode:
author: Igor Breger <igor.breger@intel.com> 2015-08-31 11:14:02 +0000
committer: Igor Breger <igor.breger@intel.com> 2015-08-31 11:14:02 +0000
commit: 2ae0fe3ac39340713546b6510701749e70a9097c (patch)
tree: 2f8a9c3e9d9f74f49e3080f6e3c128108133fb00 /llvm/lib/Target/X86
parent: 9f3d55cf3dfec5f2353ac91e32ba9312b1a6fc70 (diff)
download: bcm5719-llvm-2ae0fe3ac39340713546b6510701749e70a9097c.tar.gz
download: bcm5719-llvm-2ae0fe3ac39340713546b6510701749e70a9097c.zip
AVX512: Implemented encoding and intrinsics for vpalignr

Added tests for intrinsics and encoding.

Differential Revision: http://reviews.llvm.org/D12270

llvm-svn: 246428
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 6
-rw-r--r-- llvm/lib/Target/X86/X86InstrAVX512.td 76
-rw-r--r-- llvm/lib/Target/X86/X86InstrSSE.td 28
-rw-r--r-- llvm/lib/Target/X86/X86IntrinsicsInfo.h 16
4 files changed, 92 insertions, 34 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f9f79955828..1f591e0b51d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -6902,7 +6902,7 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
Hi = DAG.getBitcast(AlignVT, Hi);
return DAG.getBitcast(
- VT, DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Hi, Lo,
+ VT, DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Lo, Hi,
DAG.getConstant(Rotation * Scale, DL, MVT::i8)));
}
@@ -15695,12 +15695,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Src1, Src2, Imm, Rnd),
Mask, PassThru, Subtarget, DAG);
}
+ case INTR_TYPE_3OP_IMM8_MASK:
case INTR_TYPE_3OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue PassThru = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
+
+ if (IntrData->Type == INTR_TYPE_3OP_IMM8_MASK)
+ Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3);
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index c0614ac03ab..b82818a2cb5 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -6443,22 +6443,33 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
// op(reg_vec2,mem_vec,imm)
+multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo>{
+
+ defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
+ (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
+ OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
+ (SrcInfo.VT SrcInfo.RC:$src2),
+ (i8 imm:$src3)))>;
+ let mayLoad = 1 in
+ defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
+ (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
+ OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
+ (SrcInfo.VT (bitconvert
+ (SrcInfo.LdFrag addr:$src2))),
+ (i8 imm:$src3)))>;
+}
+
+//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
+// op(reg_vec2,mem_vec,imm)
// op(reg_vec2,broadcast(eltVt),imm)
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _>{
- defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
- OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
- (OpNode (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- (i8 imm:$src3))>;
- let mayLoad = 1 in {
- defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
- OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
- (OpNode (_.VT _.RC:$src1),
- (_.VT (bitconvert (_.LdFrag addr:$src2))),
- (i8 imm:$src3))>;
+ X86VectorVTInfo _>:
+ avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, _, _>{
+
+ let mayLoad = 1 in
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
@@ -6466,7 +6477,6 @@ multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i8 imm:$src3))>, EVEX_B;
- }
}
//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
@@ -6542,6 +6552,20 @@ multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
}
}
+multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
+ AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo>{
+ let Predicates = [HasBWI] in {
+ defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info512,
+ SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
+ }
+ let Predicates = [HasBWI, HasVLX] in {
+ defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info128,
+ SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
+ defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info256,
+ SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
+ }
+}
+
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
bits<8> opc, SDNode OpNode>{
let Predicates = [HasAVX512] in {
@@ -6665,6 +6689,28 @@ defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info, avx512vl_f32_info>,
defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info, avx512vl_f64_info>,
EVEX_CD8<64, CD8VF>, VEX_W;
+multiclass avx512_vpalign_lowering<X86VectorVTInfo _ , list<Predicate> p>{
+ let Predicates = p in
+ def NAME#_.VTName#rri:
+ Pat<(_.VT (X86PAlignr _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
+ (!cast<Instruction>(NAME#_.ZSuffix#rri)
+ _.RC:$src1, _.RC:$src2, imm:$imm)>;
+}
+
+multiclass avx512_vpalign_lowering_common<AVX512VLVectorVTInfo _>:
+ avx512_vpalign_lowering<_.info512, [HasBWI]>,
+ avx512_vpalign_lowering<_.info128, [HasBWI, HasVLX]>,
+ avx512_vpalign_lowering<_.info256, [HasBWI, HasVLX]>;
+
+defm VPALIGN: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr" ,
+ avx512vl_i8_info, avx512vl_i8_info>,
+ avx512_vpalign_lowering_common<avx512vl_i16_info>,
+ avx512_vpalign_lowering_common<avx512vl_i32_info>,
+ avx512_vpalign_lowering_common<avx512vl_f32_info>,
+ avx512_vpalign_lowering_common<avx512vl_i64_info>,
+ avx512_vpalign_lowering_common<avx512vl_f64_info>,
+ EVEX_CD8<8, CD8VF>;
+
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 3e072bc4bbc..0342ac2d48f 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -5799,37 +5799,37 @@ let Predicates = [HasAVX2] in
let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
defm PALIGN : ssse3_palignr<"palignr">;
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
def : Pat<(v8i32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+ (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v8f32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+ (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v16i16 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+ (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v32i8 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+ (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
}
let Predicates = [UseSSSE3] in {
def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
}
//===---------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index c065f3d86c5..ed516294139 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -23,7 +23,8 @@ enum IntrinsicType {
CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
- INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
+ INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, INTR_TYPE_3OP_IMM8_MASK,
+ FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
VPERM_3OP_MASKZ,
INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
@@ -753,6 +754,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_paddus_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
X86_INTRINSIC_DATA(avx512_mask_paddus_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
X86_INTRINSIC_DATA(avx512_mask_paddus_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_palignr_128, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::PALIGNR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_palignr_256, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::PALIGNR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_palignr_512, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::PALIGNR, 0),
X86_INTRINSIC_DATA(avx512_mask_pand_d_128, INTR_TYPE_2OP_MASK, ISD::AND, 0),
X86_INTRINSIC_DATA(avx512_mask_pand_d_256, INTR_TYPE_2OP_MASK, ISD::AND, 0),
X86_INTRINSIC_DATA(avx512_mask_pand_d_512, INTR_TYPE_2OP_MASK, ISD::AND, 0),
@@ -1199,9 +1206,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::UNPCKL, 0),
X86_INTRINSIC_DATA(avx512_mask_unpckl_ps_512, INTR_TYPE_2OP_MASK,
X86ISD::UNPCKL, 0),
- X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
- X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
-
+ X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::VALIGN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::VALIGN, 0),
X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD,
OpenPOWER on IntegriCloud