Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp        20
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.h            3
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td           54
-rw-r--r--  llvm/lib/Target/X86/X86InstrFragmentsSIMD.td     1
-rw-r--r--  llvm/lib/Target/X86/X86IntrinsicsInfo.h         14
5 files changed, 83 insertions, 9 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b2e08c28f48..d36bcd98e1d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -15084,6 +15084,23 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Src1,Src2),
Mask, PassThru, Subtarget, DAG);
}
+ case INTR_TYPE_2OP_MASK_RM: {
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue PassThru = Op.getOperand(3);
+ SDValue Mask = Op.getOperand(4);
+ // These intrinsics come in two forms: with and without an explicit
+ // rounding-mode operand. If the intrinsic has a rounding mode (6 operands
+ // in total), use it; otherwise default to the current rounding direction.
+ SDValue Rnd;
+ if (Op.getNumOperands() == 6)
+ Rnd = Op.getOperand(5);
+ else
+ Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
+ Src1, Src2, Rnd),
+ Mask, PassThru, Subtarget, DAG);
+ }
case INTR_TYPE_3OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
@@ -18490,9 +18507,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND";
case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND";
case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND";
+ case X86ISD::SCALEF: return "X86ISD::SCALEF";
case X86ISD::ADDS: return "X86ISD::ADDS";
case X86ISD::SUBS: return "X86ISD::SUBS";
- case X86ISD::AVG: return "X86ISD::AVG";
+ case X86ISD::AVG: return "X86ISD::AVG";
case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND";
case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND";
}
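
For reference, the per-element behaviour that the new masked lowering is expected to produce can be modelled in plain C++. This is an illustrative sketch only, not LLVM code: the names scalef_elem and masked_scalef_pd are made up here, and special-value handling (NaN, infinities, denormals) is omitted. VSCALEF multiplies each element of the first source by 2 raised to the floor of the corresponding element of the second source, and the mask selects between that result and the pass-through value.

#include <cmath>
#include <cstdint>

// Illustrative scalar model of one VSCALEFPD element: a * 2^floor(b).
static double scalef_elem(double a, double b) {
  return a * std::exp2(std::floor(b));
}

// Illustrative model of the masked form: for each lane, keep the
// pass-through value when the corresponding mask bit is clear.
static void masked_scalef_pd(const double *a, const double *b,
                             const double *passthru, uint8_t mask,
                             double *dst, int lanes) {
  for (int i = 0; i < lanes; ++i)
    dst[i] = ((mask >> i) & 1) ? scalef_elem(a[i], b[i]) : passthru[i];
}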
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index dd6aecdab7e..372121b39ad 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -211,7 +211,8 @@ namespace llvm {
// FP vector get exponent
FGETEXP_RND,
-
+ // FP Scale
+ SCALEF,
// Integer add/sub with unsigned saturation.
ADDUS,
SUBUS,
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index d2197a20b12..79f0ceb1315 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -3436,7 +3436,7 @@ multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
- X86VectorVTInfo _, bit IsCommutable> {
+ X86VectorVTInfo _> {
defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
"$rc, $src2, $src1", "$src1, $src2, $rc",
@@ -3446,7 +3446,7 @@ multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRn
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
- X86VectorVTInfo _, bit IsCommutable> {
+ X86VectorVTInfo _> {
defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
@@ -3481,16 +3481,16 @@ multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
- defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info, 0>,
+ defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
- defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info, 0>,
+ defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info>,
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
- defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info, 0>,
+ defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
- defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info, 0>,
+ defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info>,
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
@@ -3513,6 +3513,48 @@ let Predicates = [HasDQI] in {
defm VXOR : avx512_fp_binop_p<0x57, "vxor", X86fxor, 1>;
}
+multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>, EVEX_4V;
+ let mayLoad = 1 in {
+ defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>, EVEX_4V;
+ defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
+ "${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr,
+ (OpNode _.RC:$src1, (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2))), (i32 FROUND_CURRENT))>,
+ EVEX_4V, EVEX_B;
+ }//let mayLoad = 1
+}
+
+multiclass avx512_fp_scalef_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v16f32_info>,
+ avx512_fp_round_packed<opc, OpcodeStr, OpNode, v16f32_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f64_info>,
+ avx512_fp_round_packed<opc, OpcodeStr, OpNode, v8f64_info>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+ // Define only if AVX512VL feature is present.
+ let Predicates = [HasVLX] in {
+ defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v4f32x_info>,
+ EVEX_V128, EVEX_CD8<32, CD8VF>;
+ defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f32x_info>,
+ EVEX_V256, EVEX_CD8<32, CD8VF>;
+ defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v2f64x_info>,
+ EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
+ defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v4f64x_info>,
+ EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
+ }
+}
+defm VSCALEF : avx512_fp_scalef_all<0x2C, "vscalef", X86scalef>, T8PD;
+
//===----------------------------------------------------------------------===//
// AVX-512 VPTESTM instructions
//===----------------------------------------------------------------------===//
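
The multiclass above produces register, memory, broadcast, and embedded-rounding forms of VSCALEFPS/VSCALEFPD. Assuming the usual intrinsic names from the Intel intrinsics guide are exposed by the compiler's AVX-512 headers (an assumption here, not something this patch adds), those forms would typically be reached from C/C++ as in this sketch (requires -mavx512f):

#include <immintrin.h>

// Default form: current rounding mode.
__m512d scale_all(__m512d a, __m512d b) {
  return _mm512_scalef_pd(a, b);
}

// Masked form: lanes with a clear mask bit keep the value from src.
__m512d scale_masked(__m512d src, __mmask8 k, __m512d a, __m512d b) {
  return _mm512_mask_scalef_pd(src, k, a, b);
}

// Embedded-rounding form, corresponding to the "rb" variant from
// avx512_fp_round_packed above.
__m512d scale_rne(__m512d a, __m512d b) {
  return _mm512_scalef_round_pd(a, b,
           _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}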
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index f1fb9b1c6f4..072bc050aad 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -311,6 +311,7 @@ def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>;
def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>;
def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>;
def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>;
+def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOpRound>;
def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>;
def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>;
def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index c037b7b3d82..d8c5dd918ac 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -21,7 +21,7 @@ enum IntrinsicType {
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
- INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK,
+ INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
INTR_TYPE_3OP_MASK, FMA_OP_MASK, FMA_OP_MASKZ, VPERM_3OP_MASK,
VPERM_3OP_MASKZ,
INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
@@ -668,6 +668,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::RNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::RNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_pd_128, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_pd_256, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_pd_512, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_ps_128, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_ps_256, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_ps_512, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::SCALEF, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT,
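
The six new table entries route the 128-, 256-, and 512-bit masked scalef intrinsics through the INTR_TYPE_2OP_MASK_RM path added earlier in this patch; the 512-bit variants carry an explicit rounding operand while the narrower ones do not, which is presumably why that path checks for six operands. As a hedged illustration of the 128/256-bit (AVX-512VL) forms — the intrinsic names below follow the Intel intrinsics guide and are an assumption about the compiler headers, not part of this patch — usage might look like:

#include <immintrin.h>

// Hypothetical usage of the narrower masked forms; requires
// -mavx512f -mavx512vl and headers that expose these names.
__m128d scalef128(__m128d src, __mmask8 k, __m128d a, __m128d b) {
  return _mm_mask_scalef_pd(src, k, a, b);
}

__m256 scalef256(__m256 src, __mmask8 k, __m256 a, __m256 b) {
  return _mm256_mask_scalef_ps(src, k, a, b);
}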