diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 8 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 30 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 24 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 2 |
5 files changed, 35 insertions, 33 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e06e6fbcf52..59e16d0a094 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -19316,7 +19316,8 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT, // chunks, thus directly computes the pop count for v2i64 and v4i64. if (EltVT == MVT::i64) { SDValue Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL); - V = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT, V, Zeros); + MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64); + V = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, V, Zeros); return DAG.getBitcast(VT, V); } @@ -19332,9 +19333,10 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT, // Do the horizontal sums into two v2i64s. Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL); - Low = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT, + MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64); + Low = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, DAG.getBitcast(ByteVecVT, Low), Zeros); - High = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT, + High = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, DAG.getBitcast(ByteVecVT, High), Zeros); // Merge them together. diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index f5d02ef3608..8411ce6a2c0 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -7369,32 +7369,34 @@ defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq", multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode, - string OpcodeStr, X86VectorVTInfo _src>{ + string OpcodeStr, X86VectorVTInfo _dst, + X86VectorVTInfo _src>{ def rr : AVX512BI<opc, MRMSrcReg, - (outs _src.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2), + (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set _src.RC:$dst,(_src.VT - (OpNode _src.RC:$src1, _src.RC:$src2)))]>; + [(set _dst.RC:$dst,(_dst.VT + (OpNode (_src.VT _src.RC:$src1), + (_src.VT _src.RC:$src2))))]>; let mayLoad = 1 in def rm : AVX512BI<opc, MRMSrcMem, - (outs _src.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2), + (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set _src.RC:$dst,(_src.VT - (OpNode _src.RC:$src1, - (_src.VT (bitconvert + [(set _dst.RC:$dst,(_dst.VT + (OpNode (_src.VT _src.RC:$src1), + (_src.VT (bitconvert (_src.LdFrag addr:$src2))))))]>; } multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode, string OpcodeStr, Predicate prd> { let Predicates = [prd] in - defm Z512 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v64i8_info>, - EVEX_V512; + defm Z512 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v8i64_info, + v64i8_info>, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v32i8x_info>, - EVEX_V256; - defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v16i8x_info>, - EVEX_V128; + defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v4i64x_info, + v32i8x_info>, EVEX_V256; + defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v2i64x_info, + v16i8x_info>, EVEX_V128; } } diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 68891fa18db..1a0a19ceadd 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -79,8 +79,8 @@ def X86pshufb : SDNode<"X86ISD::PSHUFB", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; def X86psadbw : SDNode<"X86ISD::PSADBW", - SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>]>>; + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisSameAs<1,2>]>>; def X86dbpsadbw : SDNode<"X86ISD::DBPSADBW", SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>, SDTCisInt<3>]>>; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 14d40a7700d..a93240bd717 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -4066,22 +4066,18 @@ defm PADDUSW : PDI_binop_all_int<0xDD, "paddusw", int_x86_sse2_paddus_w, int_x86_avx2_paddus_w, SSE_INTALU_ITINS_P, 1>; defm PMADDWD : PDI_binop_all_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, int_x86_avx2_pmadd_wd, SSE_PMADD, 1>; -defm PSADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw, - int_x86_avx2_psad_bw, SSE_PMADD, 1>; - -let Predicates = [HasAVX2] in - def : Pat<(v32i8 (X86psadbw (v32i8 VR256:$src1), - (v32i8 VR256:$src2))), - (VPSADBWYrr VR256:$src2, VR256:$src1)>; let Predicates = [HasAVX] in - def : Pat<(v16i8 (X86psadbw (v16i8 VR128:$src1), - (v16i8 VR128:$src2))), - (VPSADBWrr VR128:$src2, VR128:$src1)>; - -def : Pat<(v16i8 (X86psadbw (v16i8 VR128:$src1), - (v16i8 VR128:$src2))), - (PSADBWrr VR128:$src2, VR128:$src1)>; +defm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v2i64, v16i8, VR128, + loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>, + VEX_4V; +let Predicates = [HasAVX2] in +defm VPSADBWY : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v4i64, v32i8, VR256, + loadv4i64, i256mem, SSE_INTMUL_ITINS_P, 1, 0>, + VEX_4V, VEX_L; +let Constraints = "$src1 = $dst" in +defm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8, VR128, + memopv2i64, i128mem, SSE_INTALU_ITINS_P, 1>; let Predicates = [HasAVX] in defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128, diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 0b77d480bc0..4784d9c3758 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -284,6 +284,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0), X86_INTRINSIC_DATA(avx2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0), X86_INTRINSIC_DATA(avx2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0), + X86_INTRINSIC_DATA(avx2_psad_bw, INTR_TYPE_2OP, X86ISD::PSADBW, 0), X86_INTRINSIC_DATA(avx2_pshuf_b, INTR_TYPE_2OP, X86ISD::PSHUFB, 0), X86_INTRINSIC_DATA(avx2_psign_b, INTR_TYPE_2OP, X86ISD::PSIGN, 0), X86_INTRINSIC_DATA(avx2_psign_d, INTR_TYPE_2OP, X86ISD::PSIGN, 0), @@ -1710,6 +1711,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0), X86_INTRINSIC_DATA(sse2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0), X86_INTRINSIC_DATA(sse2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0), + X86_INTRINSIC_DATA(sse2_psad_bw, INTR_TYPE_2OP, X86ISD::PSADBW, 0), X86_INTRINSIC_DATA(sse2_pshuf_d, INTR_TYPE_2OP, X86ISD::PSHUFD, 0), X86_INTRINSIC_DATA(sse2_pshufh_w, INTR_TYPE_2OP, X86ISD::PSHUFHW, 0), X86_INTRINSIC_DATA(sse2_pshufl_w, INTR_TYPE_2OP, X86ISD::PSHUFLW, 0), |