-rw-r--r--   llvm/lib/Target/X86/X86ISelLowering.cpp      52
-rw-r--r--   llvm/test/CodeGen/X86/vector-trunc-ssat.ll   44
2 files changed, 56 insertions, 40 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8f01210d666..5f08c9dd120 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34109,6 +34109,41 @@ static SDValue detectUSatPattern(SDValue In, EVT VT) {
   return SDValue();
 }
 
+/// Detect patterns of truncation with signed saturation:
+/// (truncate (smin ((smax (x, signed_min_of_dest_type)),
+///                  signed_max_of_dest_type)) to dest_type)
+/// or:
+/// (truncate (smax ((smin (x, signed_max_of_dest_type)),
+///                  signed_min_of_dest_type)) to dest_type).
+/// Return the source value to be truncated or SDValue() if the pattern was not
+/// matched.
+static SDValue detectSSatPattern(SDValue In, EVT VT) {
+  unsigned NumDstBits = VT.getScalarSizeInBits();
+  unsigned NumSrcBits = In.getScalarValueSizeInBits();
+  assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
+
+  auto MatchMinMax = [](SDValue V, unsigned Opcode, const APInt &Limit) {
+    APInt C;
+    if (V.getOpcode() == Opcode &&
+        ISD::isConstantSplatVector(V.getOperand(1).getNode(), C) && C == Limit)
+      return V.getOperand(0);
+    return SDValue();
+  };
+
+  APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
+  APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
+
+  if (SDValue SMin = MatchMinMax(In, ISD::SMIN, SignedMax))
+    if (SDValue SMax = MatchMinMax(SMin, ISD::SMAX, SignedMin))
+      return SMax;
+
+  if (SDValue SMax = MatchMinMax(In, ISD::SMAX, SignedMin))
+    if (SDValue SMin = MatchMinMax(SMax, ISD::SMIN, SignedMax))
+      return SMin;
+
+  return SDValue();
+}
+
 /// Detect a pattern of truncation with saturation:
 /// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
 /// The types should allow to use VPMOVUS* instruction on AVX512.
@@ -34121,15 +34156,18 @@ static SDValue detectAVX512USatPattern(SDValue In, EVT VT,
   return detectUSatPattern(In, VT);
 }
 
-static SDValue combineTruncateWithUSat(SDValue In, EVT VT, const SDLoc &DL,
-                                       SelectionDAG &DAG,
-                                       const X86Subtarget &Subtarget) {
+static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
+                                      SelectionDAG &DAG,
+                                      const X86Subtarget &Subtarget) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   if (!TLI.isTypeLegal(In.getValueType()) || !TLI.isTypeLegal(VT))
     return SDValue();
-  if (auto USatVal = detectUSatPattern(In, VT))
-    if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
+  if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget)) {
+    if (auto SSatVal = detectSSatPattern(In, VT))
+      return DAG.getNode(X86ISD::VTRUNCS, DL, VT, SSatVal);
+    if (auto USatVal = detectUSatPattern(In, VT))
       return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
+  }
   return SDValue();
 }
 
@@ -35393,8 +35431,8 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
   if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
     return Avg;
 
-  // Try to combine truncation with unsigned saturation.
-  if (SDValue Val = combineTruncateWithUSat(Src, VT, DL, DAG, Subtarget))
+  // Try to combine truncation with signed/unsigned saturation.
+  if (SDValue Val = combineTruncateWithSat(Src, VT, DL, DAG, Subtarget))
     return Val;
 
   // The bitcast source is a direct mmx result.
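
For reference, the clamp that detectSSatPattern recognizes is ordinary signed-saturating truncation: the source is clamped to the signed range of the destination type and then truncated, which is what the VPMOVS* instructions perform in one step. Below is a minimal standalone sketch of the scalar semantics only; it is not LLVM code, and the i64-to-i32 case and the helper name are illustrative assumptions.

    #include <algorithm>
    #include <cstdint>

    // Scalar model of the matched pattern:
    // smin(smax(x, signed_min_of_dest_type), signed_max_of_dest_type),
    // followed by a truncate to the destination type.
    static int32_t trunc_ssat_i64_to_i32(int64_t x) {
      const int64_t SignedMin = INT32_MIN; // signed_min_of_dest_type
      const int64_t SignedMax = INT32_MAX; // signed_max_of_dest_type
      return static_cast<int32_t>(std::min(std::max(x, SignedMin), SignedMax));
    }
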
diff --git a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll
index 50088001860..7cd448d2069 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll
@@ -258,9 +258,7 @@ define <4 x i32> @trunc_ssat_v4i64_v4i32(<4 x i64> %a0) {
 ;
 ; AVX512VL-LABEL: trunc_ssat_v4i64_v4i32:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpminsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpmaxsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpmovqd %ymm0, %xmm0
+; AVX512VL-NEXT:    vpmovsqd %ymm0, %xmm0
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
 ;
@@ -278,9 +276,7 @@ define <4 x i32> @trunc_ssat_v4i64_v4i32(<4 x i64> %a0) {
 ;
 ; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i32:
 ; AVX512BWVL:       # %bb.0:
-; AVX512BWVL-NEXT:    vpminsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512BWVL-NEXT:    vpmaxsq {{.*}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512BWVL-NEXT:    vpmovqd %ymm0, %xmm0
+; AVX512BWVL-NEXT:    vpmovsqd %ymm0, %xmm0
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
   %1 = icmp slt <4 x i64> %a0, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
@@ -705,9 +701,7 @@ define <8 x i32> @trunc_ssat_v8i64_v8i32(<8 x i64> %a0) {
 ;
 ; AVX512-LABEL: trunc_ssat_v8i64_v8i32:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512-NEXT:    vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
+; AVX512-NEXT:    vpmovsqd %zmm0, %ymm0
 ; AVX512-NEXT:    retq
   %1 = icmp slt <8 x i64> %a0, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
   %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
@@ -1172,9 +1166,7 @@ define <8 x i16> @trunc_ssat_v8i64_v8i16(<8 x i64> %a0) {
 ;
 ; AVX512-LABEL: trunc_ssat_v8i64_v8i16:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512-NEXT:    vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512-NEXT:    vpmovqw %zmm0, %xmm0
+; AVX512-NEXT:    vpmovsqw %zmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %1 = icmp slt <8 x i64> %a0, <i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767>
@@ -1302,9 +1294,7 @@ define <8 x i16> @trunc_ssat_v8i32_v8i16(<8 x i32> %a0) {
 ;
 ; AVX512VL-LABEL: trunc_ssat_v8i32_v8i16:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpmaxsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
+; AVX512VL-NEXT:    vpmovsdw %ymm0, %xmm0
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
 ;
@@ -1321,9 +1311,7 @@ define <8 x i16> @trunc_ssat_v8i32_v8i16(<8 x i32> %a0) {
 ;
 ; AVX512BWVL-LABEL: trunc_ssat_v8i32_v8i16:
 ; AVX512BWVL:       # %bb.0:
-; AVX512BWVL-NEXT:    vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512BWVL-NEXT:    vpmaxsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512BWVL-NEXT:    vpmovdw %ymm0, %xmm0
+; AVX512BWVL-NEXT:    vpmovsdw %ymm0, %xmm0
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
   %1 = icmp slt <8 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
@@ -1511,9 +1499,7 @@ define <16 x i16> @trunc_ssat_v16i32_v16i16(<16 x i32> %a0) {
 ;
 ; AVX512-LABEL: trunc_ssat_v16i32_v16i16:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; AVX512-NEXT:    vpmaxsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; AVX512-NEXT:    vpmovdw %zmm0, %ymm0
+; AVX512-NEXT:    vpmovsdw %zmm0, %ymm0
 ; AVX512-NEXT:    retq
   %1 = icmp slt <16 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
   %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
@@ -3231,9 +3217,7 @@ define <16 x i8> @trunc_ssat_v16i32_v16i8(<16 x i32> %a0) {
 ;
 ; AVX512-LABEL: trunc_ssat_v16i32_v16i8:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; AVX512-NEXT:    vpmaxsd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512-NEXT:    vpmovsdb %zmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
   %1 = icmp slt <16 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
@@ -3344,9 +3328,7 @@ define <16 x i8> @trunc_ssat_v16i16_v16i8(<16 x i16> %a0) {
 ;
 ; AVX512BWVL-LABEL: trunc_ssat_v16i16_v16i8:
 ; AVX512BWVL:       # %bb.0:
-; AVX512BWVL-NEXT:    vpminsw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512BWVL-NEXT:    vpmaxsw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512BWVL-NEXT:    vpmovwb %ymm0, %xmm0
+; AVX512BWVL-NEXT:    vpmovswb %ymm0, %xmm0
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
   %1 = icmp slt <16 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
@@ -3500,16 +3482,12 @@ define <32 x i8> @trunc_ssat_v32i16_v32i8(<32 x i16> %a0) {
 ;
 ; AVX512BW-LABEL: trunc_ssat_v32i16_v32i8:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vpminsw {{.*}}(%rip), %zmm0, %zmm0
-; AVX512BW-NEXT:    vpmaxsw {{.*}}(%rip), %zmm0, %zmm0
-; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT:    vpmovswb %zmm0, %ymm0
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512BWVL-LABEL: trunc_ssat_v32i16_v32i8:
 ; AVX512BWVL:       # %bb.0:
-; AVX512BWVL-NEXT:    vpminsw {{.*}}(%rip), %zmm0, %zmm0
-; AVX512BWVL-NEXT:    vpmaxsw {{.*}}(%rip), %zmm0, %zmm0
-; AVX512BWVL-NEXT:    vpmovwb %zmm0, %ymm0
+; AVX512BWVL-NEXT:    vpmovswb %zmm0, %ymm0
 ; AVX512BWVL-NEXT:    retq
   %1 = icmp slt <32 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
   %2 = select <32 x i1> %1, <32 x i16> %a0, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
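
The test hunks above all exercise the smin-then-smax form produced by the icmp/select pairs, while detectSSatPattern also accepts the opposite operand order. A small standalone check (an illustration only, not part of the patch or its test file) of why that is safe: both clamp orders yield the same saturated value whenever SignedMin <= SignedMax, which always holds for the destination type's bounds.

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    int main() {
      const int64_t Lo = INT16_MIN, Hi = INT16_MAX; // i64 -> i16 bounds
      for (int64_t X : {int64_t(INT64_MIN), int64_t(-40000), int64_t(0),
                        int64_t(40000), int64_t(INT64_MAX)}) {
        int64_t A = std::min(std::max(X, Lo), Hi); // smin(smax(x, min), max)
        int64_t B = std::max(std::min(X, Hi), Lo); // smax(smin(x, max), min)
        assert(A == B); // identical because Lo <= Hi
      }
      return 0;
    }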