summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp20
-rw-r--r--llvm/test/CodeGen/X86/vector-trunc-ssat.ll12
2 files changed, 23 insertions, 9 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e4f86127a06..57e50d30af8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34349,6 +34349,20 @@ static SDValue detectSSatPattern(SDValue In, EVT VT, bool MatchPackUS = false) {
return SDValue();
}
+/// Detect a pattern of truncation with signed saturation.
+/// The types should allow to use VPMOVSS* instruction on AVX512.
+/// Return the source value to be truncated or SDValue() if the pattern was not
+/// matched.
+static SDValue detectAVX512SSatPattern(SDValue In, EVT VT,
+ const X86Subtarget &Subtarget,
+ const TargetLowering &TLI) {
+ if (!TLI.isTypeLegal(In.getValueType()))
+ return SDValue();
+ if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
+ return SDValue();
+ return detectSSatPattern(In, VT);
+}
+
/// Detect a pattern of truncation with saturation:
/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
/// The types should allow to use VPMOVUS* instruction on AVX512.
@@ -34988,6 +35002,12 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (SDValue Val =
+ detectAVX512SSatPattern(St->getValue(), St->getMemoryVT(), Subtarget,
+ TLI))
+ return EmitTruncSStore(true /* Signed saturation */, St->getChain(),
+ dl, Val, St->getBasePtr(),
+ St->getMemoryVT(), St->getMemOperand(), DAG);
+ if (SDValue Val =
detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), Subtarget,
TLI))
return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
diff --git a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll
index 2ad9b8de43c..4ba6693ac52 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll
@@ -2042,9 +2042,7 @@ define void @trunc_ssat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
;
; AVX512-LABEL: trunc_ssat_v8i64_v8i8_store:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512-NEXT: vpmovqb %zmm0, (%rdi)
+; AVX512-NEXT: vpmovsqb %zmm0, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = icmp slt <8 x i64> %a0, <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
@@ -2998,9 +2996,7 @@ define void @trunc_ssat_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) {
;
; AVX512VL-LABEL: trunc_ssat_v8i32_v8i8_store:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512VL-NEXT: vpmaxsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512VL-NEXT: vpmovdb %ymm0, (%rdi)
+; AVX512VL-NEXT: vpmovsdb %ymm0, (%rdi)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
@@ -3018,9 +3014,7 @@ define void @trunc_ssat_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) {
;
; AVX512BWVL-LABEL: trunc_ssat_v8i32_v8i8_store:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmaxsd {{.*}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpmovdb %ymm0, (%rdi)
+; AVX512BWVL-NEXT: vpmovsdb %ymm0, (%rdi)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
%1 = icmp slt <8 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
OpenPOWER on IntegriCloud