diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-02-28 21:42:19 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-02-28 21:42:19 +0000 |
commit | 72b86586b0085f0a6488ada035d985edd0c7cdc2 (patch) | |
tree | 964e56d411078a2acc2bd38c050a15abed9e4848 | |
parent | 3fffe1f02d849e464f17bc41e1b2d2cb7a606883 (diff) | |
download | bcm5719-llvm-72b86586b0085f0a6488ada035d985edd0c7cdc2.tar.gz bcm5719-llvm-72b86586b0085f0a6488ada035d985edd0c7cdc2.zip |
[X86][AVX512] Improve support for signed saturation truncation stores
Matches what we already manage for unsigned saturation truncation stores
Differential Revision: https://reviews.llvm.org/D43629
llvm-svn: 326372
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 20 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-trunc-ssat.ll | 12 |
2 files changed, 23 insertions, 9 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e4f86127a06..57e50d30af8 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -34349,6 +34349,20 @@ static SDValue detectSSatPattern(SDValue In, EVT VT, bool MatchPackUS = false) { return SDValue(); } +/// Detect a pattern of truncation with signed saturation. +/// The types should allow to use VPMOVSS* instruction on AVX512. +/// Return the source value to be truncated or SDValue() if the pattern was not +/// matched. +static SDValue detectAVX512SSatPattern(SDValue In, EVT VT, + const X86Subtarget &Subtarget, + const TargetLowering &TLI) { + if (!TLI.isTypeLegal(In.getValueType())) + return SDValue(); + if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget)) + return SDValue(); + return detectSSatPattern(In, VT); +} + /// Detect a pattern of truncation with saturation: /// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type). /// The types should allow to use VPMOVUS* instruction on AVX512. @@ -34988,6 +35002,12 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (SDValue Val = + detectAVX512SSatPattern(St->getValue(), St->getMemoryVT(), Subtarget, + TLI)) + return EmitTruncSStore(true /* Signed saturation */, St->getChain(), + dl, Val, St->getBasePtr(), + St->getMemoryVT(), St->getMemOperand(), DAG); + if (SDValue Val = detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), Subtarget, TLI)) return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(), diff --git a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll index 2ad9b8de43c..4ba6693ac52 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll @@ -2042,9 +2042,7 @@ define void @trunc_ssat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) { ; ; AVX512-LABEL: trunc_ssat_v8i64_v8i8_store: ; AVX512: # %bb.0: -; AVX512-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512-NEXT: vpmovqb %zmm0, (%rdi) +; AVX512-NEXT: vpmovsqb %zmm0, (%rdi) ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = icmp slt <8 x i64> %a0, <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127> @@ -2998,9 +2996,7 @@ define void @trunc_ssat_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) { ; ; AVX512VL-LABEL: trunc_ssat_v8i32_v8i8_store: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0 -; AVX512VL-NEXT: vpmaxsd {{.*}}(%rip){1to8}, %ymm0, %ymm0 -; AVX512VL-NEXT: vpmovdb %ymm0, (%rdi) +; AVX512VL-NEXT: vpmovsdb %ymm0, (%rdi) ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq ; @@ -3018,9 +3014,7 @@ define void @trunc_ssat_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) { ; ; AVX512BWVL-LABEL: trunc_ssat_v8i32_v8i8_store: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0 -; AVX512BWVL-NEXT: vpmaxsd {{.*}}(%rip){1to8}, %ymm0, %ymm0 -; AVX512BWVL-NEXT: vpmovdb %ymm0, (%rdi) +; AVX512BWVL-NEXT: vpmovsdb %ymm0, (%rdi) ; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq %1 = icmp slt <8 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> |