[X86 Codegen] Fixed a bug in unsigned saturation

PACKUSWB converts Signed word to Unsigned byte, (the same about DW) and it can't be used for umin+truncate pattern. AVX-512 VPMOVUS* instructions fit the pattern since they convert Unsigned to Unsigned. See https://llvm.org/bugs/show_bug.cgi?id=31773 Differential Revision: https://reviews.llvm.org/D29196 llvm-svn: 293431
author: Elena Demikhovsky <elena.demikhovsky@intel.com> 2017-01-29 13:18:30 +0000
committer: Elena Demikhovsky <elena.demikhovsky@intel.com> 2017-01-29 13:18:30 +0000
commit: 17fe27f1f2df3f5dbd14a193928f5ae7c63d03f1 (patch)
tree: 57db4333ecf39ae86032176bf673b6c7fa90a6fe
parent: faa03195837fe50290bbd9c55d26a2e02f4cb832 (diff)
download: bcm5719-llvm-17fe27f1f2df3f5dbd14a193928f5ae7c63d03f1.tar.gz
bcm5719-llvm-17fe27f1f2df3f5dbd14a193928f5ae7c63d03f1.zip
4 files changed, 27 insertions, 51 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1d0cefc3214..2de2d531603 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31382,21 +31382,6 @@ static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT,
   return false;
 }
 
-/// Return true if VPACK* instruction can be used for the given types
-/// and it is avalable on \p Subtarget.
-static bool
-isSATValidOnSSESubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) {
-  if (Subtarget.hasSSE2())
-    // v16i16 -> v16i8
-    if (SrcVT == MVT::v16i16 && DstVT == MVT::v16i8)
-      return true;
-  if (Subtarget.hasSSE41())
-    // v8i32 -> v8i16
-    if (SrcVT == MVT::v8i32 && DstVT == MVT::v8i16)
-      return true;
-  return false;
-}
-
 /// Detect a pattern of truncation with saturation:
 /// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
 /// Return the source value to be truncated or SDValue() if the pattern was not
@@ -31437,16 +31422,9 @@ combineTruncateWithUSat(SDValue In, EVT VT, SDLoc &DL, SelectionDAG &DAG,
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   if (!TLI.isTypeLegal(In.getValueType()) || !TLI.isTypeLegal(VT))
     return SDValue();
-  SDValue USatVal = detectUSatPattern(In, VT);
-  if (USatVal) {
+  if (auto USatVal = detectUSatPattern(In, VT))
     if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
       return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
-    if (isSATValidOnSSESubtarget(In.getValueType(), VT, Subtarget)) {
-      SDValue Lo, Hi;
-      std::tie(Lo, Hi) = DAG.SplitVector(USatVal, DL);
-      return DAG.getNode(X86ISD::PACKUS, DL, VT, Lo, Hi);
-    }
-  }
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/X86/avx-trunc.ll b/llvm/test/CodeGen/X86/avx-trunc.ll
index c729b988cfb..1a9acd00777 100644
--- a/llvm/test/CodeGen/X86/avx-trunc.ll
+++ b/llvm/test/CodeGen/X86/avx-trunc.ll
@@ -40,28 +40,4 @@ define <16 x i8> @trunc_16_8(<16 x i16> %A) nounwind uwtable readnone ssp{
   ret <16 x i8> %B
 }
 
-define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
-; CHECK-LABEL: usat_trunc_wb_256:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retq
-  %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-  %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-  %x6 = trunc <16 x i16> %x5 to <16 x i8>
-  ret <16 x i8> %x6
-}
 
-define <8 x i16> @usat_trunc_dw_256(<8 x i32> %i) {
-; CHECK-LABEL: usat_trunc_dw_256:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retq
-  %x3 = icmp ult <8 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-  %x5 = select <8 x i1> %x3, <8 x i32> %i, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-  %x6 = trunc <8 x i32> %x5 to <8 x i16>
-  ret <8 x i16> %x6
-}
diff --git a/llvm/test/CodeGen/X86/avx512-trunc.ll b/llvm/test/CodeGen/X86/avx512-trunc.ll
index 85e607c4be7..ac58024772b 100644
--- a/llvm/test/CodeGen/X86/avx512-trunc.ll
+++ b/llvm/test/CodeGen/X86/avx512-trunc.ll
@@ -505,8 +505,9 @@ define void @trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) #0 {
 define void @usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
 ; KNL-LABEL: usat_trunc_wb_256_mem:
 ; KNL:       ## BB#0:
-; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; KNL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
+; KNL-NEXT:    vpminuw {{.*}}(%rip), %ymm0, %ymm0
+; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
+; KNL-NEXT:    vpmovdb %zmm0, %xmm0
 ; KNL-NEXT:    vmovdqu %xmm0, (%rdi)
 ; KNL-NEXT:    retq
 ;
@@ -524,8 +525,9 @@ define void @usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
 define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
 ; KNL-LABEL: usat_trunc_wb_256:
 ; KNL:       ## BB#0:
-; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; KNL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
+; KNL-NEXT:    vpminuw {{.*}}(%rip), %ymm0, %ymm0
+; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
+; KNL-NEXT:    vpmovdb %zmm0, %xmm0
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: usat_trunc_wb_256:
diff --git a/llvm/test/CodeGen/X86/pr31773.ll b/llvm/test/CodeGen/X86/pr31773.ll
new file mode 100644
index 00000000000..8722df3f4b5
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr31773.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
+
+; This matter of this test is ensuring that vpackus* is not used for umin+trunc combination, since vpackus* input is a signed number.
+define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
+; CHECK-LABEL: usat_trunc_wb_256:
+; CHECK-NOT:    vpackuswb %xmm1, %xmm0, %xmm0
+  %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+  %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+  %x6 = trunc <16 x i16> %x5 to <16 x i8>
+  ret <16 x i8> %x6
+}
+ 
+define <8 x i16> @usat_trunc_dw_256(<8 x i32> %i) {
+; CHECK-LABEL: usat_trunc_dw_256:
+; CHECK-NOT:    vpackusdw %xmm1, %xmm0, %xmm0
+  %x3 = icmp ult <8 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+  %x5 = select <8 x i1> %x3, <8 x i32> %i, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+  %x6 = trunc <8 x i32> %x5 to <8 x i16>
+  ret <8 x i16> %x6
+}
author	Elena Demikhovsky <elena.demikhovsky@intel.com>	2017-01-29 13:18:30 +0000
committer	Elena Demikhovsky <elena.demikhovsky@intel.com>	2017-01-29 13:18:30 +0000
commit	17fe27f1f2df3f5dbd14a193928f5ae7c63d03f1 (patch)
tree	57db4333ecf39ae86032176bf673b6c7fa90a6fe
parent	faa03195837fe50290bbd9c55d26a2e02f4cb832 (diff)
download	bcm5719-llvm-17fe27f1f2df3f5dbd14a193928f5ae7c63d03f1.tar.gz bcm5719-llvm-17fe27f1f2df3f5dbd14a193928f5ae7c63d03f1.zip