diff options
| author | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2017-01-29 13:18:30 +0000 |
|---|---|---|
| committer | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2017-01-29 13:18:30 +0000 |
| commit | 17fe27f1f2df3f5dbd14a193928f5ae7c63d03f1 (patch) | |
| tree | 57db4333ecf39ae86032176bf673b6c7fa90a6fe | |
| parent | faa03195837fe50290bbd9c55d26a2e02f4cb832 (diff) | |
| download | bcm5719-llvm-17fe27f1f2df3f5dbd14a193928f5ae7c63d03f1.tar.gz bcm5719-llvm-17fe27f1f2df3f5dbd14a193928f5ae7c63d03f1.zip | |
[X86 Codegen] Fixed a bug in unsigned saturation
PACKUSWB converts Signed word to Unsigned byte, (the same about DW) and it can't be used for umin+truncate pattern.
AVX-512 VPMOVUS* instructions fit the pattern since they convert Unsigned to Unsigned.
See https://llvm.org/bugs/show_bug.cgi?id=31773
Differential Revision: https://reviews.llvm.org/D29196
llvm-svn: 293431
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 24 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-trunc.ll | 24 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-trunc.ll | 10 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/pr31773.ll | 20 |
4 files changed, 27 insertions, 51 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1d0cefc3214..2de2d531603 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -31382,21 +31382,6 @@ static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT, return false; } -/// Return true if VPACK* instruction can be used for the given types -/// and it is avalable on \p Subtarget. -static bool -isSATValidOnSSESubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) { - if (Subtarget.hasSSE2()) - // v16i16 -> v16i8 - if (SrcVT == MVT::v16i16 && DstVT == MVT::v16i8) - return true; - if (Subtarget.hasSSE41()) - // v8i32 -> v8i16 - if (SrcVT == MVT::v8i32 && DstVT == MVT::v8i16) - return true; - return false; -} - /// Detect a pattern of truncation with saturation: /// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type). /// Return the source value to be truncated or SDValue() if the pattern was not @@ -31437,16 +31422,9 @@ combineTruncateWithUSat(SDValue In, EVT VT, SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!TLI.isTypeLegal(In.getValueType()) || !TLI.isTypeLegal(VT)) return SDValue(); - SDValue USatVal = detectUSatPattern(In, VT); - if (USatVal) { + if (auto USatVal = detectUSatPattern(In, VT)) if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget)) return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal); - if (isSATValidOnSSESubtarget(In.getValueType(), VT, Subtarget)) { - SDValue Lo, Hi; - std::tie(Lo, Hi) = DAG.SplitVector(USatVal, DL); - return DAG.getNode(X86ISD::PACKUS, DL, VT, Lo, Hi); - } - } return SDValue(); } diff --git a/llvm/test/CodeGen/X86/avx-trunc.ll b/llvm/test/CodeGen/X86/avx-trunc.ll index c729b988cfb..1a9acd00777 100644 --- a/llvm/test/CodeGen/X86/avx-trunc.ll +++ b/llvm/test/CodeGen/X86/avx-trunc.ll @@ -40,28 +40,4 @@ define <16 x i8> @trunc_16_8(<16 x i16> %A) nounwind uwtable readnone ssp{ ret <16 x i8> %B } -define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) { -; CHECK-LABEL: usat_trunc_wb_256: -; CHECK: # BB#0: -; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 -; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq - %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255> - %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255> - %x6 = trunc <16 x i16> %x5 to <16 x i8> - ret <16 x i8> %x6 -} -define <8 x i16> @usat_trunc_dw_256(<8 x i32> %i) { -; CHECK-LABEL: usat_trunc_dw_256: -; CHECK: # BB#0: -; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 -; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq - %x3 = icmp ult <8 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535> - %x5 = select <8 x i1> %x3, <8 x i32> %i, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535> - %x6 = trunc <8 x i32> %x5 to <8 x i16> - ret <8 x i16> %x6 -} diff --git a/llvm/test/CodeGen/X86/avx512-trunc.ll b/llvm/test/CodeGen/X86/avx512-trunc.ll index 85e607c4be7..ac58024772b 100644 --- a/llvm/test/CodeGen/X86/avx512-trunc.ll +++ b/llvm/test/CodeGen/X86/avx512-trunc.ll @@ -505,8 +505,9 @@ define void @trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) #0 { define void @usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) { ; KNL-LABEL: usat_trunc_wb_256_mem: ; KNL: ## BB#0: -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 -; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0 +; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 +; KNL-NEXT: vpmovdb %zmm0, %xmm0 ; KNL-NEXT: vmovdqu %xmm0, (%rdi) ; KNL-NEXT: retq ; @@ -524,8 +525,9 @@ define void @usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) { define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) { ; KNL-LABEL: usat_trunc_wb_256: ; KNL: ## BB#0: -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 -; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0 +; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 +; KNL-NEXT: vpmovdb %zmm0, %xmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: usat_trunc_wb_256: diff --git a/llvm/test/CodeGen/X86/pr31773.ll b/llvm/test/CodeGen/X86/pr31773.ll new file mode 100644 index 00000000000..8722df3f4b5 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr31773.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s + +; This matter of this test is ensuring that vpackus* is not used for umin+trunc combination, since vpackus* input is a signed number. +define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) { +; CHECK-LABEL: usat_trunc_wb_256: +; CHECK-NOT: vpackuswb %xmm1, %xmm0, %xmm0 + %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255> + %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255> + %x6 = trunc <16 x i16> %x5 to <16 x i8> + ret <16 x i8> %x6 +} + +define <8 x i16> @usat_trunc_dw_256(<8 x i32> %i) { +; CHECK-LABEL: usat_trunc_dw_256: +; CHECK-NOT: vpackusdw %xmm1, %xmm0, %xmm0 + %x3 = icmp ult <8 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535> + %x5 = select <8 x i1> %x3, <8 x i32> %i, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535> + %x6 = trunc <8 x i32> %x5 to <8 x i16> + ret <8 x i16> %x6 +} |

