summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
authorJuergen Ributzka <juergen@apple.com>2013-10-30 05:48:18 +0000
committerJuergen Ributzka <juergen@apple.com>2013-10-30 05:48:18 +0000
commit6ad05d6b95f3edacc5a7d62b3ea8cc9f6e3616b2 (patch)
tree678b1cdef1d467e391e2b233e964ec86d1cc7c0d /llvm/test/CodeGen
parentd3b4344af986619fee9ea01423691c9353d5b3c2 (diff)
downloadbcm5719-llvm-6ad05d6b95f3edacc5a7d62b3ea8cc9f6e3616b2.tar.gz
bcm5719-llvm-6ad05d6b95f3edacc5a7d62b3ea8cc9f6e3616b2.zip
SelectionDAG: Teach the legalizer to split SETCC if VSELECT needs splitting too.
The Type Legalizer recognizes that VSELECT needs to be split, because the type is to wide for the given target. The same does not always apply to SETCC, because less space is required to encode the result of a comparison. As a result VSELECT is split and SETCC is unrolled into scalar comparisons. This commit fixes the issue by checking for VSELECT-SETCC patterns in the DAG Combiner. If a matching pattern is found, then the result mask of SETCC is promoted to the expected vector mask type for the given target. This mask has usually the same size as the VSELECT return type (except for Intel KNL). Now the type legalizer will split both VSELECT and SETCC. This allows the following X86 DAG Combine code to sucessfully detect the MIN/MAX pattern. This fixes PR16695, PR17002, and <rdar://problem/14594431>. Reviewed by Nadav llvm-svn: 193676
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--llvm/test/CodeGen/X86/vec_split.ll42
1 files changed, 42 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/vec_split.ll b/llvm/test/CodeGen/X86/vec_split.ll
new file mode 100644
index 00000000000..f9e7c20ba4e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vec_split.ll
@@ -0,0 +1,42 @@
+; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE4
+; RUN: llc -march=x86-64 -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1
+; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2
+
+define <16 x i16> @split16(<16 x i16> %a, <16 x i16> %b, <16 x i8> %__mask) {
+; SSE4-LABEL: split16:
+; SSE4: pminuw
+; SSE4: pminuw
+; SSE4: ret
+; AVX1-LABEL: split16:
+; AVX1: vpminuw
+; AVX1: vpminuw
+; AVX1: ret
+; AVX2-LABEL: split16:
+; AVX2: vpminuw
+; AVX2: ret
+ %1 = icmp ult <16 x i16> %a, %b
+ %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
+ ret <16 x i16> %2
+}
+
+define <32 x i16> @split32(<32 x i16> %a, <32 x i16> %b, <32 x i8> %__mask) {
+; SSE4-LABEL: split32:
+; SSE4: pminuw
+; SSE4: pminuw
+; SSE4: pminuw
+; SSE4: pminuw
+; SSE4: ret
+; AVX1-LABEL: split32:
+; AVX1: vpminuw
+; AVX1: vpminuw
+; AVX1: vpminuw
+; AVX1: vpminuw
+; AVX1: ret
+; AVX2-LABEL: split32:
+; AVX2: vpminuw
+; AVX2: vpminuw
+; AVX2: ret
+ %1 = icmp ult <32 x i16> %a, %b
+ %2 = select <32 x i1> %1, <32 x i16> %a, <32 x i16> %b
+ ret <32 x i16> %2
+}
OpenPOWER on IntegriCloud