author     Sanjay Patel <spatel@rotateright.com>  2018-10-09 21:26:01 +0000
committer  Sanjay Patel <spatel@rotateright.com>  2018-10-09 21:26:01 +0000
commit     e9ca7ea3e5c0f3e155fc7b47cb9068a2f12cae6a
tree       d9811d19d68449dc71aa1e0cb83a0d45d3b73610 /llvm/test/Transforms/InstCombine/vector-casts.ll
parent     5989281cf3af52fc07ad458297e70f559db02de7
[InstCombine] reverse 'trunc X to <N x i1>' canonicalization
icmp ne (and X, 1), 0 --> trunc X to N x i1

Ideally, we'd do the same for scalars, but there will likely be
regressions unless we add more trunc folds as we're doing here
for vectors.

The motivating vector case is from PR37549:
https://bugs.llvm.org/show_bug.cgi?id=37549

define <4 x float> @bitwise_select(<4 x float> %x, <4 x float> %y, <4 x float> %z, <4 x float> %w) {
  %c = fcmp ole <4 x float> %x, %y
  %s = sext <4 x i1> %c to <4 x i32>
  %s1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
  %s2 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %cond = or <4 x i32> %s1, %s2
  %condtr = trunc <4 x i32> %cond to <4 x i1>
  %r = select <4 x i1> %condtr, <4 x float> %z, <4 x float> %w
  ret <4 x float> %r
}

Here's a sampling of the vector codegen for that case using
mask+icmp (current behavior) vs. trunc (with this patch):

AVX before:

vcmpleps %xmm1, %xmm0, %xmm0
vpermilps $80, %xmm0, %xmm1 ## xmm1 = xmm0[0,0,1,1]
vpermilps $250, %xmm0, %xmm0 ## xmm0 = xmm0[2,2,3,3]
vorps %xmm0, %xmm1, %xmm0
vandps LCPI0_0(%rip), %xmm0, %xmm0
vxorps %xmm1, %xmm1, %xmm1
vpcmpeqd %xmm1, %xmm0, %xmm0
vblendvps %xmm0, %xmm3, %xmm2, %xmm0

AVX after:

vcmpleps %xmm1, %xmm0, %xmm0
vpermilps $80, %xmm0, %xmm1 ## xmm1 = xmm0[0,0,1,1]
vpermilps $250, %xmm0, %xmm0 ## xmm0 = xmm0[2,2,3,3]
vorps %xmm0, %xmm1, %xmm0
vblendvps %xmm0, %xmm2, %xmm3, %xmm0

AVX512f before:

vcmpleps %xmm1, %xmm0, %xmm0
vpermilps $80, %xmm0, %xmm1 ## xmm1 = xmm0[0,0,1,1]
vpermilps $250, %xmm0, %xmm0 ## xmm0 = xmm0[2,2,3,3]
vorps %xmm0, %xmm1, %xmm0
vpbroadcastd LCPI0_0(%rip), %xmm1 ## xmm1 = [1,1,1,1]
vptestnmd %zmm1, %zmm0, %k1
vblendmps %zmm3, %zmm2, %zmm0 {%k1}

AVX512f after:

vcmpleps %xmm1, %xmm0, %xmm0
vpermilps $80, %xmm0, %xmm1 ## xmm1 = xmm0[0,0,1,1]
vpermilps $250, %xmm0, %xmm0 ## xmm0 = xmm0[2,2,3,3]
vorps %xmm0, %xmm1, %xmm0
vpslld $31, %xmm0, %xmm0
vptestmd %zmm0, %zmm0, %k1
vblendmps %zmm2, %zmm3, %zmm0 {%k1}

AArch64 before:

fcmge v0.4s, v1.4s, v0.4s
zip1 v1.4s, v0.4s, v0.4s
zip2 v0.4s, v0.4s, v0.4s
orr v0.16b, v1.16b, v0.16b
movi v1.4s, #1
and v0.16b, v0.16b, v1.16b
cmeq v0.4s, v0.4s, #0
bsl v0.16b, v3.16b, v2.16b

AArch64 after:

fcmge v0.4s, v1.4s, v0.4s
zip1 v1.4s, v0.4s, v0.4s
zip2 v0.4s, v0.4s, v0.4s
orr v0.16b, v1.16b, v0.16b
bsl v0.16b, v2.16b, v3.16b

PowerPC-le before:

xvcmpgesp 34, 35, 34
vspltisw 0, 1
vmrglw 3, 2, 2
vmrghw 2, 2, 2
xxlor 0, 35, 34
xxlxor 35, 35, 35
xxland 34, 0, 32
vcmpequw 2, 2, 3
xxsel 34, 36, 37, 34

PowerPC-le after:

xvcmpgesp 34, 35, 34
vmrglw 3, 2, 2
vmrghw 2, 2, 2
xxlor 0, 35, 34
xxsel 34, 37, 36, 0

Differential Revision: https://reviews.llvm.org/D52747

llvm-svn: 344082
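To make the fold concrete, here is a minimal, hypothetical C++ sketch of how a vector-only
'icmp ne (and X, 1), 0 --> trunc' match could be written with LLVM's PatternMatch
utilities. The function name and surrounding plumbing are illustrative only and are
not the code from D52747:

// Hedged sketch, not the actual patch: illustrates matching the
// 'icmp ne (and X, 1), 0' pattern and rewriting it to a trunc.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace PatternMatch;

// Returns the replacement value, or nullptr if the pattern does not match.
static Value *foldMaskedCmpToTrunc(ICmpInst &Cmp, IRBuilder<> &Builder) {
  Value *X;
  // Restrict to vectors: per the commit message, doing this for scalars
  // would likely regress without additional trunc folds.
  if (Cmp.getType()->isVectorTy() &&
      Cmp.getPredicate() == ICmpInst::ICMP_NE &&
      match(Cmp.getOperand(0), m_And(m_Value(X), m_One())) &&
      match(Cmp.getOperand(1), m_Zero()))
    // X & 1 != 0  ==>  trunc X to <N x i1>
    return Builder.CreateTrunc(X, Cmp.getType());
  return nullptr;
}

As the updated tests below show, the real fold also handles a mask with one undef
element, while @and_cmp_is_trunc_even_with_undef_elts remains a TODO because the
undef matching is incomplete.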
Diffstat (limited to 'llvm/test/Transforms/InstCombine/vector-casts.ll')
-rw-r--r--  llvm/test/Transforms/InstCombine/vector-casts.ll  19
1 file changed, 7 insertions, 12 deletions
diff --git a/llvm/test/Transforms/InstCombine/vector-casts.ll b/llvm/test/Transforms/InstCombine/vector-casts.ll
index 6e0d66b8ed4..e0d6083a969 100644
--- a/llvm/test/Transforms/InstCombine/vector-casts.ll
+++ b/llvm/test/Transforms/InstCombine/vector-casts.ll
@@ -1,26 +1,22 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
-; This turns into a&1 != 0
-; TODO: The bar for canonicalizing to something bigger than the minimal length IR is very high.
-; This pattern does not appear to meet that standard.
+; Can't get smaller than this.
define <2 x i1> @trunc(<2 x i64> %a) {
; CHECK-LABEL: @trunc(
-; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], <i64 1, i64 1>
-; CHECK-NEXT: [[T:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[T:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i1>
; CHECK-NEXT: ret <2 x i1> [[T]]
;
%t = trunc <2 x i64> %a to <2 x i1>
ret <2 x i1> %t
}
-; TODO: This could be just 1 instruction (trunc).
+; This is trunc.
define <2 x i1> @and_cmp_is_trunc(<2 x i64> %a) {
; CHECK-LABEL: @and_cmp_is_trunc(
-; CHECK-NEXT: [[T:%.*]] = and <2 x i64> [[A:%.*]], <i64 1, i64 1>
-; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i64> [[T]], zeroinitializer
+; CHECK-NEXT: [[R:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i1>
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%t = and <2 x i64> %a, <i64 1, i64 1>
@@ -28,12 +24,11 @@ define <2 x i1> @and_cmp_is_trunc(<2 x i64> %a) {
ret <2 x i1> %r
}
-; TODO: This could be just 1 instruction (trunc).
+; This is trunc.
define <2 x i1> @and_cmp_is_trunc_even_with_undef_elt(<2 x i64> %a) {
; CHECK-LABEL: @and_cmp_is_trunc_even_with_undef_elt(
-; CHECK-NEXT: [[T:%.*]] = and <2 x i64> [[A:%.*]], <i64 undef, i64 1>
-; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i64> [[T]], zeroinitializer
+; CHECK-NEXT: [[R:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i1>
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%t = and <2 x i64> %a, <i64 undef, i64 1>
@@ -41,7 +36,7 @@ define <2 x i1> @and_cmp_is_trunc_even_with_undef_elt(<2 x i64> %a) {
ret <2 x i1> %r
}
-; TODO: This could be just 1 instruction (trunc).
+; TODO: This could be just 1 instruction (trunc), but our undef matching is incomplete.
define <2 x i1> @and_cmp_is_trunc_even_with_undef_elts(<2 x i64> %a) {
; CHECK-LABEL: @and_cmp_is_trunc_even_with_undef_elts(