summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2015-08-15 13:27:30 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2015-08-15 13:27:30 +0000
commit0750c84623bc91f2742451b82e8595ab1985081b (patch)
tree473ecbdc4a2e9dca96f032afe7d1543e7ef4105d /llvm/test
parent234a48270e5f8824e65ad83b33d91a918287463d (diff)
downloadbcm5719-llvm-0750c84623bc91f2742451b82e8595ab1985081b.tar.gz
bcm5719-llvm-0750c84623bc91f2742451b82e8595ab1985081b.zip
[DAGCombiner] Attempt to mask vectors before zero extension instead of after.
For cases where we TRUNCATE and then ZERO_EXTEND to a larger size (often from vector legalization), see if we can mask the source data and then ZERO_EXTEND (instead of after a ANY_EXTEND). This can help avoid having to generate a larger mask, and possibly applying it to several sub-vectors. (zext (truncate x)) -> (zext (and(x, m)) Includes a minor patch to SystemZ to better recognise 8/16-bit zero extension patterns from RISBG bit-extraction code. This is the first of a number of minor patches to help improve the conversion of byte masks to clear mask shuffles. Differential Revision: http://reviews.llvm.org/D11764 llvm-svn: 245160
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-aapcs.ll15
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-arith.ll3
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-vector-ext.ll54
-rw-r--r--llvm/test/CodeGen/AArch64/bitfield.ll44
-rw-r--r--llvm/test/CodeGen/SystemZ/insert-05.ll4
-rw-r--r--llvm/test/CodeGen/X86/avx2-conversions.ll2
-rw-r--r--llvm/test/CodeGen/X86/vec_cast2.ll7
-rw-r--r--llvm/test/CodeGen/X86/vector-zext.ll46
8 files changed, 94 insertions, 81 deletions
diff --git a/llvm/test/CodeGen/AArch64/arm64-aapcs.ll b/llvm/test/CodeGen/AArch64/arm64-aapcs.ll
index f345acf453d..5d25f66b30e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-aapcs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-aapcs.ll
@@ -27,12 +27,13 @@ define [2 x i64] @test_i64x2_align(i32, [2 x i64] %arg, i32 %after) {
; Check stack slots are 64-bit at all times.
define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short,
i32 %int, i64 %long) {
- ; Part of last store. Blasted scheduler.
-; CHECK: ldr [[LONG:x[0-9]+]], [sp, #32]
-
%ext_bool = zext i1 %bool to i64
store volatile i64 %ext_bool, i64* @var64, align 8
; CHECK: ldrb w[[EXT:[0-9]+]], [sp]
+
+ ; Part of last store. Blasted scheduler.
+; CHECK: ldr [[LONG:x[0-9]+]], [sp, #32]
+
; CHECK: and x[[EXTED:[0-9]+]], x[[EXT]], #0x1
; CHECK: str x[[EXTED]], [{{x[0-9]+}}, :lo12:var64]
@@ -63,8 +64,8 @@ define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short,
define void @test_extension(i1 %bool, i8 %char, i16 %short, i32 %int) {
%ext_bool = zext i1 %bool to i64
store volatile i64 %ext_bool, i64* @var64
-; CHECK: and [[EXT:x[0-9]+]], x0, #0x1
-; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64]
+; CHECK: and w[[EXT:[0-9]+]], w0, #0x1
+; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
%ext_char = sext i8 %char to i64
store volatile i64 %ext_char, i64* @var64
@@ -73,8 +74,8 @@ define void @test_extension(i1 %bool, i8 %char, i16 %short, i32 %int) {
%ext_short = zext i16 %short to i64
store volatile i64 %ext_short, i64* @var64
-; CHECK: and [[EXT:x[0-9]+]], x2, #0xffff
-; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64]
+; CHECK: and w[[EXT:[0-9]+]], w2, #0xffff
+; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
%ext_int = zext i32 %int to i64
store volatile i64 %ext_int, i64* @var64
diff --git a/llvm/test/CodeGen/AArch64/arm64-arith.ll b/llvm/test/CodeGen/AArch64/arm64-arith.ll
index f36e706b15d..d5d9a1b9817 100644
--- a/llvm/test/CodeGen/AArch64/arm64-arith.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-arith.ll
@@ -123,7 +123,8 @@ entry:
define i64 @t14(i16 %a, i64 %x) nounwind ssp {
entry:
; CHECK-LABEL: t14:
-; CHECK: add x0, x1, w0, uxth #3
+; CHECK: and w8, w0, #0xffff
+; CHECK: add x0, x1, w8, uxtw #3
; CHECK: ret
%c = zext i16 %a to i64
%d = shl i64 %c, 3
diff --git a/llvm/test/CodeGen/AArch64/arm64-vector-ext.ll b/llvm/test/CodeGen/AArch64/arm64-vector-ext.ll
index 5bee1611e6c..994a9956cf7 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vector-ext.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vector-ext.ll
@@ -1,27 +1,27 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
-
-;CHECK: @func30
-;CHECK: ushll.4s v0, v0, #0
-;CHECK: movi.4s v1, #0x1
-;CHECK: and.16b v0, v0, v1
-;CHECK: str q0, [x0]
-;CHECK: ret
-
-%T0_30 = type <4 x i1>
-%T1_30 = type <4 x i32>
-define void @func30(%T0_30 %v0, %T1_30* %p1) {
- %r = zext %T0_30 %v0 to %T1_30
- store %T1_30 %r, %T1_30* %p1
- ret void
-}
-
-; Extend from v1i1 was crashing things (PR20791). Make sure we do something
-; sensible instead.
-define <1 x i32> @autogen_SD7918() {
-; CHECK-LABEL: autogen_SD7918
-; CHECK: movi d0, #0000000000000000
-; CHECK-NEXT: ret
- %I29 = insertelement <1 x i1> zeroinitializer, i1 false, i32 0
- %ZE = zext <1 x i1> %I29 to <1 x i32>
- ret <1 x i32> %ZE
-}
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+;CHECK: @func30
+;CHECK: movi.4h v1, #0x1
+;CHECK: and.8b v0, v0, v1
+;CHECK: ushll.4s v0, v0, #0
+;CHECK: str q0, [x0]
+;CHECK: ret
+
+%T0_30 = type <4 x i1>
+%T1_30 = type <4 x i32>
+define void @func30(%T0_30 %v0, %T1_30* %p1) {
+ %r = zext %T0_30 %v0 to %T1_30
+ store %T1_30 %r, %T1_30* %p1
+ ret void
+}
+
+; Extend from v1i1 was crashing things (PR20791). Make sure we do something
+; sensible instead.
+define <1 x i32> @autogen_SD7918() {
+; CHECK-LABEL: autogen_SD7918
+; CHECK: movi d0, #0000000000000000
+; CHECK-NEXT: ret
+ %I29 = insertelement <1 x i1> zeroinitializer, i1 false, i32 0
+ %ZE = zext <1 x i1> %I29 to <1 x i32>
+ ret <1 x i32> %ZE
+}
diff --git a/llvm/test/CodeGen/AArch64/bitfield.ll b/llvm/test/CodeGen/AArch64/bitfield.ll
index e1e4f62f662..5f19b6943b8 100644
--- a/llvm/test/CodeGen/AArch64/bitfield.ll
+++ b/llvm/test/CodeGen/AArch64/bitfield.ll
@@ -3,51 +3,67 @@
@var32 = global i32 0
@var64 = global i64 0
-define void @test_extendb(i8 %var) {
-; CHECK-LABEL: test_extendb:
+define void @test_extendb32(i8 %var) {
+; CHECK-LABEL: test_extendb32:
%sxt32 = sext i8 %var to i32
store volatile i32 %sxt32, i32* @var32
; CHECK: sxtb {{w[0-9]+}}, {{w[0-9]+}}
- %sxt64 = sext i8 %var to i64
- store volatile i64 %sxt64, i64* @var64
-; CHECK: sxtb {{x[0-9]+}}, {{w[0-9]+}}
-
; N.b. this doesn't actually produce a bitfield instruction at the
; moment, but it's still a good test to have and the semantics are
; correct.
%uxt32 = zext i8 %var to i32
store volatile i32 %uxt32, i32* @var32
; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xff
+ ret void
+}
+
+define void @test_extendb64(i8 %var) {
+; CHECK-LABEL: test_extendb64:
+
+ %sxt64 = sext i8 %var to i64
+ store volatile i64 %sxt64, i64* @var64
+; CHECK: sxtb {{x[0-9]+}}, {{w[0-9]+}}
+; N.b. this doesn't actually produce a bitfield instruction at the
+; moment, but it's still a good test to have and the semantics are
+; correct.
%uxt64 = zext i8 %var to i64
store volatile i64 %uxt64, i64* @var64
-; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xff
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xff
ret void
}
-define void @test_extendh(i16 %var) {
-; CHECK-LABEL: test_extendh:
+define void @test_extendh32(i16 %var) {
+; CHECK-LABEL: test_extendh32:
%sxt32 = sext i16 %var to i32
store volatile i32 %sxt32, i32* @var32
; CHECK: sxth {{w[0-9]+}}, {{w[0-9]+}}
- %sxt64 = sext i16 %var to i64
- store volatile i64 %sxt64, i64* @var64
-; CHECK: sxth {{x[0-9]+}}, {{w[0-9]+}}
-
; N.b. this doesn't actually produce a bitfield instruction at the
; moment, but it's still a good test to have and the semantics are
; correct.
%uxt32 = zext i16 %var to i32
store volatile i32 %uxt32, i32* @var32
; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xffff
+ ret void
+}
+
+define void @test_extendh64(i16 %var) {
+; CHECK-LABEL: test_extendh64:
+
+ %sxt64 = sext i16 %var to i64
+ store volatile i64 %sxt64, i64* @var64
+; CHECK: sxth {{x[0-9]+}}, {{w[0-9]+}}
+; N.b. this doesn't actually produce a bitfield instruction at the
+; moment, but it's still a good test to have and the semantics are
+; correct.
%uxt64 = zext i16 %var to i64
store volatile i64 %uxt64, i64* @var64
-; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffff
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xffff
ret void
}
diff --git a/llvm/test/CodeGen/SystemZ/insert-05.ll b/llvm/test/CodeGen/SystemZ/insert-05.ll
index b76859a568f..1ea8a64e28e 100644
--- a/llvm/test/CodeGen/SystemZ/insert-05.ll
+++ b/llvm/test/CodeGen/SystemZ/insert-05.ll
@@ -214,8 +214,8 @@ define i64 @f18(i32 %a) {
; The truncation here isn't free; we need an explicit zero extension.
define i64 @f19(i32 %a) {
; CHECK-LABEL: f19:
-; CHECK: llgcr %r2, %r2
-; CHECK: oihl %r2, 1
+; CHECK: llcr %r2, %r2
+; CHECK: iihf %r2, 1
; CHECK: br %r14
%trunc = trunc i32 %a to i8
%ext = zext i8 %trunc to i64
diff --git a/llvm/test/CodeGen/X86/avx2-conversions.ll b/llvm/test/CodeGen/X86/avx2-conversions.ll
index f97a8ff69ee..402e2e04e62 100644
--- a/llvm/test/CodeGen/X86/avx2-conversions.ll
+++ b/llvm/test/CodeGen/X86/avx2-conversions.ll
@@ -61,8 +61,8 @@ define <8 x i32> @zext8(<8 x i16> %A) nounwind {
define <8 x i32> @zext_8i8_8i32(<8 x i8> %A) nounwind {
; CHECK-LABEL: zext_8i8_8i32:
; CHECK: ## BB#0:
+; CHECK-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; CHECK-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
%B = zext <8 x i8> %A to <8 x i32>
ret <8 x i32>%B
diff --git a/llvm/test/CodeGen/X86/vec_cast2.ll b/llvm/test/CodeGen/X86/vec_cast2.ll
index 7c039bc9850..311c7870b4b 100644
--- a/llvm/test/CodeGen/X86/vec_cast2.ll
+++ b/llvm/test/CodeGen/X86/vec_cast2.ll
@@ -46,11 +46,10 @@ define <4 x float> @foo1_4(<4 x i8> %src) {
define <8 x float> @foo2_8(<8 x i8> %src) {
; CHECK-LABEL: foo2_8:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4,4,5,5,6,6,7,7]
-; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; CHECK-NEXT: vpand %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpand LCPI2_0, %xmm0, %xmm0
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; CHECK-NEXT: vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
; CHECK-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll
index 04ace30b819..752eef68fbc 100644
--- a/llvm/test/CodeGen/X86/vector-zext.ll
+++ b/llvm/test/CodeGen/X86/vector-zext.ll
@@ -910,50 +910,46 @@ entry:
define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
; SSE2-LABEL: zext_8i8_to_8i32:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; SSE2-NEXT: pand %xmm1, %xmm2
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i8_to_8i32:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movdqa %xmm0, %xmm2
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; SSSE3-NEXT: pand %xmm1, %xmm2
-; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
-; SSSE3-NEXT: pand %xmm0, %xmm1
-; SSSE3-NEXT: movdqa %xmm2, %xmm0
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
+; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_8i8_to_8i32:
; SSE41: # BB#0: # %entry
-; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; SSE41-NEXT: pand %xmm1, %xmm2
-; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
-; SSE41-NEXT: pand %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE41-NEXT: pxor %xmm2, %xmm2
+; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_8i8_to_8i32:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4,4,5,5,6,6,7,7]
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_8i8_to_8i32:
; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
%t = zext <8 x i8> %z to <8 x i32>
OpenPOWER on IntegriCloud