summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86
diff options
context:
space:
mode:
authorAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2014-03-18 17:12:59 +0000
committerAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2014-03-18 17:12:59 +0000
commit28f46d9f390123b822f82403649284d6f61e6153 (patch)
tree73b4df9d3e0b6cfdcd220a40cbed1f62889d9b7f /llvm/test/CodeGen/X86
parented39e7cfebba4e92839877422c49db93a50e3a32 (diff)
downloadbcm5719-llvm-28f46d9f390123b822f82403649284d6f61e6153.tar.gz
bcm5719-llvm-28f46d9f390123b822f82403649284d6f61e6153.zip
[DAGCombiner] teach how to simplify xor/and/or nodes according to the following rules:
1) (AND (shuf (A, C, Mask), shuf (B, C, Mask)) -> shuf (AND (A, B), C, Mask) 2) (OR (shuf (A, C, Mask), shuf (B, C, Mask)) -> shuf (OR (A, B), C, Mask) 3) (XOR (shuf (A, C, Mask), shuf (B, C, Mask)) -> shuf (XOR (A, B), V_0, Mask) 4) (AND (shuf (C, A, Mask), shuf (C, B, Mask)) -> shuf (C, AND (A, B), Mask) 5) (OR (shuf (C, A, Mask), shuf (C, B, Mask)) -> shuf (C, OR (A, B), Mask) 6) (XOR (shuf (C, A, Mask), shuf (C, B, Mask)) -> shuf (V_0, XOR (A, B), Mask) llvm-svn: 204160
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r--llvm/test/CodeGen/X86/combine-or.ll2
-rw-r--r--llvm/test/CodeGen/X86/combine-vec-shuffle.ll253
2 files changed, 255 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/combine-or.ll b/llvm/test/CodeGen/X86/combine-or.ll
index 60b6d756165..c1ce53334ec 100644
--- a/llvm/test/CodeGen/X86/combine-or.ll
+++ b/llvm/test/CodeGen/X86/combine-or.ll
@@ -251,6 +251,7 @@ define <2 x i64> @test20(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test20
; CHECK-NOT: xorps
; CHECK: orps
+; CHECK-NEXT: movq
; CHECK-NEXT: ret
@@ -262,6 +263,7 @@ define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
}
; CHECK-LABEL: test21
; CHECK: por
+; CHECK-NEXT: pslldq
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/X86/combine-vec-shuffle.ll b/llvm/test/CodeGen/X86/combine-vec-shuffle.ll
new file mode 100644
index 00000000000..9e6ab892713
--- /dev/null
+++ b/llvm/test/CodeGen/X86/combine-vec-shuffle.ll
@@ -0,0 +1,253 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
+
+; Verify that the DAGCombiner correctly folds according to the following rules:
+
+; fold (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
+; fold (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C)
+; fold (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
+
+; fold (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
+; fold (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B))
+; fold (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
+
+
+
+define <4 x i32> @test1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+ %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
+ %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
+ %and = and <4 x i32> %shuf1, %shuf2
+ ret <4 x i32> %and
+}
+; CHECK-LABEL: test1
+; CHECK-NOT: pshufd
+; CHECK: pand
+; CHECK-NEXT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+ %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
+ %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
+ %or = or <4 x i32> %shuf1, %shuf2
+ ret <4 x i32> %or
+}
+; CHECK-LABEL: test2
+; CHECK-NOT: pshufd
+; CHECK: por
+; CHECK-NEXT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @test3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+ %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
+ %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
+ %xor = xor <4 x i32> %shuf1, %shuf2
+ ret <4 x i32> %xor
+}
+; CHECK-LABEL: test3
+; CHECK-NOT: pshufd
+; CHECK: pxor
+; CHECK-NEXT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+ %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
+ %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
+ %and = and <4 x i32> %shuf1, %shuf2
+ ret <4 x i32> %and
+}
+; CHECK-LABEL: test4
+; CHECK-NOT: pshufd
+; CHECK: pand
+; CHECK-NEXT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+ %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
+ %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
+ %or = or <4 x i32> %shuf1, %shuf2
+ ret <4 x i32> %or
+}
+; CHECK-LABEL: test5
+; CHECK-NOT: pshufd
+; CHECK: por
+; CHECK-NEXT: pshufd
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+ %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
+ %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
+ %xor = xor <4 x i32> %shuf1, %shuf2
+ ret <4 x i32> %xor
+}
+; CHECK-LABEL: test6
+; CHECK-NOT: pshufd
+; CHECK: pxor
+; CHECK-NEXT: pshufd
+; CHECK-NEXT: ret
+
+
+; Verify that DAGCombiner moves the shuffle after the xor/and/or even if shuffles
+; are not performing a swizzle operations.
+
+define <4 x i32> @test1b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+ %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
+ %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
+ %and = and <4 x i32> %shuf1, %shuf2
+ ret <4 x i32> %and
+}
+; CHECK-LABEL: test1b
+; CHECK-NOT: blendps
+; CHECK: andps
+; CHECK-NEXT: blendps
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @test2b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+ %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
+ %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
+ %or = or <4 x i32> %shuf1, %shuf2
+ ret <4 x i32> %or
+}
+; CHECK-LABEL: test2b
+; CHECK-NOT: blendps
+; CHECK: orps
+; CHECK-NEXT: blendps
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @test3b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+ %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
+ %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
+ %xor = xor <4 x i32> %shuf1, %shuf2
+ ret <4 x i32> %xor
+}
+; CHECK-LABEL: test3b
+; CHECK-NOT: blendps
+; CHECK: xorps
+; CHECK-NEXT: xorps
+; CHECK-NEXT: blendps
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @test4b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+ %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
+ %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
+ %and = and <4 x i32> %shuf1, %shuf2
+ ret <4 x i32> %and
+}
+; CHECK-LABEL: test4b
+; CHECK-NOT: blendps
+; CHECK: andps
+; CHECK-NEXT: blendps
+; CHECK: ret
+
+
+define <4 x i32> @test5b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+ %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
+ %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
+ %or = or <4 x i32> %shuf1, %shuf2
+ ret <4 x i32> %or
+}
+; CHECK-LABEL: test5b
+; CHECK-NOT: blendps
+; CHECK: orps
+; CHECK-NEXT: blendps
+; CHECK: ret
+
+
+define <4 x i32> @test6b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+ %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
+ %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
+ %xor = xor <4 x i32> %shuf1, %shuf2
+ ret <4 x i32> %xor
+}
+; CHECK-LABEL: test6b
+; CHECK-NOT: blendps
+; CHECK: xorps
+; CHECK-NEXT: xorps
+; CHECK-NEXT: blendps
+; CHECK: ret
+
+define <4 x i32> @test1c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+ %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
+ %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
+ %and = and <4 x i32> %shuf1, %shuf2
+ ret <4 x i32> %and
+}
+; CHECK-LABEL: test1c
+; CHECK-NOT: shufps
+; CHECK: andps
+; CHECK-NEXT: shufps
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @test2c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+ %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
+ %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
+ %or = or <4 x i32> %shuf1, %shuf2
+ ret <4 x i32> %or
+}
+; CHECK-LABEL: test2c
+; CHECK-NOT: shufps
+; CHECK: orps
+; CHECK-NEXT: shufps
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @test3c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+ %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
+ %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
+ %xor = xor <4 x i32> %shuf1, %shuf2
+ ret <4 x i32> %xor
+}
+; CHECK-LABEL: test3c
+; CHECK-NOT: shufps
+; CHECK: xorps
+; CHECK-NEXT: xorps
+; CHECK-NEXT: shufps
+; CHECK-NEXT: ret
+
+
+define <4 x i32> @test4c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+ %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
+ %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
+ %and = and <4 x i32> %shuf1, %shuf2
+ ret <4 x i32> %and
+}
+; CHECK-LABEL: test4c
+; CHECK-NOT: shufps
+; CHECK: andps
+; CHECK-NEXT: shufps
+; CHECK: ret
+
+
+define <4 x i32> @test5c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+ %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
+ %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
+ %or = or <4 x i32> %shuf1, %shuf2
+ ret <4 x i32> %or
+}
+; CHECK-LABEL: test5c
+; CHECK-NOT: shufps
+; CHECK: orps
+; CHECK-NEXT: shufps
+; CHECK: ret
+
+
+define <4 x i32> @test6c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+ %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
+ %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
+ %xor = xor <4 x i32> %shuf1, %shuf2
+ ret <4 x i32> %xor
+}
+; CHECK-LABEL: test6c
+; CHECK-NOT: shufps
+; CHECK: xorps
+; CHECK-NEXT: xorps
+; CHECK-NEXT: shufps
+; CHECK: ret
+
OpenPOWER on IntegriCloud