summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/ARM/vuzp.ll
diff options
context:
space:
mode:
author: Eli Friedman <efriedma@codeaurora.org> 2016-12-20 20:05:07 +0000
committer: Eli Friedman <efriedma@codeaurora.org> 2016-12-20 20:05:07 +0000
commit: d03df8145f3b84d51dcb0c37053bfb7dbb69669f (patch)
tree: b06ef2528b322e69d62a317ed24cb3f8ea9a0bcc /llvm/test/CodeGen/ARM/vuzp.ll
parent: 9cc1e196034e5844bb11a4bc73bc6ff0b2f4c69e (diff)
download: bcm5719-llvm-d03df8145f3b84d51dcb0c37053bfb7dbb69669f.tar.gz
download: bcm5719-llvm-d03df8145f3b84d51dcb0c37053bfb7dbb69669f.zip
[ARM] Implement isExtractSubvectorCheap.
See https://reviews.llvm.org/D6678 for the history of isExtractSubvectorCheap. Essentially the same considerations apply to ARM.

This temporarily breaks the formation of vpadd/vpaddl in certain cases; AddCombineToVPADDL essentially assumes that we won't form VUZP shuffles. See https://reviews.llvm.org/D27779 for the follow-up fix.

Differential Revision: https://reviews.llvm.org/D27774

llvm-svn: 290198
Diffstat (limited to 'llvm/test/CodeGen/ARM/vuzp.ll')
-rw-r--r-- llvm/test/CodeGen/ARM/vuzp.ll 31
1 file changed, 25 insertions, 6 deletions
diff --git a/llvm/test/CodeGen/ARM/vuzp.ll b/llvm/test/CodeGen/ARM/vuzp.ll
index 95a4ed7a915..a83a4df5490 100644
--- a/llvm/test/CodeGen/ARM/vuzp.ll
+++ b/llvm/test/CodeGen/ARM/vuzp.ll
@@ -7,14 +7,14 @@ define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
; CHECK-NEXT: vuzp.8 d17, d16
-; CHECK-NEXT: vadd.i8 d16, d17, d16
+; CHECK-NEXT: vmul.i8 d16, d17, d16
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
- %tmp5 = add <8 x i8> %tmp3, %tmp4
+ %tmp5 = mul <8 x i8> %tmp3, %tmp4
ret <8 x i8> %tmp5
}
@@ -39,14 +39,14 @@ define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
; CHECK-NEXT: vuzp.16 d17, d16
-; CHECK-NEXT: vadd.i16 d16, d17, d16
+; CHECK-NEXT: vmul.i16 d16, d17, d16
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
- %tmp5 = add <4 x i16> %tmp3, %tmp4
+ %tmp5 = mul <4 x i16> %tmp3, %tmp4
ret <4 x i16> %tmp5
}
@@ -207,14 +207,14 @@ define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
; CHECK-NEXT: vuzp.8 d17, d16
-; CHECK-NEXT: vadd.i8 d16, d17, d16
+; CHECK-NEXT: vmul.i8 d16, d17, d16
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14>
%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15>
- %tmp5 = add <8 x i8> %tmp3, %tmp4
+ %tmp5 = mul <8 x i8> %tmp3, %tmp4
ret <8 x i8> %tmp5
}
@@ -550,3 +550,22 @@ define <10 x i8> @vuzp_wide_type(<10 x i8> %tr0, <10 x i8> %tr1,
%rv = select <10 x i1> %c, <10 x i8> %tr0, <10 x i8> %tr1
ret <10 x i8> %rv
}
+; Splits the <16 x i8> argument into its even- and odd-indexed bytes and returns both halves in one struct; the expected output below contains exactly one vuzp.8.
+%struct.uint8x8x2_t = type { [2 x <8 x i8>] }
+define %struct.uint8x8x2_t @vuzp_extract_subvector(<16 x i8> %t) #0 {
+; CHECK-LABEL: vuzp_extract_subvector:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vorr d18, d17, d17
+; CHECK-NEXT: vuzp.8 d16, d18
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d18
+; CHECK-NEXT: mov pc, lr
+; %vuzp.i selects lanes 0,2,...,14 of %t and %vuzp1.i selects lanes 1,3,...,15; they are inserted at indices [0][0] and [0][1] of the returned struct.
+ %vuzp.i = shufflevector <16 x i8> %t, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %vuzp1.i = shufflevector <16 x i8> %t, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %.fca.0.0.insert = insertvalue %struct.uint8x8x2_t undef, <8 x i8> %vuzp.i, 0, 0
+ %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1
+ ret %struct.uint8x8x2_t %.fca.0.1.insert
+}
OpenPOWER on IntegriCloud