summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/ARM
diff options
context:
space:
mode:
author: Quentin Colombet <qcolombet@apple.com> 2015-12-04 01:53:14 +0000
committer: Quentin Colombet <qcolombet@apple.com> 2015-12-04 01:53:14 +0000
commit: 901f036353d328ad89969eb84eaa83aaefe49eed (patch)
tree: 405cc9966604fc21fd33b0ad2e8cf13142257b87 /llvm/test/CodeGen/ARM
parent: 97d0ffbe0647c4ec49ed4c1e58c9067963f57f95 (diff)
download: bcm5719-llvm-901f036353d328ad89969eb84eaa83aaefe49eed.tar.gz
bcm5719-llvm-901f036353d328ad89969eb84eaa83aaefe49eed.zip
[ARM] When a bitcast is about to be turned into a VMOVDRR, try to combine it
with its source instead of forcing the values on GPRs. This improves the lowering of vector code when such bitcasts happen in the middle of vector computations. rdar://problem/23691584 llvm-svn: 254684
Diffstat (limited to 'llvm/test/CodeGen/ARM')
-rw-r--r-- llvm/test/CodeGen/ARM/combine-vmovdrr.ll 72
1 files changed, 72 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/ARM/combine-vmovdrr.ll b/llvm/test/CodeGen/ARM/combine-vmovdrr.ll
new file mode 100644
index 00000000000..358f7e3a983
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/combine-vmovdrr.ll
@@ -0,0 +1,72 @@
+; RUN: llc %s -o - | FileCheck %s
+
+target triple = "thumbv7s-apple-ios" ; the llc invocation passes no -mtriple, so this triple selects the target
+
+declare <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %shuffle.i.i307, <8 x i8> %shuffle.i27.i308, <8 x i8> %vtbl2.i25.i) ; NEON table lookup taking two <8 x i8> table halves plus an <8 x i8> third operand; the expected output in this file shows it as vtbl.8
+
+; Check that we get the motivating example:
+; Without the combine, the bitcasts force the values to go through the GPRs,
+; even though they are both defined and used on VPRs.
+;
+; CHECK-LABEL: motivatingExample:
+; CHECK: vldr [[ARG2_VAL:d[0-9]+]], [r1]
+; CHECK-NEXT: vld1.32 {[[ARG1_VALlo:d[0-9]+]], [[ARG1_VALhi:d[0-9]+]]}, [r0]
+; CHECK-NEXT: vtbl.8 [[RES:d[0-9]+]], {[[ARG1_VALlo]], [[ARG1_VALhi]]}, [[ARG2_VAL]]
+; CHECK-NEXT: vstr [[RES]], [r1]
+; CHECK-NEXT: bx lr
+define void @motivatingExample(<2 x i64>* %addr, <8 x i8>* %addr2) { ; table lookup whose two table halves are the i64 lanes of one <2 x i64> load
+ %shuffle.i.bc.i309 = load <2 x i64>, <2 x i64>* %addr ; table data, loaded as two i64 lanes
+ %vtbl2.i25.i = load <8 x i8>, <8 x i8>* %addr2 ; third operand of the vtbl2 call below
+ %shuffle.i.extract.i310 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 0 ; lane 0, constant index
+ %shuffle.i27.extract.i311 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 1 ; lane 1, constant index
+ %tmp45 = bitcast i64 %shuffle.i.extract.i310 to <8 x i8> ; scalar i64 reinterpreted as a vector: the bitcast this test targets
+ %tmp46 = bitcast i64 %shuffle.i27.extract.i311 to <8 x i8> ; same reinterpretation for lane 1
+ %vtbl2.i25.i313 = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp45, <8 x i8> %tmp46, <8 x i8> %vtbl2.i25.i) ; both table halves come straight from the bitcasts
+ store <8 x i8> %vtbl2.i25.i313, <8 x i8>* %addr2 ; result is written back through %addr2
+ ret void
+}
+
+; Check that we do not perform the transformation for a dynamic (non-constant) index.
+; CHECK-LABEL: dynamicIndex:
+; CHECK-NOT: mul
+; CHECK: pop
+define void @dynamicIndex(<2 x i64>* %addr, <8 x i8>* %addr2, i32 %index) { ; same shape as @motivatingExample, except the first extract uses the runtime value %index
+ %shuffle.i.bc.i309 = load <2 x i64>, <2 x i64>* %addr ; table data, loaded as two i64 lanes
+ %vtbl2.i25.i = load <8 x i8>, <8 x i8>* %addr2 ; third operand of the vtbl2 call below
+ %shuffle.i.extract.i310 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 %index ; lane chosen at run time: the case this function exercises
+ %shuffle.i27.extract.i311 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 1 ; lane 1, constant index
+ %tmp45 = bitcast i64 %shuffle.i.extract.i310 to <8 x i8> ; bitcast fed by the dynamically indexed extract
+ %tmp46 = bitcast i64 %shuffle.i27.extract.i311 to <8 x i8> ; bitcast fed by the constant-index extract
+ %vtbl2.i25.i313 = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp45, <8 x i8> %tmp46, <8 x i8> %vtbl2.i25.i) ; both table halves come from the bitcasts
+ store <8 x i8> %vtbl2.i25.i313, <8 x i8>* %addr2 ; result is written back through %addr2
+ ret void
+}
+
+; Check that we do not perform the transformation when the value feeding the
+; bitcast (the extracted lane) has several uses.
+; CHECK-LABEL: severalUses:
+; ARG1_VALlo is hard coded because we need to access the high part of d0,
+; i.e., s1, and we can't express that with filecheck.
+; CHECK: vld1.32 {[[ARG1_VALlo:d0]], [[ARG1_VALhi:d[0-9]+]]}, [r0]
+; CHECK-NEXT: vldr [[ARG2_VAL:d[0-9]+]], [r1]
+; s1 is actually 2 * ARG1_VALlo + 1, but we cannot express that with filecheck.
+; CHECK-NEXT: vmov [[REThi:r[0-9]+]], s1
+; We build the return value here. s0 is 2 * ARG1_VALlo.
+; CHECK-NEXT: vmov r0, s0
+; This copy is correct but actually useless. We should be able to clean it up.
+; CHECK-NEXT: vmov [[ARG1_VALloCPY:d[0-9]+]], r0, [[REThi]]
+; CHECK-NEXT: vtbl.8 [[RES:d[0-9]+]], {[[ARG1_VALloCPY]], [[ARG1_VALhi]]}, [[ARG2_VAL]]
+; CHECK-NEXT: vstr [[RES]], [r1]
+; CHECK-NEXT: mov r1, [[REThi]]
+; CHECK-NEXT: bx lr
+define i64 @severalUses(<2 x i64>* %addr, <8 x i8>* %addr2) { ; same shape as @motivatingExample, except lane 0 is also returned as an i64
+ %shuffle.i.bc.i309 = load <2 x i64>, <2 x i64>* %addr ; table data, loaded as two i64 lanes
+ %vtbl2.i25.i = load <8 x i8>, <8 x i8>* %addr2 ; third operand of the vtbl2 call below
+ %shuffle.i.extract.i310 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 0 ; lane 0; used by both the bitcast and the ret below
+ %shuffle.i27.extract.i311 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 1 ; lane 1; used only by its bitcast
+ %tmp45 = bitcast i64 %shuffle.i.extract.i310 to <8 x i8> ; first use of lane 0
+ %tmp46 = bitcast i64 %shuffle.i27.extract.i311 to <8 x i8> ; lane 1 reinterpreted as a vector
+ %vtbl2.i25.i313 = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp45, <8 x i8> %tmp46, <8 x i8> %vtbl2.i25.i) ; both table halves come from the bitcasts
+ store <8 x i8> %vtbl2.i25.i313, <8 x i8>* %addr2 ; result is written back through %addr2
+ ret i64 %shuffle.i.extract.i310 ; second use of lane 0, as the scalar return value
+}
OpenPOWER on IntegriCloud