author | Evandro Menezes <e.menezes@samsung.com> | 2018-01-04 21:43:12 +0000 |
---|---|---|
committer | Evandro Menezes <e.menezes@samsung.com> | 2018-01-04 21:43:12 +0000 |
commit | 6161a0b3b0ae3160c2317ad31f273973e1f889b0 (patch) | |
tree | 179e9f5bcd47c17606e5e267e6216b88a8444599 /llvm/test/CodeGen/AArch64/arm64-build-vector.ll | |
parent | e7c06423c17381fc2cad0c7fd5e0c9b2a3275c21 (diff) | |
download | bcm5719-llvm-6161a0b3b0ae3160c2317ad31f273973e1f889b0.tar.gz bcm5719-llvm-6161a0b3b0ae3160c2317ad31f273973e1f889b0.zip |
[AArch64] Improve code generation of vector build
Instead of using, for example, `dup v0.4s, wzr`, which requires a transfer from the general-purpose to the SIMD register file, use the more efficient `movi v0.4s, #0`.
Differential revision: https://reviews.llvm.org/D41515
llvm-svn: 321824
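For context, the intent of the change can be sketched as a FileCheck test in the style of the one being deleted below: a vector that is all zeros except one lane should no longer be materialized by a `dup` of `wzr`. The function name and CHECK lines here are illustrative assumptions and are not part of this commit or its test suite:

```llvm
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s

; Hypothetical check: the zero lanes should come from a movi immediate in the
; SIMD unit rather than a dup of wzr from the general-purpose register file.
define <16 x i8> @one_nonzero_lane(i8 %v) nounwind {
; CHECK-LABEL: one_nonzero_lane:
; CHECK-NOT: dup.16b {{v[0-9]+}}, wzr
; CHECK: movi
  %vec = insertelement <16 x i8> zeroinitializer, i8 %v, i32 0
  ret <16 x i8> %vec
}
```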
Diffstat (limited to 'llvm/test/CodeGen/AArch64/arm64-build-vector.ll')
-rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-build-vector.ll | 18 |
1 files changed, 0 insertions, 18 deletions
diff --git a/llvm/test/CodeGen/AArch64/arm64-build-vector.ll b/llvm/test/CodeGen/AArch64/arm64-build-vector.ll
index 9d324735049..68dea215c8c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-build-vector.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-build-vector.ll
@@ -1,23 +1,5 @@
 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
-; Check that building up a vector w/ only one non-zero lane initializes
-; intelligently.
-define void @one_lane(i32* nocapture %out_int, i32 %skip0) nounwind {
-; CHECK-LABEL: one_lane:
-; CHECK: dup.16b v[[REG:[0-9]+]], wzr
-; CHECK-NEXT: mov.b v[[REG]][0], w1
-; v and q are aliases, and str is preferred against st.16b when possible
-; rdar://11246289
-; CHECK: str q[[REG]], [x0]
-; CHECK: ret
-  %conv = trunc i32 %skip0 to i8
-  %vset_lane = insertelement <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, i8 %conv, i32 0
-  %tmp = bitcast i32* %out_int to <4 x i32>*
-  %tmp1 = bitcast <16 x i8> %vset_lane to <4 x i32>
-  store <4 x i32> %tmp1, <4 x i32>* %tmp, align 16
-  ret void
-}
-
 ; Check that building a vector from floats doesn't insert an unnecessary
 ; copy for lane zero.
 define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind {