From 34e9931bec0bd85862fb6ed2e14b43cb3e43fe4c Mon Sep 17 00:00:00 2001 From: James Molloy Date: Thu, 6 Sep 2012 09:16:01 +0000 Subject: Optimize codegen for VSETLNi{8,16,32} operating on Q registers. Degenerate to a VSETLN on D registers, instead of an (INSERT_SUBREG (VSETLN (EXTRACT_SUBREG ))) sequence to help the register coalescer. llvm-svn: 163298 --- llvm/test/CodeGen/ARM/integer_insertelement.ll | 35 ++++++++++++++++++++++++++ llvm/test/CodeGen/ARM/vget_lane.ll | 2 +- 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/ARM/integer_insertelement.ll (limited to 'llvm/test/CodeGen') diff --git a/llvm/test/CodeGen/ARM/integer_insertelement.ll b/llvm/test/CodeGen/ARM/integer_insertelement.ll new file mode 100644 index 00000000000..4f2d7e3f73e --- /dev/null +++ b/llvm/test/CodeGen/ARM/integer_insertelement.ll @@ -0,0 +1,35 @@ +; RUN: llc %s -o - -march=arm -mattr=+neon | FileCheck %s + +; This test checks that when inserting one (integer) element into a vector, +; the vector is not spuriously copied. "vorr dX, dY, dY" is the way of moving +; one DPR to another that we check for. + +; CHECK: @f +; CHECK-NOT: vorr d +; CHECK: vmov s +; CHECK-NOT: vorr d +; CHECK: mov pc, lr +define <4 x i32> @f(<4 x i32> %in) { + %1 = insertelement <4 x i32> %in, i32 255, i32 3 + ret <4 x i32> %1 +} + +; CHECK: @g +; CHECK-NOT: vorr d +; CHECK: vmov.16 d +; CHECK-NOT: vorr d +; CHECK: mov pc, lr +define <8 x i16> @g(<8 x i16> %in) { + %1 = insertelement <8 x i16> %in, i16 255, i32 7 + ret <8 x i16> %1 +} + +; CHECK: @h +; CHECK-NOT: vorr d +; CHECK: vmov.8 d +; CHECK-NOT: vorr d +; CHECK: mov pc, lr +define <16 x i8> @h(<16 x i8> %in) { + %1 = insertelement <16 x i8> %in, i8 255, i32 15 + ret <16 x i8> %1 +} diff --git a/llvm/test/CodeGen/ARM/vget_lane.ll b/llvm/test/CodeGen/ARM/vget_lane.ll index 1fc885d6137..2ed65c9aeed 100644 --- a/llvm/test/CodeGen/ARM/vget_lane.ll +++ b/llvm/test/CodeGen/ARM/vget_lane.ll @@ -200,7 +200,7 @@ define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind { define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind { ;CHECK: vsetQ_lane32: -;CHECK: vmov.32 +;CHECK: vmov s %tmp1 = load <4 x i32>* %A %tmp2 = insertelement <4 x i32> %tmp1, i32 %B, i32 1 ret <4 x i32> %tmp2 -- cgit v1.2.3