diff options
Diffstat (limited to 'llvm/test/CodeGen/ARM')
-rw-r--r-- | llvm/test/CodeGen/ARM/vlddup.ll | 144 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/vmul.ll | 25 |
2 files changed, 6 insertions, 163 deletions
diff --git a/llvm/test/CodeGen/ARM/vlddup.ll b/llvm/test/CodeGen/ARM/vlddup.ll index 66dcea81dd8..c115a3863d0 100644 --- a/llvm/test/CodeGen/ARM/vlddup.ll +++ b/llvm/test/CodeGen/ARM/vlddup.ll @@ -10,84 +10,6 @@ define <8 x i8> @vld1dupi8(i8* %A) nounwind { ret <8 x i8> %tmp3 } -define <8 x i8> @vld1dupi8_preinc(i8** noalias nocapture %a, i32 %b) nounwind { -entry: -;CHECK-LABEL: vld1dupi8_preinc: -;CHECK: vld1.8 {d16[]}, [r1] - %0 = load i8*, i8** %a, align 4 - %add.ptr = getelementptr inbounds i8, i8* %0, i32 %b - %1 = load i8, i8* %add.ptr, align 1 - %2 = insertelement <8 x i8> undef, i8 %1, i32 0 - %lane = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer - store i8* %add.ptr, i8** %a, align 4 - ret <8 x i8> %lane -} - -define <8 x i8> @vld1dupi8_postinc_fixed(i8** noalias nocapture %a) nounwind { -entry: -;CHECK-LABEL: vld1dupi8_postinc_fixed: -;CHECK: vld1.8 {d16[]}, [r1]! - %0 = load i8*, i8** %a, align 4 - %1 = load i8, i8* %0, align 1 - %2 = insertelement <8 x i8> undef, i8 %1, i32 0 - %lane = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer - %add.ptr = getelementptr inbounds i8, i8* %0, i32 1 - store i8* %add.ptr, i8** %a, align 4 - ret <8 x i8> %lane -} - -define <8 x i8> @vld1dupi8_postinc_register(i8** noalias nocapture %a, i32 %n) nounwind { -entry: -;CHECK-LABEL: vld1dupi8_postinc_register: -;CHECK: vld1.8 {d16[]}, [r2], r1 - %0 = load i8*, i8** %a, align 4 - %1 = load i8, i8* %0, align 1 - %2 = insertelement <8 x i8> undef, i8 %1, i32 0 - %lane = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer - %add.ptr = getelementptr inbounds i8, i8* %0, i32 %n - store i8* %add.ptr, i8** %a, align 4 - ret <8 x i8> %lane -} - -define <16 x i8> @vld1dupqi8_preinc(i8** noalias nocapture %a, i32 %b) nounwind { -entry: -;CHECK-LABEL: vld1dupqi8_preinc: -;CHECK: vld1.8 {d16[], d17[]}, [r1] - %0 = load i8*, i8** %a, align 4 - %add.ptr = getelementptr inbounds i8, i8* %0, i32 %b - %1 = load i8, i8* %add.ptr, 
align 1 - %2 = insertelement <16 x i8> undef, i8 %1, i32 0 - %lane = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer - store i8* %add.ptr, i8** %a, align 4 - ret <16 x i8> %lane -} - -define <16 x i8> @vld1dupqi8_postinc_fixed(i8** noalias nocapture %a) nounwind { -entry: -;CHECK-LABEL: vld1dupqi8_postinc_fixed: -;CHECK: vld1.8 {d16[], d17[]}, [r1]! - %0 = load i8*, i8** %a, align 4 - %1 = load i8, i8* %0, align 1 - %2 = insertelement <16 x i8> undef, i8 %1, i32 0 - %lane = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer - %add.ptr = getelementptr inbounds i8, i8* %0, i32 1 - store i8* %add.ptr, i8** %a, align 4 - ret <16 x i8> %lane -} - -define <16 x i8> @vld1dupqi8_postinc_register(i8** noalias nocapture %a, i32 %n) nounwind { -entry: -;CHECK-LABEL: vld1dupqi8_postinc_register: -;CHECK: vld1.8 {d16[], d17[]}, [r2], r1 - %0 = load i8*, i8** %a, align 4 - %1 = load i8, i8* %0, align 1 - %2 = insertelement <16 x i8> undef, i8 %1, i32 0 - %lane = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer - %add.ptr = getelementptr inbounds i8, i8* %0, i32 %n - store i8* %add.ptr, i8** %a, align 4 - ret <16 x i8> %lane -} - define <4 x i16> @vld1dupi16(i16* %A) nounwind { ;CHECK-LABEL: vld1dupi16: ;Check the alignment value. Max for this instruction is 16 bits: @@ -98,15 +20,6 @@ define <4 x i16> @vld1dupi16(i16* %A) nounwind { ret <4 x i16> %tmp3 } -define <4 x i16> @vld1dupi16_misaligned(i16* %A) nounwind { -;CHECK-LABEL: vld1dupi16_misaligned: -;CHECK: vld1.16 {d16[]}, [r0] - %tmp1 = load i16, i16* %A, align 1 - %tmp2 = insertelement <4 x i16> undef, i16 %tmp1, i32 0 - %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> undef, <4 x i32> zeroinitializer - ret <4 x i16> %tmp3 -} - define <2 x i32> @vld1dupi32(i32* %A) nounwind { ;CHECK-LABEL: vld1dupi32: ;Check the alignment value. 
Max for this instruction is 32 bits: @@ -162,63 +75,6 @@ define <8 x i8> @vld2dupi8(i8* %A) nounwind { ret <8 x i8> %tmp5 } -define void @vld2dupi8_preinc(%struct.__neon_int8x8x2_t* noalias nocapture sret %agg.result, i8** noalias nocapture %a, i32 %b) nounwind { -;CHECK-LABEL: vld2dupi8_preinc: -;CHECK: vld2.8 {d16[], d17[]}, [r2] -entry: - %0 = load i8*, i8** %a, align 4 - %add.ptr = getelementptr inbounds i8, i8* %0, i32 %b - %vld_dup = tail call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %add.ptr, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) - %1 = extractvalue %struct.__neon_int8x8x2_t %vld_dup, 0 - %lane = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer - %2 = extractvalue %struct.__neon_int8x8x2_t %vld_dup, 1 - %lane1 = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer - store i8* %add.ptr, i8** %a, align 4 - %r8 = getelementptr inbounds %struct.__neon_int8x8x2_t, %struct.__neon_int8x8x2_t* %agg.result, i32 0, i32 0 - store <8 x i8> %lane, <8 x i8>* %r8, align 8 - %r11 = getelementptr inbounds %struct.__neon_int8x8x2_t, %struct.__neon_int8x8x2_t* %agg.result, i32 0, i32 1 - store <8 x i8> %lane1, <8 x i8>* %r11, align 8 - ret void -} - -define void @vld2dupi8_postinc_fixed(%struct.__neon_int8x8x2_t* noalias nocapture sret %agg.result, i8** noalias nocapture %a) nounwind { -entry: -;CHECK-LABEL: vld2dupi8_postinc_fixed: -;CHECK: vld2.8 {d16[], d17[]}, [r2]! 
- %0 = load i8*, i8** %a, align 4 - %vld_dup = tail call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %0, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) - %1 = extractvalue %struct.__neon_int8x8x2_t %vld_dup, 0 - %lane = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer - %2 = extractvalue %struct.__neon_int8x8x2_t %vld_dup, 1 - %lane1 = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer - %add.ptr = getelementptr inbounds i8, i8* %0, i32 2 - store i8* %add.ptr, i8** %a, align 4 - %r7 = getelementptr inbounds %struct.__neon_int8x8x2_t, %struct.__neon_int8x8x2_t* %agg.result, i32 0, i32 0 - store <8 x i8> %lane, <8 x i8>* %r7, align 8 - %r10 = getelementptr inbounds %struct.__neon_int8x8x2_t, %struct.__neon_int8x8x2_t* %agg.result, i32 0, i32 1 - store <8 x i8> %lane1, <8 x i8>* %r10, align 8 - ret void -} - -define void @vld2dupi8_postinc_variable(%struct.__neon_int8x8x2_t* noalias nocapture sret %agg.result, i8** noalias nocapture %a, i32 %n) nounwind { -entry: -;CHECK-LABEL: vld2dupi8_postinc_variable: -;CHECK: vld2.8 {d16[], d17[]}, [r3], r2 - %0 = load i8*, i8** %a, align 4 - %vld_dup = tail call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %0, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) - %1 = extractvalue %struct.__neon_int8x8x2_t %vld_dup, 0 - %lane = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer - %2 = extractvalue %struct.__neon_int8x8x2_t %vld_dup, 1 - %lane1 = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer - %add.ptr = getelementptr inbounds i8, i8* %0, i32 %n - store i8* %add.ptr, i8** %a, align 4 - %r7 = getelementptr inbounds %struct.__neon_int8x8x2_t, %struct.__neon_int8x8x2_t* %agg.result, i32 0, i32 0 - store <8 x i8> %lane, <8 x i8>* %r7, align 8 - %r10 = getelementptr inbounds %struct.__neon_int8x8x2_t, %struct.__neon_int8x8x2_t* %agg.result, i32 0, i32 1 - store <8 x i8> %lane1, <8 x i8>* %r10, align 8 - ret void -} - define 
<4 x i16> @vld2dupi16(i8* %A) nounwind { ;CHECK-LABEL: vld2dupi16: ;Check that a power-of-two alignment smaller than the total size of the memory diff --git a/llvm/test/CodeGen/ARM/vmul.ll b/llvm/test/CodeGen/ARM/vmul.ll index fcffe175e2b..0455190b4c9 100644 --- a/llvm/test/CodeGen/ARM/vmul.ll +++ b/llvm/test/CodeGen/ARM/vmul.ll @@ -635,26 +635,13 @@ entry: ret void } -define void @fmul_splat(<4 x float> * %a, <4 x float>* nocapture %dst, float %tmp) nounwind { -; Look for a scalar float rather than a splat, then a vector*scalar multiply. -; CHECK: vmov s0, r2 +define void @foo(<4 x float> * %a, <4 x float>* nocapture %dst, float* nocapture readonly %src) nounwind { +; Look for doing a normal scalar FP load rather than a to-all-lanes load. +; e.g., "ldr s0, [r2]" rather than "vld1.32 {d18[], d19[]}, [r2:32]" +; Then check that the vector multiply has folded the splat to all lanes +; and used a vector * scalar instruction. +; CHECK: vldr {{s[0-9]+}}, [r2] ; CHECK: vmul.f32 q8, q8, d0[0] - %tmp5 = load <4 x float>, <4 x float>* %a, align 4 - %tmp6 = insertelement <4 x float> undef, float %tmp, i32 0 - %tmp7 = insertelement <4 x float> %tmp6, float %tmp, i32 1 - %tmp8 = insertelement <4 x float> %tmp7, float %tmp, i32 2 - %tmp9 = insertelement <4 x float> %tmp8, float %tmp, i32 3 - %tmp10 = fmul <4 x float> %tmp9, %tmp5 - store <4 x float> %tmp10, <4 x float>* %dst, align 4 - ret void -} - -define void @fmul_splat_load(<4 x float> * %a, <4 x float>* nocapture %dst, float* nocapture readonly %src) nounwind { -; Look for doing a normal scalar FP load rather than an to-all-lanes load, -; then a vector*scalar multiply. -; FIXME: Temporarily broken due to splat representation changes. -; CHECK: vld1.32 {d18[], d19[]}, [r2:32] -; CHECK: vmul.f32 q8, q9, q8 %tmp = load float, float* %src, align 4 %tmp5 = load <4 x float>, <4 x float>* %a, align 4 %tmp6 = insertelement <4 x float> undef, float %tmp, i32 0 |