From ab99b59e8ca28f5366fb95b497e64ae44d67a9ca Mon Sep 17 00:00:00 2001 From: Jeroen Ketema Date: Wed, 30 Sep 2015 10:56:37 +0000 Subject: [ARM][NEON] Use address space in vld([1234]|[234]lane) and vst([1234]|[234]lane) instructions This commit changes the interface of the vld[1234], vld[234]lane, and vst[1234], vst[234]lane ARM neon intrinsics and associates an address space with the pointer that these intrinsics take. This changes, e.g., <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32) to <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8*, i32) This change ensures that address spaces are fully taken into account in the ARM target during lowering of interleaved loads and stores. Differential Revision: http://reviews.llvm.org/D12985 llvm-svn: 248887 --- llvm/test/CodeGen/ARM/vld3.ll | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'llvm/test/CodeGen/ARM/vld3.ll') diff --git a/llvm/test/CodeGen/ARM/vld3.ll b/llvm/test/CodeGen/ARM/vld3.ll index 0d14179ba73..c3e8ee8691f 100644 --- a/llvm/test/CodeGen/ARM/vld3.ll +++ b/llvm/test/CodeGen/ARM/vld3.ll @@ -16,7 +16,7 @@ define <8 x i8> @vld3i8(i8* %A) nounwind { ;CHECK-LABEL: vld3i8: ;Check the alignment value. Max for this instruction is 64 bits: ;CHECK: vld3.8 {d16, d17, d18}, [r0:64] - %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 32) + %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A, i32 32) %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2 %tmp4 = add <8 x i8> %tmp2, %tmp3 @@ -27,7 +27,7 @@ define <4 x i16> @vld3i16(i16* %A) nounwind { ;CHECK-LABEL: vld3i16: ;CHECK: vld3.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1) + %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16.p0i8(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2 %tmp4 = add <4 x i16> %tmp2, %tmp3 @@ -40,7 +40,7 @@ define <4 x i16> @vld3i16_update(i16** %ptr, i32 %inc) nounwind { ;CHECK: vld3.16 {d16, d17, d18}, [{{r[0-9]+}}], {{r[0-9]+}} %A = load i16*, i16** %ptr %tmp0 = bitcast i16* %A to i8* - %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1) + %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16.p0i8(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2 %tmp4 = add <4 x i16> %tmp2, %tmp3 @@ -53,7 +53,7 @@ define <2 x i32> @vld3i32(i32* %A) nounwind { ;CHECK-LABEL: vld3i32: ;CHECK: vld3.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8* %tmp0, i32 1) + %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32.p0i8(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 2 %tmp4 = add <2 x i32> %tmp2, %tmp3 @@ -64,7 +64,7 @@ define <2 x float> @vld3f(float* %A) nounwind { ;CHECK-LABEL: vld3f: ;CHECK: vld3.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8* %tmp0, i32 1) + %tmp1 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32.p0i8(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 2 %tmp4 = fadd <2 x float> %tmp2, %tmp3 @@ -76,7 +76,7 @@ define <1 x i64> @vld3i64(i64* %A) nounwind { ;Check the alignment value. Max for this instruction is 64 bits: ;CHECK: vld1.64 {d16, d17, d18}, [r0:64] %tmp0 = bitcast i64* %A to i8* - %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 16) + %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64.p0i8(i8* %tmp0, i32 16) %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2 %tmp4 = add <1 x i64> %tmp2, %tmp3 @@ -87,7 +87,7 @@ define <1 x i64> @vld3i64_update(i64** %ptr, i64* %A) nounwind { ;CHECK-LABEL: vld3i64_update: ;CHECK: vld1.64 {d16, d17, d18}, [r1:64]! %tmp0 = bitcast i64* %A to i8* - %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 16) + %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64.p0i8(i8* %tmp0, i32 16) %tmp5 = getelementptr i64, i64* %A, i32 3 store i64* %tmp5, i64** %ptr %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0 @@ -101,7 +101,7 @@ define <16 x i8> @vld3Qi8(i8* %A) nounwind { ;Check the alignment value. Max for this instruction is 64 bits: ;CHECK: vld3.8 {d16, d18, d20}, [r0:64]! ;CHECK: vld3.8 {d17, d19, d21}, [r0:64] - %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A, i32 32) + %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8.p0i8(i8* %A, i32 32) %tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2 %tmp4 = add <16 x i8> %tmp2, %tmp3 @@ -113,7 +113,7 @@ define <8 x i16> @vld3Qi16(i16* %A) nounwind { ;CHECK: vld3.16 ;CHECK: vld3.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8* %tmp0, i32 1) + %tmp1 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16.p0i8(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 2 %tmp4 = add <8 x i16> %tmp2, %tmp3 @@ -125,7 +125,7 @@ define <4 x i32> @vld3Qi32(i32* %A) nounwind { ;CHECK: vld3.32 ;CHECK: vld3.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 1) + %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32.p0i8(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 2 %tmp4 = add <4 x i32> %tmp2, %tmp3 @@ -139,7 +139,7 @@ define <4 x i32> @vld3Qi32_update(i32** %ptr) nounwind { ;CHECK: vld3.32 {d17, d19, d21}, [r[[R]]]! %A = load i32*, i32** %ptr %tmp0 = bitcast i32* %A to i8* - %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 1) + %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32.p0i8(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 2 %tmp4 = add <4 x i32> %tmp2, %tmp3 @@ -153,20 +153,20 @@ define <4 x float> @vld3Qf(float* %A) nounwind { ;CHECK: vld3.32 ;CHECK: vld3.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8* %tmp0, i32 1) + %tmp1 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32.p0i8(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 2 %tmp4 = fadd <4 x float> %tmp2, %tmp3 ret <4 x float> %tmp4 } -declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly -declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8*, i32) nounwind readonly -declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8*, i32) nounwind readonly -declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8*, i32) nounwind readonly -declare %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8*, i32) nounwind readonly +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16.p0i8(i8*, i32) nounwind readonly +declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32.p0i8(i8*, i32) nounwind readonly +declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32.p0i8(i8*, i32) nounwind readonly +declare %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64.p0i8(i8*, i32) nounwind readonly -declare %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8*, i32) nounwind readonly -declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8*, i32) nounwind readonly -declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8*, i32) nounwind readonly -declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8*, i32) nounwind readonly +declare %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8.p0i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16.p0i8(i8*, i32) nounwind readonly +declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32.p0i8(i8*, i32) nounwind readonly +declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32.p0i8(i8*, i32) nounwind readonly -- cgit v1.2.3