summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--llvm/test/CodeGen/ARM/vld3.ll13
-rw-r--r--llvm/test/CodeGen/ARM/vld4.ll13
-rw-r--r--llvm/test/CodeGen/ARM/vst3.ll12
-rw-r--r--llvm/test/CodeGen/ARM/vst4.ll12
4 files changed, 50 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/ARM/vld3.ll b/llvm/test/CodeGen/ARM/vld3.ll
index 0eaad0f9003..46e17c97e6e 100644
--- a/llvm/test/CodeGen/ARM/vld3.ll
+++ b/llvm/test/CodeGen/ARM/vld3.ll
@@ -96,6 +96,19 @@ define <1 x i64> @vld3i64_update(i64** %ptr, i64* %A) nounwind {
ret <1 x i64> %tmp4
}
+define <1 x i64> @vld3i64_reg_update(i64** %ptr, i64* %A) nounwind {
+;CHECK-LABEL: vld3i64_reg_update:
+;CHECK: vld1.64 {d16, d17, d18}, [{{r[0-9]+|lr}}:64], {{r[0-9]+|lr}}
+ %tmp0 = bitcast i64* %A to i8*
+ %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64.p0i8(i8* %tmp0, i32 16)
+ %tmp5 = getelementptr i64, i64* %A, i32 1
+ store i64* %tmp5, i64** %ptr
+ %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2
+ %tmp4 = add <1 x i64> %tmp2, %tmp3
+ ret <1 x i64> %tmp4
+}
+
define <16 x i8> @vld3Qi8(i8* %A) nounwind {
;CHECK-LABEL: vld3Qi8:
;Check the alignment value. Max for this instruction is 64 bits:
diff --git a/llvm/test/CodeGen/ARM/vld4.ll b/llvm/test/CodeGen/ARM/vld4.ll
index 5663e6d41f0..679c9ae4450 100644
--- a/llvm/test/CodeGen/ARM/vld4.ll
+++ b/llvm/test/CodeGen/ARM/vld4.ll
@@ -96,6 +96,19 @@ define <1 x i64> @vld4i64_update(i64** %ptr, i64* %A) nounwind {
ret <1 x i64> %tmp4
}
+define <1 x i64> @vld4i64_reg_update(i64** %ptr, i64* %A) nounwind {
+;CHECK-LABEL: vld4i64_reg_update:
+;CHECK: vld1.64 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:256], {{r[0-9]+|lr}}
+ %tmp0 = bitcast i64* %A to i8*
+ %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64.p0i8(i8* %tmp0, i32 64)
+ %tmp5 = getelementptr i64, i64* %A, i32 1
+ store i64* %tmp5, i64** %ptr
+ %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2
+ %tmp4 = add <1 x i64> %tmp2, %tmp3
+ ret <1 x i64> %tmp4
+}
+
define <16 x i8> @vld4Qi8(i8* %A) nounwind {
;CHECK-LABEL: vld4Qi8:
;Check the alignment value. Max for this instruction is 256 bits:
diff --git a/llvm/test/CodeGen/ARM/vst3.ll b/llvm/test/CodeGen/ARM/vst3.ll
index d70d5957900..1723304e82b 100644
--- a/llvm/test/CodeGen/ARM/vst3.ll
+++ b/llvm/test/CodeGen/ARM/vst3.ll
@@ -73,6 +73,18 @@ define void @vst3i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
ret void
}
+define void @vst3i64_reg_update(i64** %ptr, <1 x i64>* %B) nounwind {
+;CHECK-LABEL: vst3i64_reg_update
+;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}], r{{.*}}
+ %A = load i64*, i64** %ptr
+ %tmp0 = bitcast i64* %A to i8*
+ %tmp1 = load <1 x i64>, <1 x i64>* %B
+ call void @llvm.arm.neon.vst3.p0i8.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
+ %tmp2 = getelementptr i64, i64* %A, i32 1
+ store i64* %tmp2, i64** %ptr
+ ret void
+}
+
define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vst3Qi8:
;Check the alignment value. Max for this instruction is 64 bits:
diff --git a/llvm/test/CodeGen/ARM/vst4.ll b/llvm/test/CodeGen/ARM/vst4.ll
index afa4321c91a..ca9e5e7a59d 100644
--- a/llvm/test/CodeGen/ARM/vst4.ll
+++ b/llvm/test/CodeGen/ARM/vst4.ll
@@ -72,6 +72,18 @@ define void @vst4i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
ret void
}
+define void @vst4i64_reg_update(i64** %ptr, <1 x i64>* %B) nounwind {
+;CHECK-LABEL: vst4i64_reg_update:
+;CHECK: vst1.64 {d16, d17, d18, d19}, [r{{[0-9]+}}], r{{[0-9]+}}
+ %A = load i64*, i64** %ptr
+ %tmp0 = bitcast i64* %A to i8*
+ %tmp1 = load <1 x i64>, <1 x i64>* %B
+ call void @llvm.arm.neon.vst4.p0i8.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
+ %tmp2 = getelementptr i64, i64* %A, i32 1
+ store i64* %tmp2, i64** %ptr
+ ret void
+}
+
define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vst4Qi8:
;Check the alignment value. Max for this instruction is 256 bits:
OpenPOWER on IntegriCloud