summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/ARM/ARMInstrMVE.td 4
-rw-r--r-- llvm/test/CodeGen/Thumb2/mve-ldst-offset.ll 185
-rw-r--r-- llvm/test/CodeGen/Thumb2/mve-ldst-postinc.ll 353
-rw-r--r-- llvm/test/CodeGen/Thumb2/mve-ldst-preinc.ll 353
4 files changed, 668 insertions, 227 deletions
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index cd670819fad..ce51322f913 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -4846,11 +4846,11 @@ def aligned32_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
}]>;
def aligned16_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
(pre_store node:$val, node:$ptr, node:$offset), [{
- return cast<StoreSDNode>(N)->getAlignment() == 2;
+ return cast<StoreSDNode>(N)->getAlignment() >= 2;
}]>;
def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
(post_store node:$val, node:$ptr, node:$offset), [{
- return cast<StoreSDNode>(N)->getAlignment() == 2;
+ return cast<StoreSDNode>(N)->getAlignment() >= 2;
}]>;
let Predicates = [HasMVEInt, IsLE] in {
diff --git a/llvm/test/CodeGen/Thumb2/mve-ldst-offset.ll b/llvm/test/CodeGen/Thumb2/mve-ldst-offset.ll
index bff8b351bfe..5a0a6051867 100644
--- a/llvm/test/CodeGen/Thumb2/mve-ldst-offset.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-ldst-offset.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
+; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
define i8* @ldrwu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_4:
@@ -720,11 +721,18 @@ entry:
}
define i8* @ldrwi32_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrwi32_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0, #3]
-; CHECK-NEXT: vstrw.32 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrwi32_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]
+; CHECK-LE-NEXT: vstrw.32 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrwi32_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrw.32 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <4 x i32>*
@@ -735,11 +743,18 @@ entry:
}
define i8* @ldrhi16_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrhi16_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0, #3]
-; CHECK-NEXT: vstrh.16 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrhi16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]
+; CHECK-LE-NEXT: vstrh.16 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrhi16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
+; CHECK-BE-NEXT: vrev16.8 q0, q0
+; CHECK-BE-NEXT: vstrh.16 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <8 x i16>*
@@ -772,12 +787,19 @@ entry:
ret i8* %x
}
-define i8* @ldrwf32_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrwf32_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0, #3]
-; CHECK-NEXT: vstrw.32 q0, [r1]
-; CHECK-NEXT: bx lr
+define i8* @ldrf32_align1(i8* %x, i8* %y) {
+; CHECK-LE-LABEL: ldrf32_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]
+; CHECK-LE-NEXT: vstrw.32 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrf32_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrw.32 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <4 x float>*
@@ -787,12 +809,19 @@ entry:
ret i8* %x
}
-define i8* @ldrwf16_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrwf16_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0, #3]
-; CHECK-NEXT: vstrh.16 q0, [r1]
-; CHECK-NEXT: bx lr
+define i8* @ldrf16_align1(i8* %x, i8* %y) {
+; CHECK-LE-LABEL: ldrf16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]
+; CHECK-LE-NEXT: vstrh.16 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrf16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
+; CHECK-BE-NEXT: vrev16.8 q0, q0
+; CHECK-BE-NEXT: vstrh.16 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <8 x half>*
@@ -802,6 +831,27 @@ entry:
ret i8* %x
}
+define i8* @ldrh16_align8(i8* %x, i8* %y) {
+; CHECK-LE-LABEL: ldrh16_align8:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r0, #4]
+; CHECK-LE-NEXT: vstrh.16 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrh16_align8:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r0, #4]
+; CHECK-BE-NEXT: vstrh.16 q0, [r1]
+; CHECK-BE-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 4
+ %0 = bitcast i8* %z to <8 x i16>*
+ %1 = load <8 x i16>, <8 x i16>* %0, align 8
+ %2 = bitcast i8* %y to <8 x i16>*
+ store <8 x i16> %1, <8 x i16>* %2, align 2
+ ret i8* %x
+}
+
@@ -1294,11 +1344,18 @@ entry:
}
define i8* @strwi32_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strwi32_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #3]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strwi32_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strwi32_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <4 x i32>*
@@ -1309,11 +1366,18 @@ entry:
}
define i8* @strhi16_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strhi16_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #3]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strhi16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strhi16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vrev16.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <8 x i16>*
@@ -1347,11 +1411,18 @@ entry:
}
define i8* @strf32_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strf32_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #3]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strf32_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf32_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <4 x float>*
@@ -1362,11 +1433,18 @@ entry:
}
define i8* @strf16_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strf16_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #3]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strf16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vrev16.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <8 x half>*
@@ -1375,3 +1453,24 @@ entry:
store <8 x half> %1, <8 x half>* %2, align 1
ret i8* %y
}
+
+define i8* @strf16_align8(i8* %y, i8* %x) {
+; CHECK-LE-LABEL: strf16_align8:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrw.32 q0, [r0, #16]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf16_align8:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vstrh.16 q0, [r0, #16]
+; CHECK-BE-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %y, i32 16
+ %0 = bitcast i8* %x to <8 x i16>*
+ %1 = load <8 x i16>, <8 x i16>* %0, align 2
+ %2 = bitcast i8* %z to <8 x i16>*
+ store <8 x i16> %1, <8 x i16>* %2, align 8
+ ret i8* %y
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-ldst-postinc.ll b/llvm/test/CodeGen/Thumb2/mve-ldst-postinc.ll
index e97b7a51bcc..61afa727c62 100644
--- a/llvm/test/CodeGen/Thumb2/mve-ldst-postinc.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-ldst-postinc.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
+; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
define i8* @ldrwu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_4:
@@ -17,11 +18,18 @@ entry:
}
define i8* @ldrwu32_3(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrwu32_3:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0], #3
-; CHECK-NEXT: vstrw.32 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrwu32_3:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3
+; CHECK-LE-NEXT: vstrw.32 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrwu32_3:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r0]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vstrw.32 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %x to <4 x i32>*
@@ -291,11 +299,18 @@ entry:
}
define i8* @ldrhu16_3(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrhu16_3:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0], #3
-; CHECK-NEXT: vstrh.16 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrhu16_3:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3
+; CHECK-LE-NEXT: vstrh.16 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrhu16_3:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r0]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vstrh.16 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %x to <8 x i16>*
@@ -708,11 +723,19 @@ entry:
}
define i8* @ldrwi32_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrwi32_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0], #3
-; CHECK-NEXT: vstrw.32 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrwi32_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3
+; CHECK-LE-NEXT: vstrw.32 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrwi32_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrw.32 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %x to <4 x i32>*
@@ -723,11 +746,19 @@ entry:
}
define i8* @ldrhi16_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrhi16_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0], #3
-; CHECK-NEXT: vstrh.16 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrhi16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3
+; CHECK-LE-NEXT: vstrh.16 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrhi16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vrev16.8 q0, q0
+; CHECK-BE-NEXT: vstrh.16 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %x to <8 x i16>*
@@ -762,11 +793,19 @@ entry:
}
define i8* @ldrf32_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrf32_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0], #3
-; CHECK-NEXT: vstrw.32 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrf32_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3
+; CHECK-LE-NEXT: vstrw.32 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrf32_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrw.32 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %x to <4 x float>*
@@ -777,11 +816,19 @@ entry:
}
define i8* @ldrf16_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrf16_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0], #3
-; CHECK-NEXT: vstrh.16 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrf16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3
+; CHECK-LE-NEXT: vstrh.16 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrf16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vrev16.8 q0, q0
+; CHECK-BE-NEXT: vstrh.16 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %x to <8 x half>*
@@ -791,16 +838,43 @@ entry:
ret i8* %z
}
+define i8* @ldrh16_align8(i8* %x, i8* %y) {
+; CHECK-LE-LABEL: ldrh16_align8:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r0], #4
+; CHECK-LE-NEXT: vstrh.16 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrh16_align8:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r0], #4
+; CHECK-BE-NEXT: vstrh.16 q0, [r1]
+; CHECK-BE-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 4
+ %0 = bitcast i8* %x to <8 x i16>*
+ %1 = load <8 x i16>, <8 x i16>* %0, align 8
+ %2 = bitcast i8* %y to <8 x i16>*
+ store <8 x i16> %1, <8 x i16>* %2, align 2
+ ret i8* %z
+}
+
define i8* @strw32_4(i8* %y, i8* %x) {
-; CHECK-LABEL: strw32_4:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0], #4
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strw32_4:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strw32_4:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vstrw.32 q0, [r0], #4
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <4 x i32>*
@@ -811,11 +885,18 @@ entry:
}
define i8* @strw32_3(i8* %y, i8* %x) {
-; CHECK-LABEL: strw32_3:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0], #3
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strw32_3:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strw32_3:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vstrw.32 q0, [r0]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <4 x i32>*
@@ -826,11 +907,17 @@ entry:
}
define i8* @strw32_m4(i8* %y, i8* %x) {
-; CHECK-LABEL: strw32_m4:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0], #-4
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strw32_m4:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #-4
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strw32_m4:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vstrw.32 q0, [r0], #-4
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 -4
%0 = bitcast i8* %x to <4 x i32>*
@@ -982,11 +1069,17 @@ entry:
define i8* @strh16_4(i8* %y, i8* %x) {
-; CHECK-LABEL: strh16_4:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0], #4
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strh16_4:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strh16_4:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vstrh.16 q0, [r0], #4
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <8 x i16>*
@@ -997,11 +1090,18 @@ entry:
}
define i8* @strh16_3(i8* %y, i8* %x) {
-; CHECK-LABEL: strh16_3:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0], #3
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strh16_3:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strh16_3:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vstrh.16 q0, [r0]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <8 x i16>*
@@ -1012,11 +1112,17 @@ entry:
}
define i8* @strh16_2(i8* %y, i8* %x) {
-; CHECK-LABEL: strh16_2:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0], #2
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strh16_2:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #2
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strh16_2:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vstrh.16 q0, [r0], #2
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 2
%0 = bitcast i8* %x to <8 x i16>*
@@ -1244,11 +1350,17 @@ entry:
}
define i8* @strf32_4(i8* %y, i8* %x) {
-; CHECK-LABEL: strf32_4:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0], #4
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strf32_4:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf32_4:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vstrw.32 q0, [r0], #4
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <4 x float>*
@@ -1259,11 +1371,17 @@ entry:
}
define i8* @strf16_4(i8* %y, i8* %x) {
-; CHECK-LABEL: strf16_4:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0], #4
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strf16_4:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf16_4:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vstrh.16 q0, [r0], #4
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <8 x half>*
@@ -1274,11 +1392,19 @@ entry:
}
define i8* @strwi32_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strwi32_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0], #3
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strwi32_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strwi32_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <4 x i32>*
@@ -1289,11 +1415,19 @@ entry:
}
define i8* @strhi16_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strhi16_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0], #3
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strhi16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strhi16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vrev16.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <8 x i16>*
@@ -1328,11 +1462,19 @@ entry:
}
define i8* @strf32_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strf32_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0], #3
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strf32_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf32_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <4 x float>*
@@ -1343,11 +1485,19 @@ entry:
}
define i8* @strf16_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strf16_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0], #3
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strf16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vrev16.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <8 x half>*
@@ -1356,3 +1506,24 @@ entry:
store <8 x half> %1, <8 x half>* %2, align 1
ret i8* %z
}
+
+define i8* @strf16_align8(i8* %y, i8* %x) {
+; CHECK-LE-LABEL: strf16_align8:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0], #16
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf16_align8:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vstrh.16 q0, [r0], #16
+; CHECK-BE-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %y, i32 16
+ %0 = bitcast i8* %x to <8 x i16>*
+ %1 = load <8 x i16>, <8 x i16>* %0, align 2
+ %2 = bitcast i8* %y to <8 x i16>*
+ store <8 x i16> %1, <8 x i16>* %2, align 8
+ ret i8* %z
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-ldst-preinc.ll b/llvm/test/CodeGen/Thumb2/mve-ldst-preinc.ll
index 0cbf55b5a3d..ca1731a23d3 100644
--- a/llvm/test/CodeGen/Thumb2/mve-ldst-preinc.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-ldst-preinc.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
+; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
define i8* @ldrwu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_4:
@@ -17,11 +18,18 @@ entry:
}
define i8* @ldrwu32_3(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrwu32_3:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0, #3]!
-; CHECK-NEXT: vstrw.32 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrwu32_3:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]!
+; CHECK-LE-NEXT: vstrw.32 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrwu32_3:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vldrw.u32 q0, [r0]
+; CHECK-BE-NEXT: vstrw.32 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <4 x i32>*
@@ -291,11 +299,18 @@ entry:
}
define i8* @ldrhu16_3(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrhu16_3:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0, #3]!
-; CHECK-NEXT: vstrh.16 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrhu16_3:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]!
+; CHECK-LE-NEXT: vstrh.16 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrhu16_3:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vldrh.u16 q0, [r0]
+; CHECK-BE-NEXT: vstrh.16 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <8 x i16>*
@@ -708,11 +723,19 @@ entry:
}
define i8* @ldrwi32_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrwi32_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0, #3]!
-; CHECK-NEXT: vstrw.32 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrwi32_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]!
+; CHECK-LE-NEXT: vstrw.32 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrwi32_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrw.32 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <4 x i32>*
@@ -723,11 +746,19 @@ entry:
}
define i8* @ldrhi16_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrhi16_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0, #3]!
-; CHECK-NEXT: vstrh.16 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrhi16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]!
+; CHECK-LE-NEXT: vstrh.16 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrhi16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vrev16.8 q0, q0
+; CHECK-BE-NEXT: vstrh.16 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <8 x i16>*
@@ -762,11 +793,19 @@ entry:
}
define i8* @ldrf32_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrf32_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0, #3]!
-; CHECK-NEXT: vstrw.32 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrf32_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]!
+; CHECK-LE-NEXT: vstrw.32 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrf32_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrw.32 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <4 x float>*
@@ -777,11 +816,19 @@ entry:
}
define i8* @ldrf16_align1(i8* %x, i8* %y) {
-; CHECK-LABEL: ldrf16_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0, #3]!
-; CHECK-NEXT: vstrh.16 q0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: ldrf16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]!
+; CHECK-LE-NEXT: vstrh.16 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrf16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vrev16.8 q0, q0
+; CHECK-BE-NEXT: vstrh.16 q0, [r1]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %x, i32 3
%0 = bitcast i8* %z to <8 x half>*
@@ -791,16 +838,43 @@ entry:
ret i8* %z
}
+define i8* @ldrh16_align8(i8* %x, i8* %y) {
+; CHECK-LE-LABEL: ldrh16_align8:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r0, #4]!
+; CHECK-LE-NEXT: vstrh.16 q0, [r1]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: ldrh16_align8:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r0, #4]!
+; CHECK-BE-NEXT: vstrh.16 q0, [r1]
+; CHECK-BE-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %x, i32 4
+ %0 = bitcast i8* %z to <8 x i16>*
+ %1 = load <8 x i16>, <8 x i16>* %0, align 8
+ %2 = bitcast i8* %y to <8 x i16>*
+ store <8 x i16> %1, <8 x i16>* %2, align 2
+ ret i8* %z
+}
+
define i8* @strw32_4(i8* %y, i8* %x) {
-; CHECK-LABEL: strw32_4:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #4]!
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strw32_4:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strw32_4:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vstrw.32 q0, [r0, #4]!
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <4 x i32>*
@@ -811,11 +885,18 @@ entry:
}
define i8* @strw32_3(i8* %y, i8* %x) {
-; CHECK-LABEL: strw32_3:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #3]!
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strw32_3:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strw32_3:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vstrw.32 q0, [r0]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <4 x i32>*
@@ -826,11 +907,17 @@ entry:
}
define i8* @strw32_m4(i8* %y, i8* %x) {
-; CHECK-LABEL: strw32_m4:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #-4]!
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strw32_m4:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #-4]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strw32_m4:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vstrw.32 q0, [r0, #-4]!
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 -4
%0 = bitcast i8* %x to <4 x i32>*
@@ -982,11 +1069,17 @@ entry:
define i8* @strh16_4(i8* %y, i8* %x) {
-; CHECK-LABEL: strh16_4:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #4]!
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strh16_4:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strh16_4:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vstrh.16 q0, [r0, #4]!
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <8 x i16>*
@@ -997,11 +1090,18 @@ entry:
}
define i8* @strh16_3(i8* %y, i8* %x) {
-; CHECK-LABEL: strh16_3:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #3]!
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strh16_3:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strh16_3:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vstrh.16 q0, [r0]
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <8 x i16>*
@@ -1012,11 +1112,17 @@ entry:
}
define i8* @strh16_2(i8* %y, i8* %x) {
-; CHECK-LABEL: strh16_2:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #2]!
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strh16_2:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #2]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strh16_2:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vstrh.16 q0, [r0, #2]!
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 2
%0 = bitcast i8* %x to <8 x i16>*
@@ -1244,11 +1350,17 @@ entry:
}
define i8* @strf32_4(i8* %y, i8* %x) {
-; CHECK-LABEL: strf32_4:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #4]!
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strf32_4:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf32_4:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vstrw.32 q0, [r0, #4]!
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <4 x float>*
@@ -1259,11 +1371,17 @@ entry:
}
define i8* @strf16_4(i8* %y, i8* %x) {
-; CHECK-LABEL: strf16_4:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #4]!
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strf16_4:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf16_4:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vstrh.16 q0, [r0, #4]!
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
%0 = bitcast i8* %x to <8 x half>*
@@ -1274,11 +1392,19 @@ entry:
}
define i8* @strwi32_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strwi32_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #3]!
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strwi32_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strwi32_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <4 x i32>*
@@ -1289,11 +1415,19 @@ entry:
}
define i8* @strhi16_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strhi16_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #3]!
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strhi16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strhi16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vrev16.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <8 x i16>*
@@ -1327,11 +1461,19 @@ entry:
}
define i8* @strf32_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strf32_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #3]!
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strf32_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf32_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <4 x float>*
@@ -1342,11 +1484,19 @@ entry:
}
define i8* @strf16_align1(i8* %y, i8* %x) {
-; CHECK-LABEL: strf16_align1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: vstrb.8 q0, [r0, #3]!
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: strf16_align1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf16_align1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vrev16.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
+; CHECK-BE-NEXT: adds r0, #3
+; CHECK-BE-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
%0 = bitcast i8* %x to <8 x half>*
@@ -1355,3 +1505,24 @@ entry:
store <8 x half> %1, <8 x half>* %2, align 1
ret i8* %z
}
+
+define i8* @strf16_align8(i8* %y, i8* %x) {
+; CHECK-LE-LABEL: strf16_align8:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-LE-NEXT: vstrb.8 q0, [r0, #16]!
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: strf16_align8:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
+; CHECK-BE-NEXT: vstrh.16 q0, [r0, #16]!
+; CHECK-BE-NEXT: bx lr
+entry:
+ %z = getelementptr inbounds i8, i8* %y, i32 16
+ %0 = bitcast i8* %x to <8 x i16>*
+ %1 = load <8 x i16>, <8 x i16>* %0, align 2
+ %2 = bitcast i8* %z to <8 x i16>*
+ store <8 x i16> %1, <8 x i16>* %2, align 8
+ ret i8* %z
+}
OpenPOWER on IntegriCloud