summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorAlexandros Lamprineas <alexandros.lamprineas@arm.com>2016-10-10 16:01:54 +0000
committerAlexandros Lamprineas <alexandros.lamprineas@arm.com>2016-10-10 16:01:54 +0000
commit20e9ddba7349b556e43daab7433f2a3a7c649417 (patch)
tree8f7d5c15ad03995244edb98dbbc6226f4caa993f /llvm/lib
parent0c21b40d37d559800490560ad4037f792cf69fab (diff)
downloadbcm5719-llvm-20e9ddba7349b556e43daab7433f2a3a7c649417.tar.gz
bcm5719-llvm-20e9ddba7349b556e43daab7433f2a3a7c649417.zip
[ARM] Fix invalid VLDM/VSTM access when targeting Big Endian with NEON
The instructions VLDM/VSTM can only access word-aligned memory locations and produce alignment fault if the condition is not met. The compiler currently generates VLDM/VSTM for v2f64 load/store regardless the alignment of the memory access. Instead, if a v2f64 load/store is not word-aligned, the compiler should generate VLD1/VST1. For each non double-word-aligned VLD1/VST1, a VREV instruction should be generated when targeting Big Endian. Differential Revision: https://reviews.llvm.org/D25281 llvm-svn: 283763
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/ARM/ARMInstrNEON.td14
1 files changed, 12 insertions, 2 deletions
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index defef4ea907..b5fa8e999e2 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -610,14 +610,14 @@ def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
def VLDMQIA
: PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
IIC_fpLoad_m, "",
- [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>;
+ [(set DPair:$dst, (v2f64 (word_alignedload GPR:$Rn)))]>;
// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
: PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
IIC_fpStore_m, "",
- [(store (v2f64 DPair:$src), GPR:$Rn)]>;
+ [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>;
// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
@@ -6849,6 +6849,16 @@ let Predicates = [IsBE] in {
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
}
+// Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian
+def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
+ (VREV64q8 (VLD1q8 addrmode6:$addr))>, Requires<[IsBE]>;
+def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
+ (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>, Requires<[IsBE]>;
+def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
+ (VREV64q16 (VLD1q16 addrmode6:$addr))>, Requires<[IsBE]>;
+def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
+ (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>, Requires<[IsBE]>;
+
// Fold extracting an element out of a v2i32 into a vfp register.
def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
(f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
OpenPOWER on IntegriCloud