summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorJim Grosbach <grosbach@apple.com>2014-07-08 23:28:48 +0000
committerJim Grosbach <grosbach@apple.com>2014-07-08 23:28:48 +0000
commit04691a530d9068b5fc9bfbe7d7f157e11bdef116 (patch)
tree5265d454ce18bc87e734c21c7c9b24e57fc8e396 /llvm/lib/Target
parent8ae0f8d618b9996ccd650d9339255bc67d854531 (diff)
downloadbcm5719-llvm-04691a530d9068b5fc9bfbe7d7f157e11bdef116.tar.gz
bcm5719-llvm-04691a530d9068b5fc9bfbe7d7f157e11bdef116.zip
AArch64: Better codegen for loading from __fp16.
Loading will generally extend to an f32 or an 64, so make sure to match those patterns directly to load into the FPR16 register class directly rather than going through the integer GPRs. This also eliminates an extra step in the convert-to-f64 path which was first converting to f32 and then to f64 from there. rdar://17594379 llvm-svn: 212573
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.td35
1 files changed, 35 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a58c6b6fa6f..000a3be085c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2242,6 +2242,41 @@ def : Pat<(f32_to_f16 FPR32:$Rn),
def FCVTSHpseudo : Pseudo<(outs FPR32:$Rd), (ins FPR32:$Rn),
[(set (f32 FPR32:$Rd), (f16_to_f32 i32:$Rn))]>;
+// When converting from f16 coming directly from a load, make sure we
+// load into the FPR16 registers rather than going through the GPRs.
+// f16->f32
+def : Pat<(f32 (f16_to_f32 (i32
+ (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend))))),
+ (FCVTSHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
+def : Pat<(f32 (f16_to_f32 (i32
+ (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend))))),
+ (FCVTSHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
+def : Pat <(f32 (f16_to_f32 (i32
+ (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
+ (FCVTSHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
+def : Pat <(f32 (f16_to_f32 (i32
+ (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
+ (FCVTSHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
+
+// f16->f64
+def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32
+ (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend))))))),
+ (FCVTDHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
+def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32
+ (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend))))))),
+ (FCVTDHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
+def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32
+ (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))))),
+ (FCVTDHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
+def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32
+ (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))))),
+ (FCVTDHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
+
+
//===----------------------------------------------------------------------===//
// Floating point single operand instructions.
//===----------------------------------------------------------------------===//
OpenPOWER on IntegriCloud