AArch64: Better codegen for loading from __fp16.

Loading will generally extend to an f32 or an 64, so make sure to match those patterns directly to load into the FPR16 register class directly rather than going through the integer GPRs. This also eliminates an extra step in the convert-to-f64 path which was first converting to f32 and then to f64 from there. rdar://17594379 llvm-svn: 212573
author: Jim Grosbach <grosbach@apple.com> 2014-07-08 23:28:48 +0000
committer: Jim Grosbach <grosbach@apple.com> 2014-07-08 23:28:48 +0000
commit: 04691a530d9068b5fc9bfbe7d7f157e11bdef116 (patch)
tree: 5265d454ce18bc87e734c21c7c9b24e57fc8e396 /llvm/lib/Target
parent: 8ae0f8d618b9996ccd650d9339255bc67d854531 (diff)
download: bcm5719-llvm-04691a530d9068b5fc9bfbe7d7f157e11bdef116.tar.gz
bcm5719-llvm-04691a530d9068b5fc9bfbe7d7f157e11bdef116.zip
1 files changed, 35 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a58c6b6fa6f..000a3be085c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2242,6 +2242,41 @@ def : Pat<(f32_to_f16 FPR32:$Rn),
 def FCVTSHpseudo : Pseudo<(outs FPR32:$Rd), (ins FPR32:$Rn),
                           [(set (f32 FPR32:$Rd), (f16_to_f32 i32:$Rn))]>;
 
+// When converting from f16 coming directly from a load, make sure we
+// load into the FPR16 registers rather than going through the GPRs.
+//   f16->f32
+def : Pat<(f32 (f16_to_f32 (i32
+                (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+                                    ro_Wextend16:$extend))))),
+          (FCVTSHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
+def : Pat<(f32 (f16_to_f32 (i32
+                (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+                                    ro_Xextend16:$extend))))),
+          (FCVTSHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
+def : Pat <(f32 (f16_to_f32 (i32
+                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
+           (FCVTSHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
+def : Pat <(f32 (f16_to_f32 (i32
+                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
+           (FCVTSHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
+
+//   f16->f64
+def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32
+                (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+                                    ro_Wextend16:$extend))))))),
+          (FCVTDHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
+def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32
+                (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+                                    ro_Xextend16:$extend))))))),
+          (FCVTDHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
+def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32
+                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))))),
+           (FCVTDHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
+def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32
+                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))))),
+           (FCVTDHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
+
+
 //===----------------------------------------------------------------------===//
 // Floating point single operand instructions.
 //===----------------------------------------------------------------------===//
author	Jim Grosbach <grosbach@apple.com>	2014-07-08 23:28:48 +0000
committer	Jim Grosbach <grosbach@apple.com>	2014-07-08 23:28:48 +0000
commit	04691a530d9068b5fc9bfbe7d7f157e11bdef116 (patch)
tree	5265d454ce18bc87e734c21c7c9b24e57fc8e396 /llvm/lib/Target
parent	8ae0f8d618b9996ccd650d9339255bc67d854531 (diff)
download	bcm5719-llvm-04691a530d9068b5fc9bfbe7d7f157e11bdef116.tar.gz bcm5719-llvm-04691a530d9068b5fc9bfbe7d7f157e11bdef116.zip