diff options
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.td | 35 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AArch64/f16-convert.ll | 128 |
2 files changed, 163 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index a58c6b6fa6f..000a3be085c 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -2242,6 +2242,41 @@ def : Pat<(f32_to_f16 FPR32:$Rn), def FCVTSHpseudo : Pseudo<(outs FPR32:$Rd), (ins FPR32:$Rn), [(set (f32 FPR32:$Rd), (f16_to_f32 i32:$Rn))]>; +// When converting from f16 coming directly from a load, make sure we +// load into the FPR16 registers rather than going through the GPRs. +// f16->f32 +def : Pat<(f32 (f16_to_f32 (i32 + (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend))))), + (FCVTSHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>; +def : Pat<(f32 (f16_to_f32 (i32 + (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend))))), + (FCVTSHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>; +def : Pat <(f32 (f16_to_f32 (i32 + (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), + (FCVTSHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>; +def : Pat <(f32 (f16_to_f32 (i32 + (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))), + (FCVTSHr (LDURHi GPR64sp:$Rn, simm9:$offset))>; + +// f16->f64 +def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32 + (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend))))))), + (FCVTDHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>; +def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32 + (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend))))))), + (FCVTDHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>; +def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32 + (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))))), + (FCVTDHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>; +def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32 + (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))))), + (FCVTDHr (LDURHi GPR64sp:$Rn, simm9:$offset))>; + + //===----------------------------------------------------------------------===// // Floating point single operand instructions. //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/f16-convert.ll b/llvm/test/CodeGen/AArch64/f16-convert.ll new file mode 100644 index 00000000000..5d70fc2bbd1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/f16-convert.ll @@ -0,0 +1,128 @@ +; RUN: llc < %s -mtriple=arm64-apple-ios -asm-verbose=false | FileCheck %s + +define float @load0(i16* nocapture readonly %a) nounwind { +; CHECK-LABEL: load0: +; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0] +; CHECK-NEXT: fcvt s0, [[HREG]] +; CHECK-NEXT: ret + + %tmp = load i16* %a, align 2 + %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp) + ret float %tmp1 +} + +define double @load1(i16* nocapture readonly %a) nounwind { +; CHECK-LABEL: load1: +; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0] +; CHECK-NEXT: fcvt d0, [[HREG]] +; CHECK-NEXT: ret + + %tmp = load i16* %a, align 2 + %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp) + %conv = fpext float %tmp1 to double + ret double %conv +} + +define float @load2(i16* nocapture readonly %a, i32 %i) nounwind { +; CHECK-LABEL: load2: +; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, w1, sxtw #1] +; CHECK-NEXT: fcvt s0, [[HREG]] +; CHECK-NEXT: ret + + %idxprom = sext i32 %i to i64 + %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom + %tmp = load i16* %arrayidx, align 2 + %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp) + ret float %tmp1 +} + +define double @load3(i16* nocapture readonly %a, i32 %i) nounwind { +; CHECK-LABEL: load3: +; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, w1, sxtw #1] +; CHECK-NEXT: fcvt d0, [[HREG]] +; CHECK-NEXT: ret + + %idxprom = sext i32 %i to i64 + %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom + %tmp = load i16* %arrayidx, align 2 + %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp) + %conv = fpext float %tmp1 to double + ret double %conv +} + +define float @load4(i16* nocapture readonly %a, i64 %i) nounwind { +; CHECK-LABEL: load4: +; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, x1, lsl #1] +; CHECK-NEXT: fcvt s0, [[HREG]] +; CHECK-NEXT: ret + + %arrayidx = getelementptr inbounds i16* %a, i64 %i + %tmp = load i16* %arrayidx, align 2 + %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp) + ret float %tmp1 +} + +define double @load5(i16* nocapture readonly %a, i64 %i) nounwind { +; CHECK-LABEL: load5: +; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, x1, lsl #1] +; CHECK-NEXT: fcvt d0, [[HREG]] +; CHECK-NEXT: ret + + %arrayidx = getelementptr inbounds i16* %a, i64 %i + %tmp = load i16* %arrayidx, align 2 + %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp) + %conv = fpext float %tmp1 to double + ret double %conv +} + +define float @load6(i16* nocapture readonly %a) nounwind { +; CHECK-LABEL: load6: +; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, #20] +; CHECK-NEXT: fcvt s0, [[HREG]] +; CHECK-NEXT: ret + + %arrayidx = getelementptr inbounds i16* %a, i64 10 + %tmp = load i16* %arrayidx, align 2 + %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp) + ret float %tmp1 +} + +define double @load7(i16* nocapture readonly %a) nounwind { +; CHECK-LABEL: load7: +; CHECK-NEXT: ldr [[HREG:h[0-9]+]], [x0, #20] +; CHECK-NEXT: fcvt d0, [[HREG]] +; CHECK-NEXT: ret + + %arrayidx = getelementptr inbounds i16* %a, i64 10 + %tmp = load i16* %arrayidx, align 2 + %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp) + %conv = fpext float %tmp1 to double + ret double %conv +} + +define float @load8(i16* nocapture readonly %a) nounwind { +; CHECK-LABEL: load8: +; CHECK-NEXT: ldur [[HREG:h[0-9]+]], [x0, #-20] +; CHECK-NEXT: fcvt s0, [[HREG]] +; CHECK-NEXT: ret + + %arrayidx = getelementptr inbounds i16* %a, i64 -10 + %tmp = load i16* %arrayidx, align 2 + %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp) + ret float %tmp1 +} + +define double @load9(i16* nocapture readonly %a) nounwind { +; CHECK-LABEL: load9: +; CHECK-NEXT: ldur [[HREG:h[0-9]+]], [x0, #-20] +; CHECK-NEXT: fcvt d0, [[HREG]] +; CHECK-NEXT: ret + + %arrayidx = getelementptr inbounds i16* %a, i64 -10 + %tmp = load i16* %arrayidx, align 2 + %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp) + %conv = fpext float %tmp1 to double + ret double %conv +} + +declare float @llvm.convert.from.fp16(i16) nounwind readnone |

