summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Tim Northover <tnorthover@apple.com> 2014-04-02 14:38:54 +0000
committer: Tim Northover <tnorthover@apple.com> 2014-04-02 14:38:54 +0000
commit: 5e3a484e3b325329e56a685357dc5e3c9bb0f1b3 (patch)
tree: fa7228832f4bffdcc664b0392bdeb4eb3b6cf9ec
parent: f7da105f3903dfb3a9153f542dbada8a9bb45a8a (diff)
download: bcm5719-llvm-5e3a484e3b325329e56a685357dc5e3c9bb0f1b3.tar.gz
          bcm5719-llvm-5e3a484e3b325329e56a685357dc5e3c9bb0f1b3.zip
ARM64: convert fp16 narrowing ISel to pseudo-instruction
The previous attempt was fine with optimisations, but was actually rather cavalier with its types. When compiled at -O0, it produced invalid COPY MachineInstrs.

llvm-svn: 205422
-rw-r--r--  llvm/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp | 12
-rw-r--r--  llvm/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp      | 13
-rw-r--r--  llvm/lib/Target/ARM64/ARM64InstrInfo.td          |  2
-rw-r--r--  llvm/test/CodeGen/ARM64/vcvt_f.ll                |  4
4 files changed, 16 insertions, 15 deletions
diff --git a/llvm/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp b/llvm/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp
index acfc00d0128..68608bcd11b 100644
--- a/llvm/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp
@@ -624,6 +624,18 @@ bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
return true;
}
+ case ARM64::FCVTSHpseudo: {
+ MachineOperand Src = MI.getOperand(1);
+ Src.setImplicit();
+ unsigned SrcH = TII->getRegisterInfo().getSubReg(Src.getReg(), ARM64::hsub);
+ auto MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::FCVTSHr))
+ .addOperand(MI.getOperand(0))
+ .addReg(SrcH, RegState::Undef)
+ .addOperand(Src);
+ transferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
case ARM64::LOADgot: {
// Expand into ADRP + LDR.
unsigned DstReg = MI.getOperand(0).getReg();
diff --git a/llvm/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp b/llvm/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp
index adf1bfeddf0..2e234c92773 100644
--- a/llvm/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp
@@ -1832,19 +1832,6 @@ SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) {
break;
}
- case ISD::FP16_TO_FP32: {
- assert(Node->getOperand(0).getValueType() == MVT::i32 && "vector convert?");
- EVT VT = Node->getValueType(0);
- SDLoc DL(Node);
- SDValue FPR32Id =
- CurDAG->getTargetConstant(ARM64::FPR32RegClass.getID(), MVT::i32);
- SDNode *Res =
- CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, MVT::i32,
- Node->getOperand(0), FPR32Id);
- SDValue FPR16Reg =
- CurDAG->getTargetExtractSubreg(ARM64::hsub, DL, VT, SDValue(Res, 0));
- return CurDAG->getMachineNode(ARM64::FCVTSHr, DL, VT, FPR16Reg);
- }
case ISD::SRL:
case ISD::AND:
case ISD::SRA:
diff --git a/llvm/lib/Target/ARM64/ARM64InstrInfo.td b/llvm/lib/Target/ARM64/ARM64InstrInfo.td
index ee066a38f4f..747f6c5db60 100644
--- a/llvm/lib/Target/ARM64/ARM64InstrInfo.td
+++ b/llvm/lib/Target/ARM64/ARM64InstrInfo.td
@@ -1927,6 +1927,8 @@ def : Pat<(f32_to_f16 FPR32:$Rn),
(f32 (SUBREG_TO_REG (i32 0), (FCVTHSr FPR32:$Rn), hsub)),
GPR32))>;
+def FCVTSHpseudo : Pseudo<(outs FPR32:$Rd), (ins FPR32:$Rn),
+ [(set (f32 FPR32:$Rd), (f16_to_f32 i32:$Rn))]>;
//===----------------------------------------------------------------------===//
// Floating point single operand instructions.
diff --git a/llvm/test/CodeGen/ARM64/vcvt_f.ll b/llvm/test/CodeGen/ARM64/vcvt_f.ll
index 549d2f0aea3..d67aa3b9d47 100644
--- a/llvm/test/CodeGen/ARM64/vcvt_f.ll
+++ b/llvm/test/CodeGen/ARM64/vcvt_f.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -O0 -march=arm64 -arm64-neon-syntax=apple | FileCheck %s
define <2 x double> @test_vcvt_f64_f32(<2 x float> %x) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_f64_f32:
@@ -64,8 +65,7 @@ declare <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double>) nounwind r
define i16 @to_half(float %in) {
; CHECK-LABEL: to_half:
; CHECK: fcvt h[[HALFVAL:[0-9]+]], s0
-; CHECK: fmov w0, s[[HALFVAL]]
-
+; CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}}
%res = call i16 @llvm.convert.to.fp16(float %in)
ret i16 %res
}
OpenPOWER on IntegriCloud