Diffstat (limited to 'llvm/lib/Target')
 -rw-r--r--  llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp             |  8
 -rw-r--r--  llvm/lib/Target/ARM/ARMCallingConv.td                |  3
 -rw-r--r--  llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp              | 49
 -rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp              | 73
 -rw-r--r--  llvm/lib/Target/ARM/ARMInstrFormats.td               |  3
 -rw-r--r--  llvm/lib/Target/ARM/ARMInstrVFP.td                   | 35
 -rw-r--r--  llvm/lib/Target/ARM/ARMRegisterInfo.td               | 12
 -rw-r--r--  llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp |  7
 -rw-r--r--  llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h       |  4
 9 files changed, 166 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 41c2130e338..5bcb1c1a94e 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -2409,6 +2409,14 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
NumBits = 8;
Scale = 4;
break;
+ case ARMII::AddrMode5FP16:
+ ImmIdx = FrameRegIdx+1;
+ InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ Scale = 2;
+ break;
default:
llvm_unreachable("Unsupported addressing mode!");
}
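
Note on the new AddrMode5FP16 case above: a frame-index offset for the half-precision VFP loads/stores is folded as an 8-bit immediate in 2-byte units, with the add/sub flag carrying the sign, hence NumBits = 8 and Scale = 2. A minimal standalone sketch of one way to split a byte offset into an encodable part and a leftover, as an illustration under those assumptions rather than the code rewriteARMFrameIndex actually uses:

    #include <cstdlib>
    #include <utility>

    // Split a byte offset into the part representable as imm8 * 2 (the sign
    // goes into the separate add/sub flag) and the remainder that would still
    // need a separate address computation.
    std::pair<int, int> splitAM5FP16Offset(int Offset) {
      const int NumBits = 8, Scale = 2;
      const int Mask = (1 << NumBits) - 1;   // 8-bit immediate field
      int Magnitude = std::abs(Offset);
      int Imm = Magnitude / Scale;           // offset in 2-byte units
      if (Imm > Mask)
        Imm = Mask;                          // keep only what the field can hold
      int Encodable = Imm * Scale;
      return {Offset < 0 ? -Encodable : Encodable, Magnitude - Encodable};
    }
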
diff --git a/llvm/lib/Target/ARM/ARMCallingConv.td b/llvm/lib/Target/ARM/ARMCallingConv.td
index dcfd6518a84..11bf492815c 100644
--- a/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -187,6 +187,7 @@ def RetCC_ARM_AAPCS : CallingConv<[
CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
CCIfType<[f32], CCBitConvertToType<i32>>,
+
CCDelegateTo<RetCC_ARM_AAPCS_Common>
]>;
@@ -233,7 +234,7 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
- S9, S10, S11, S12, S13, S14, S15]>>,
+ S9, S10, S11, S12, S13, S14, S15]>>,
CCDelegateTo<RetCC_ARM_AAPCS_Common>
]>;
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index c2d0e636da9..d1113603195 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -118,8 +118,10 @@ public:
SDValue &Offset, SDValue &Opc);
bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc);
- bool SelectAddrMode5(SDValue N, SDValue &Base,
- SDValue &Offset);
+ bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
+ int Lwb, int Upb, bool FP16);
+ bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
@@ -886,8 +888,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
return true;
}
-bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
- SDValue &Base, SDValue &Offset) {
+bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
+ int Lwb, int Upb, bool FP16) {
if (!CurDAG->isBaseWithConstantOffset(N)) {
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
@@ -907,8 +909,9 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
// If the RHS is +/- imm8, fold into addr mode.
int RHSC;
- if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
- -256 + 1, 256, RHSC)) {
+ const int Scale = FP16 ? 2 : 4;
+
+ if (isScaledConstantInRange(N.getOperand(1), Scale, Lwb, Upb, RHSC)) {
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
@@ -921,17 +924,43 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
AddSub = ARM_AM::sub;
RHSC = -RHSC;
}
- Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
- SDLoc(N), MVT::i32);
+
+ if (FP16)
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
+ SDLoc(N), MVT::i32);
+ else
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
+ SDLoc(N), MVT::i32);
+
return true;
}
Base = N;
- Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
- SDLoc(N), MVT::i32);
+
+ if (FP16)
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
+ SDLoc(N), MVT::i32);
+ else
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+ SDLoc(N), MVT::i32);
+
return true;
}
+bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
+ SDValue &Base, SDValue &Offset) {
+ int Lwb = -256 + 1;
+ int Upb = 256;
+ return IsAddressingMode5(N, Base, Offset, Lwb, Upb, /*FP16=*/ false);
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
+ SDValue &Base, SDValue &Offset) {
+ int Lwb = -512 + 1;
+ int Upb = 512;
+ return IsAddressingMode5(N, Base, Offset, Lwb, Upb, /*FP16=*/ true);
+}
+
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
SDValue &Align) {
Addr = N;
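
The refactoring above keeps a single IsAddressingMode5 path for both encodings: the caller passes the scaled-immediate bounds and says whether the FP16 form (2-byte units) or the classic form (4-byte units) of the offset operand should be built. A rough standalone model of the scaled-range test this relies on, assuming an inclusive lower and exclusive upper bound, which is how the AddrMode5 bounds of -256 + 1 and 256 read:

    // Model only: the byte offset must be an exact multiple of Scale, and the
    // scaled value must lie in [Lwb, Upb).
    bool fitsScaledImm(int ByteOffset, int Scale, int Lwb, int Upb, int &Scaled) {
      if (ByteOffset % Scale != 0)
        return false;
      Scaled = ByteOffset / Scale;
      return Scaled >= Lwb && Scaled < Upb;
    }

    // SelectAddrMode5 would use Scale = 4 with the -256 + 1 / 256 bounds;
    // SelectAddrMode5FP16 passes Scale = 2 with its own bounds.
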
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 7ecd961fbed..096bee8c286 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -522,6 +522,13 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::f64, &ARM::DPRRegClass);
}
+ if (Subtarget->hasFullFP16()) {
+ addRegisterClass(MVT::f16, &ARM::HPRRegClass);
+ // Clean up bitcast of incoming arguments if hard float ABI is enabled.
+ if (Subtarget->isTargetHardFloat())
+ setOperationAction(ISD::BITCAST, MVT::i16, Custom);
+ }
+
for (MVT VT : MVT::vector_valuetypes()) {
for (MVT InnerVT : MVT::vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
@@ -2474,12 +2481,37 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue Arg = OutVals[realRVLocIdx];
+ bool ReturnF16 = false;
+
+ if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
+ // Half-precision return values can be returned like this:
+ //
+ // t11: f16 = fadd ...
+ // t12: i16 = bitcast t11
+ // t13: i32 = zero_extend t12
+ // t14: f32 = bitcast t13
+ //
+ // to avoid code generation for bitcasts, we simply set Arg to the node
+ // that produces the f16 value, t11 in this case.
+ //
+ if (Arg.getValueType() == MVT::f32) {
+ SDValue ZE = Arg.getOperand(0);
+ if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
+ SDValue BC = ZE.getOperand(0);
+ if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
+ Arg = BC.getOperand(0);
+ ReturnF16 = true;
+ }
+ }
+ }
+ }
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::BCvt:
- Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ if (!ReturnF16)
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
break;
}
@@ -2527,7 +2559,8 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Guarantee that all emitted copies are
// stuck together, avoiding something bad.
Flag = Chain.getValue(1);
- RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(),
+ ReturnF16 ? MVT::f16 : VA.getLocVT()));
}
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
const MCPhysReg *I =
@@ -3684,7 +3717,10 @@ SDValue ARMTargetLowering::LowerFormalArguments(
} else {
const TargetRegisterClass *RC;
- if (RegVT == MVT::f32)
+
+ if (RegVT == MVT::f16)
+ RC = &ARM::HPRRegClass;
+ else if (RegVT == MVT::f32)
RC = &ARM::SPRRegClass;
else if (RegVT == MVT::f64)
RC = &ARM::DPRRegClass;
@@ -5024,6 +5060,37 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
// source or destination of the bit convert.
EVT SrcVT = Op.getValueType();
EVT DstVT = N->getValueType(0);
+
+ // Half-precision arguments can be passed in like this:
+ //
+ // t4: f32,ch = CopyFromReg t0, Register:f32 %1
+ // t8: i32 = bitcast t4
+ // t9: i16 = truncate t8
+ // t10: f16 = bitcast t9 <~~~~ SDNode N
+ //
+ // but we want to avoid code generation for the bitcast, so transform this
+ // into:
+ //
+ // t18: f16 = CopyFromReg t0, Register:f32 %1
+ //
+ if (SrcVT == MVT::i16 && DstVT == MVT::f16) {
+ if (Op.getOpcode() != ISD::TRUNCATE)
+ return SDValue();
+
+ SDValue Bitcast = Op.getOperand(0);
+ if (Bitcast.getOpcode() != ISD::BITCAST ||
+ Bitcast.getValueType() != MVT::i32)
+ return SDValue();
+
+ SDValue Copy = Bitcast.getOperand(0);
+ if (Copy.getOpcode() != ISD::CopyFromReg ||
+ Copy.getValueType() != MVT::f32)
+ return SDValue();
+
+ SDValue Ops[] = { Copy->getOperand(0), Copy->getOperand(1) };
+ return DAG.getNode(ISD::CopyFromReg, SDLoc(Copy), MVT::f16, Ops);
+ }
+
assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
"ExpandBITCAST called for non-i64 type");
diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td
index f7c6c32eb4d..7b90e960f53 100644
--- a/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -108,6 +108,7 @@ def AddrModeT2_so : AddrMode<13>;
def AddrModeT2_pc : AddrMode<14>;
def AddrModeT2_i8s4 : AddrMode<15>;
def AddrMode_i12 : AddrMode<16>;
+def AddrMode5FP16 : AddrMode<17>;
// Load / store index mode.
class IndexMode<bits<2> val> {
@@ -1527,7 +1528,7 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
class AHI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : VFPI<oops, iops, AddrMode5, 4, IndexModeNone,
+ : VFPI<oops, iops, AddrMode5FP16, 4, IndexModeNone,
VFPLdStFrm, itin, opc, asm, "", pattern> {
list<Predicate> Predicates = [HasFullFP16];
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 22e157a7480..cf558b533af 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -69,10 +69,19 @@ def vfp_f64imm : Operand<f64>,
let ParserMatchClass = FPImmOperand;
}
+def alignedload16 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 2;
+}]>;
+
def alignedload32 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 4;
}]>;
+def alignedstore16 : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 2;
+}]>;
+
def alignedstore32 : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return cast<StoreSDNode>(N)->getAlignment() >= 4;
@@ -113,9 +122,9 @@ def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
let D = VFPNeonDomain;
}
-def VLDRH : AHI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5fp16:$addr),
+def VLDRH : AHI5<0b1101, 0b01, (outs HPR:$Sd), (ins addrmode5fp16:$addr),
IIC_fpLoad16, "vldr", ".16\t$Sd, $addr",
- []>,
+ [(set HPR:$Sd, (alignedload16 addrmode5fp16:$addr))]>,
Requires<[HasFullFP16]>;
} // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in'
@@ -132,9 +141,9 @@ def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
let D = VFPNeonDomain;
}
-def VSTRH : AHI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5fp16:$addr),
+def VSTRH : AHI5<0b1101, 0b00, (outs), (ins HPR:$Sd, addrmode5fp16:$addr),
IIC_fpStore16, "vstr", ".16\t$Sd, $addr",
- []>,
+ [(alignedstore16 HPR:$Sd, addrmode5fp16:$addr)]>,
Requires<[HasFullFP16]>;
//===----------------------------------------------------------------------===//
@@ -335,9 +344,9 @@ def VADDS : ASbIn<0b11100, 0b11, 0, 0,
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VADDH : AHbI<0b11100, 0b11, 0, 0,
- (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
IIC_fpALU16, "vadd", ".f16\t$Sd, $Sn, $Sm",
- []>,
+ [(set HPR:$Sd, (fadd HPR:$Sn, HPR:$Sm))]>,
Sched<[WriteFPALU32]>;
let TwoOperandAliasConstraint = "$Dn = $Dd" in
@@ -360,9 +369,9 @@ def VSUBS : ASbIn<0b11100, 0b11, 1, 0,
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VSUBH : AHbI<0b11100, 0b11, 1, 0,
- (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
IIC_fpALU16, "vsub", ".f16\t$Sd, $Sn, $Sm",
- []>,
+ [(set HPR:$Sd, (fsub HPR:$Sn, HPR:$Sm))]>,
Sched<[WriteFPALU32]>;
let TwoOperandAliasConstraint = "$Dn = $Dd" in
@@ -658,17 +667,19 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
let Predicates = [HasVFP2, HasDPVFP];
}
-// Between half, single and double-precision. For disassembly only.
-
+// Between half, single and double-precision.
def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
- [/* For disassembly only; pattern left blank */]>,
+ [ /* intentionally left blank, see rule below */ ]>,
Requires<[HasFP16]>,
Sched<[WriteFPCVT]>;
+def : Pat<(f32 (fpextend HPR:$Sm)),
+ (VCVTBHS (COPY_TO_REGCLASS HPR:$Sm, SPR))>;
+
def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
- [/* For disassembly only; pattern left blank */]>,
+ []>,
Requires<[HasFP16]>,
Sched<[WriteFPCVT]>;
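
With the HPR operands and the new alignedload16/alignedstore16 fragments, VLDRH, VSTRH, VADDH and VSUBH become selectable from generic f16 loads, stores and arithmetic, provided the memory access is at least 2-byte aligned. A hypothetical snippet expected to exercise these patterns, under the same target assumptions as the example above:

    // Naturally aligned half-precision loads feed alignedload16 (VLDRH), the
    // store matches alignedstore16 (VSTRH), and the addition the VADDH pattern.
    void half_add_store(const _Float16 *x, const _Float16 *y, _Float16 *out) {
      *out = *x + *y;
    }
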
diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.td b/llvm/lib/Target/ARM/ARMRegisterInfo.td
index 14526b777c7..dc56186cb54 100644
--- a/llvm/lib/Target/ARM/ARMRegisterInfo.td
+++ b/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -307,6 +307,18 @@ def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)> {
let DiagnosticString = "operand must be a register in range [s0, s31]";
}
+def HPR : RegisterClass<"ARM", [f16], 32, (sequence "S%u", 0, 31)> {
+ let AltOrders = [(add (decimate HPR, 2), SPR),
+ (add (decimate HPR, 4),
+ (decimate HPR, 2),
+ (decimate (rotl HPR, 1), 4),
+ (decimate (rotl HPR, 1), 2))];
+ let AltOrderSelect = [{
+ return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+ }];
+ let DiagnosticString = "operand must be a register in range [s0, s31]";
+}
+
// Subset of SPR which can be used as a source of NEON scalars for 16-bit
// operations
def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)> {
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 53c63587767..658a67511ff 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -158,6 +158,8 @@ static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeHPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
@@ -996,6 +998,11 @@ static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeHPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ return DecodeSPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
static const uint16_t DPRDecoderTable[] = {
ARM::D0, ARM::D1, ARM::D2, ARM::D3,
ARM::D4, ARM::D5, ARM::D6, ARM::D7,
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index c4480e3da50..5a0d1f9edcc 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -186,7 +186,8 @@ namespace ARMII {
AddrModeT2_so = 13,
AddrModeT2_pc = 14, // +/- i12 for pc relative data
AddrModeT2_i8s4 = 15, // i8 * 4
- AddrMode_i12 = 16
+ AddrMode_i12 = 16,
+ AddrMode5FP16 = 17 // i8 * 2
};
inline static const char *AddrModeToString(AddrMode addrmode) {
@@ -197,6 +198,7 @@ namespace ARMII {
case AddrMode3: return "AddrMode3";
case AddrMode4: return "AddrMode4";
case AddrMode5: return "AddrMode5";
+ case AddrMode5FP16: return "AddrMode5FP16";
case AddrMode6: return "AddrMode6";
case AddrModeT1_1: return "AddrModeT1_1";
case AddrModeT1_2: return "AddrModeT1_2";
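
For reference, the "i8 * 2" note on AddrMode5FP16 means an 8-bit unsigned immediate scaled by 2 bytes, with the direction supplied by the separate add/sub flag, matching the NumBits = 8 / Scale = 2 case added to rewriteARMFrameIndex. A quick worked check of the byte offsets the immediate field itself can express (illustration only):

    #include <cstdio>

    int main() {
      const int NumBits = 8, Scale = 2;
      int Reach = ((1 << NumBits) - 1) * Scale;   // 255 * 2 = 510
      std::printf("AddrMode5FP16 immediate reach: -%d .. +%d bytes, step %d\n",
                  Reach, Reach, Scale);
      return 0;
    }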