summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/Sparc
diff options
context:
space:
mode:
authorFedor Sergeev <fedor.sergeev@oracle.com>2017-11-20 22:33:58 +0000
committerFedor Sergeev <fedor.sergeev@oracle.com>2017-11-20 22:33:58 +0000
commita476117e3aa6ef32306e39259d4da42aaca88e11 (patch)
tree11663c32c3f9b61b909d8d1d2a075288190e4af3 /llvm/lib/Target/Sparc
parent2bc260aba2b4bba4700ae30ab1d0edfc32925fee (diff)
downloadbcm5719-llvm-a476117e3aa6ef32306e39259d4da42aaca88e11.tar.gz
bcm5719-llvm-a476117e3aa6ef32306e39259d4da42aaca88e11.zip
[Sparc] efficient pattern for UINT_TO_FP conversion
Summary: While investigating a performance degradation in the imagick benchmark, an inefficient pattern for UINT_TO_FP conversion was found. That pattern causes a RAW (read-after-write) hazard in the generated assembly code. Specifically, the uitofp IR operator results in poor assembly: st %i0, [%fp - 952] ldd [%fp - 952], %f0 That is, it stores a 32-bit integer register into a memory location and then loads 64-bit floating-point data from that same location, which is exactly the RAW hazard case. To optimize this case, it is possible to use SPISD::ITOF and SPISD::XTOF for the conversion from an integer to a floating-point data type, and to use ISD::BITCAST to copy the value from an integer register into a floating-point register. The fix is to write a custom UINT_TO_FP pattern using SPISD::ITOF, SPISD::XTOF, and ISD::BITCAST. Patch by Alexey Lapshin Reviewers: fedor.sergeev, jyknight, dcederman, lero_chris Reviewed By: jyknight Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D36875 llvm-svn: 318704
Diffstat (limited to 'llvm/lib/Target/Sparc')
-rw-r--r--llvm/lib/Target/Sparc/SparcISelLowering.cpp92
-rw-r--r--llvm/lib/Target/Sparc/SparcISelLowering.h4
-rw-r--r--llvm/lib/Target/Sparc/SparcInstrVIS.td25
3 files changed, 92 insertions, 29 deletions
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index d011ec87bac..b355b9c5a76 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1559,9 +1559,6 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
- setOperationAction(ISD::BITCAST, MVT::f32, Expand);
- setOperationAction(ISD::BITCAST, MVT::i32, Expand);
-
// Sparc has no select or setcc: expand to SELECT_CC.
setOperationAction(ISD::SELECT, MVT::i32, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Expand);
@@ -1590,13 +1587,14 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+ setOperationAction(ISD::BITCAST, MVT::i32, Custom);
+ setOperationAction(ISD::BITCAST, MVT::f32, Custom);
+
if (Subtarget->is64Bit()) {
setOperationAction(ISD::ADDC, MVT::i64, Custom);
setOperationAction(ISD::ADDE, MVT::i64, Custom);
setOperationAction(ISD::SUBC, MVT::i64, Custom);
setOperationAction(ISD::SUBE, MVT::i64, Custom);
- setOperationAction(ISD::BITCAST, MVT::f64, Expand);
- setOperationAction(ISD::BITCAST, MVT::i64, Expand);
setOperationAction(ISD::SELECT, MVT::i64, Expand);
setOperationAction(ISD::SETCC, MVT::i64, Expand);
setOperationAction(ISD::BR_CC, MVT::i64, Custom);
@@ -1610,6 +1608,9 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ROTL , MVT::i64, Expand);
setOperationAction(ISD::ROTR , MVT::i64, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
+
+ setOperationAction(ISD::BITCAST, MVT::i64, Custom);
+ setOperationAction(ISD::BITCAST, MVT::f64, Custom);
}
// ATOMICs.
@@ -2425,23 +2426,76 @@ static SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG,
1);
}
-static SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG,
- const SparcTargetLowering &TLI,
- bool hasHardQuad) {
+SDValue SparcTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ EVT SrcVT = Op.getOperand(0).getValueType();
+
+ EVT DstVT = Op.getValueType();
+
+ if (Subtarget->isVIS3()) {
+ if (DstVT == MVT::f32 && SrcVT == MVT::i32) {
+ return Op; // Legal
+ } else if (DstVT == MVT::f64 && SrcVT == MVT::i64) {
+ return (Subtarget->is64Bit())
+ ? Op
+ : SDValue(); // Legal on 64 bit, otherwise Expand
+ } else if (DstVT == MVT::i64 && SrcVT == MVT::f64) {
+ return (Subtarget->is64Bit())
+ ? Op
+ : SDValue(); // Legal on 64 bit, otherwise Expand
+ }
+ }
+
+ // Expand
+ return SDValue();
+}
+
+SDValue SparcTargetLowering::LowerUINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT OpVT = Op.getOperand(0).getValueType();
assert(OpVT == MVT::i32 || OpVT == MVT::i64);
- // Expand if it does not involve f128 or the target has support for
- // quad floating point instructions and the operand type is legal.
- if (Op.getValueType() != MVT::f128 || (hasHardQuad && TLI.isTypeLegal(OpVT)))
- return SDValue();
+ // Expand f128 operations to fp128 ABI calls.
+ if (Op.getValueType() == MVT::f128 &&
+ (!Subtarget->hasHardQuad() || !isTypeLegal(OpVT))) {
+ return LowerF128Op(Op, DAG,
+ getLibcallName(OpVT == MVT::i32
+ ? RTLIB::UINTTOFP_I32_F128
+ : RTLIB::UINTTOFP_I64_F128),
+ 1);
+ }
+
+ // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
+ // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
+ // the optimization here.
+ if (DAG.SignBitIsZero(Op.getOperand(0))) {
+
+ EVT floatVT = MVT::f32;
+ unsigned IntToFloatOpcode = SPISD::ITOF;
+
+ if (OpVT == MVT::i64) {
+ floatVT = MVT::f64;
+ IntToFloatOpcode = SPISD::XTOF;
+ }
- return TLI.LowerF128Op(Op, DAG,
- TLI.getLibcallName(OpVT == MVT::i32
- ? RTLIB::UINTTOFP_I32_F128
- : RTLIB::UINTTOFP_I64_F128),
- 1);
+ // Convert the int value to FP in an FP register.
+ SDValue FloatTmp = DAG.getNode(ISD::BITCAST, dl, floatVT, Op.getOperand(0));
+
+ return DAG.getNode(IntToFloatOpcode, dl, Op.getValueType(), FloatTmp);
+ }
+
+ if (OpVT == MVT::i32 && Subtarget->is64Bit()) {
+
+ SDValue Int64Tmp =
+ DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Op.getOperand(0));
+
+ SDValue Float64Tmp = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Int64Tmp);
+
+ return DAG.getNode(SPISD::XTOF, dl, Op.getValueType(), Float64Tmp);
+ }
+
+ return SDValue();
}
static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,
@@ -3059,8 +3113,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
hasHardQuad);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG, *this,
hasHardQuad);
- case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG, *this,
- hasHardQuad);
+ case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG, *this,
hasHardQuad);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, *this,
@@ -3097,6 +3150,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: return LowerATOMIC_LOAD_STORE(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::BITCAST: return LowerBITCAST(Op, DAG);
}
}
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.h b/llvm/lib/Target/Sparc/SparcISelLowering.h
index bf700d6a99d..501e16dc2d9 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -192,6 +192,10 @@ namespace llvm {
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+
bool ShouldShrinkFPConstant(EVT VT) const override {
// Do not shrink FP constpool if VT == MVT::f128.
// (ldd, call _Q_fdtoq) is more expensive than two ldds.
diff --git a/llvm/lib/Target/Sparc/SparcInstrVIS.td b/llvm/lib/Target/Sparc/SparcInstrVIS.td
index d9adf3e8b0f..dc3aa45bedc 100644
--- a/llvm/lib/Target/Sparc/SparcInstrVIS.td
+++ b/llvm/lib/Target/Sparc/SparcInstrVIS.td
@@ -243,16 +243,21 @@ def LZCNT : VISInstFormat<0b000010111, (outs I64Regs:$rd),
(ins I64Regs:$rs2), "lzcnt $rs2, $rd", []>;
let rs1 = 0 in {
-def MOVSTOSW : VISInstFormat<0b100010011, (outs I64Regs:$rd),
- (ins DFPRegs:$rs2), "movstosw $rs2, $rd", []>;
-def MOVSTOUW : VISInstFormat<0b100010001, (outs I64Regs:$rd),
- (ins DFPRegs:$rs2), "movstouw $rs2, $rd", []>;
-def MOVDTOX : VISInstFormat<0b100010000, (outs I64Regs:$rd),
- (ins DFPRegs:$rs2), "movdtox $rs2, $rd", []>;
-def MOVWTOS : VISInstFormat<0b100011001, (outs DFPRegs:$rd),
- (ins I64Regs:$rs2), "movdtox $rs2, $rd", []>;
-def MOVXTOD : VISInstFormat<0b100011000, (outs DFPRegs:$rd),
- (ins I64Regs:$rs2), "movdtox $rs2, $rd", []>;
+def MOVSTOSW : VISInstFormat<0b100010011, (outs I64Regs:$rd), (ins FPRegs:$rs2),
+ "movstosw $rs2, $rd",
+ [(set I64Regs:$rd, (sext (i32 (bitconvert FPRegs:$rs2))))]>;
+def MOVSTOUW : VISInstFormat<0b100010001, (outs I64Regs:$rd), (ins FPRegs:$rs2),
+ "movstouw $rs2, $rd",
+ [(set I64Regs:$rd, (zext (i32 (bitconvert FPRegs:$rs2))))]>;
+def MOVDTOX : VISInstFormat<0b100010000, (outs I64Regs:$rd), (ins DFPRegs:$rs2),
+ "movdtox $rs2, $rd",
+ [(set I64Regs:$rd, (bitconvert DFPRegs:$rs2))]>;
+def MOVWTOS : VISInstFormat<0b100011001, (outs FPRegs:$rd), (ins IntRegs:$rs2),
+ "movwtos $rs2, $rd",
+ [(set FPRegs:$rd, (bitconvert i32:$rs2))]>;
+def MOVXTOD : VISInstFormat<0b100011000, (outs DFPRegs:$rd), (ins I64Regs:$rs2),
+ "movxtod $rs2, $rd",
+ [(set DFPRegs:$rd, (bitconvert I64Regs:$rs2))]>;
}
def PDISTN : VISInst<0b000111111, "pdistn">;
OpenPOWER on IntegriCloud