Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 70
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h   | 12
-rw-r--r-- | llvm/lib/Target/X86/X86InstrFPStack.td  | 13
3 files changed, 75 insertions, 20 deletions
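
The change below lets the 32-bit x86 backend lower a 64-bit atomic load through the X87 unit when SSE2 is unavailable: the value is loaded with FILD into an 80-bit register, stored with FIST to a stack temporary, and then reloaded as an ordinary integer, instead of falling back to lock cmpxchg8b. As a rough illustration only (the file name, function name, and compiler flags are examples, not taken from the commit), this is the kind of source that reaches the new path:

// atomic_load_demo.cpp -- illustrative only, not part of the patch.
// Build for a 32-bit x86 target without SSE2, for example:
//   clang++ -m32 -march=i586 -O2 -S atomic_load_demo.cpp
// An aligned 64-bit x87 memory access is architecturally atomic, which is
// what makes lowering this through fild/fistp legal.
#include <atomic>
#include <cstdint>

std::atomic<int64_t> Counter{0};

int64_t readCounter() {
  // A single indivisible 64-bit load; with this patch it can be selected
  // as fild m64 / fistp m64 through a stack slot instead of lock cmpxchg8b.
  return Counter.load();
}
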
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2258e0ce24e..216fd5b8d14 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -25584,17 +25584,18 @@ bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
 
 // Note: this turns large loads into lock cmpxchg8b/16b.
 // TODO: In 32-bit mode, use MOVLPS when SSE1 is available?
-// TODO: In 32-bit mode, use FILD/FISTP when X87 is available?
 TargetLowering::AtomicExpansionKind
 X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   Type *MemType = LI->getType();
 
   // If this a 64 bit atomic load on a 32-bit target and SSE2 is enabled, we
-  // can use movq to do the load.
+  // can use movq to do the load. If we have X87 we can load into an 80-bit
+  // X87 register and store it to a stack temporary.
   bool NoImplicitFloatOps =
       LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
   if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
-      !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2())
+      !Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
+      (Subtarget.hasSSE2() || Subtarget.hasX87()))
     return AtomicExpansionKind::None;
 
   return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
@@ -27440,23 +27441,57 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     bool NoImplicitFloatOps =
         DAG.getMachineFunction().getFunction().hasFnAttribute(
             Attribute::NoImplicitFloat);
-    if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
-        Subtarget.hasSSE2()) {
+    if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
       auto *Node = cast<AtomicSDNode>(N);
-      // Use a VZEXT_LOAD which will be selected as MOVQ. Then extract the lower
-      // 64-bits.
-      SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
-      SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
-      SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
-                                           MVT::i64, Node->getMemOperand());
-      SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
-                                DAG.getIntPtrConstant(0, dl));
-      Results.push_back(Res);
-      Results.push_back(Ld.getValue(1));
-      return;
+      if (Subtarget.hasSSE2()) {
+        // Use a VZEXT_LOAD which will be selected as MOVQ. Then extract the
+        // lower 64-bits.
+        SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+        SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
+        SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
+                                             MVT::i64, Node->getMemOperand());
+        SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
+                                  DAG.getIntPtrConstant(0, dl));
+        Results.push_back(Res);
+        Results.push_back(Ld.getValue(1));
+        return;
+      }
+      if (Subtarget.hasX87()) {
+        // First load this into an 80-bit X87 register. This will put the whole
+        // integer into the significand.
+        // FIXME: Do we need to glue? See FIXME comment in BuildFILD.
+        SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other, MVT::Glue);
+        SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
+        SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD_FLAG,
+                                                 dl, Tys, Ops, MVT::i64,
+                                                 Node->getMemOperand());
+        SDValue Chain = Result.getValue(1);
+        SDValue InFlag = Result.getValue(2);
+
+        // Now store the X87 register to a stack temporary and convert to i64.
+        // This store is not atomic and doesn't need to be.
+        // FIXME: We don't need a stack temporary if the result of the load
+        // is already being stored. We could just directly store there.
+        SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
+        int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+        MachinePointerInfo MPI =
+            MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+        SDValue StoreOps[] = { Chain, Result, StackPtr, InFlag };
+        Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, dl,
+                                        DAG.getVTList(MVT::Other), StoreOps,
+                                        MVT::i64, MPI, 0 /*Align*/,
+                                        MachineMemOperand::MOStore);
+
+        // Finally load the value back from the stack temporary and return it.
+        // This load is not atomic and doesn't need to be.
+        // This load will be further type legalized.
+        Result = DAG.getLoad(MVT::i64, dl, Chain, StackPtr, MPI);
+        Results.push_back(Result);
+        Results.push_back(Result.getValue(1));
+        return;
+      }
     }
     // TODO: Use MOVLPS when SSE1 is available?
-    // TODO: Use FILD/FISTP when X87 is available?
     // Delegate to generic TypeLegalization. Situations we can really handle
     // should have already been dealt with by AtomicExpandPass.cpp.
     break;
@@ -27649,6 +27684,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::FXOR:               return "X86ISD::FXOR";
   case X86ISD::FILD:               return "X86ISD::FILD";
   case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
+  case X86ISD::FIST:               return "X86ISD::FIST";
   case X86ISD::FP_TO_INT_IN_MEM:   return "X86ISD::FP_TO_INT_IN_MEM";
   case X86ISD::FLD:                return "X86ISD::FLD";
   case X86ISD::FST:                return "X86ISD::FST";
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index b46fb8ef6fc..193d04094e2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -608,16 +608,22 @@ namespace llvm {
       FILD,
       FILD_FLAG,
 
+      /// This instruction implements a fp->int store from FP stack
+      /// slots. This corresponds to the fist instruction. It takes a
+      /// chain operand, value to store, address, and glue. The memory VT
+      /// specifies the type to store as.
+      FIST,
+
       /// This instruction implements an extending load to FP stack slots.
       /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
       /// operand, and ptr to load from. The memory VT specifies the type to
       /// load from.
       FLD,
 
-      /// This instruction implements a truncating store to FP stack
+      /// This instruction implements a truncating store from FP stack
       /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
-      /// chain operand, value to store, and address. The memory VT specifies
-      /// the type to store as.
+      /// chain operand, value to store, address, and glue. The memory VT
+      /// specifies the type to store as.
       FST,
 
       /// This instruction grabs the address of the next argument
diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td
index 8a756bfc3b1..2ec6d50f970 100644
--- a/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -21,6 +21,7 @@ def SDTX86Fld       : SDTypeProfile<1, 1, [SDTCisFP<0>,
 def SDTX86Fst       : SDTypeProfile<0, 2, [SDTCisFP<0>,
                                            SDTCisPtrTy<1>]>;
 def SDTX86Fild      : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
+def SDTX86Fist      : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
 def SDTX86Fnstsw    : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
 def SDTX86CwdStore  : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 
@@ -35,6 +36,9 @@ def X86fild         : SDNode<"X86ISD::FILD", SDTX86Fild,
 def X86fildflag     : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
                              [SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
                               SDNPMemOperand]>;
+def X86fist         : SDNode<"X86ISD::FIST", SDTX86Fist,
+                             [SDNPHasChain, SDNPInGlue, SDNPMayStore,
+                              SDNPMemOperand]>;
 def X86fp_stsw      : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>;
 def X86fp_to_mem    : SDNode<"X86ISD::FP_TO_INT_IN_MEM", SDTX86Fst,
                              [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
@@ -79,6 +83,11 @@ def X86fildflag64 : PatFrag<(ops node:$ptr), (X86fildflag node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
 }]>;
+def X86fist64 : PatFrag<(ops node:$val, node:$ptr),
+                        (X86fist node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
 def X86fp_to_i16mem : PatFrag<(ops node:$val, node:$ptr),
                               (X86fp_to_mem node:$val, node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
 }]>;
@@ -760,6 +769,10 @@ def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
 // Used to conv. i64 to f64 since there isn't a SSE version.
 def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m64 addr:$src)>;
 
+// Used to conv. between f80 and i64 for i64 atomic loads.
+def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m80 addr:$src)>;
+def : Pat<(X86fist64 RFP80:$src, addr:$op), (IST_Fp64m80 addr:$op, RFP80:$src)>;
+
 // FP extensions map onto simple pseudo-value conversions if they are to/from
 // the FP stack.
 def : Pat<(f64 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
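
One more note on why the FILD/FIST round trip used in the ATOMIC_LOAD expansion is lossless: the X87 80-bit format carries a full 64-bit significand, so every i64 bit pattern is represented exactly and survives the load/store pair unchanged (the in-code comment "This will put the whole integer into the significand" is making exactly this point). A small host-side sketch of the same property, assuming long double is the 80-bit X87 type as on x86 Linux; the sample values are arbitrary:

// Sketch only: shows that an x87 80-bit round trip preserves any 64-bit
// integer exactly, mirroring what FILD (i64 -> f80) and FIST (f80 -> i64)
// do in the ATOMIC_LOAD expansion above.
#include <cassert>
#include <cstdint>
#include <limits>

int main() {
  // 64 significand bits are exactly enough to hold every int64_t value.
  static_assert(std::numeric_limits<long double>::digits >= 64,
                "expects the 80-bit x87 long double");

  const int64_t Samples[] = {0, 1, -1, 0x0123456789abcdefLL,
                             INT64_MIN, INT64_MAX};
  for (int64_t V : Samples) {
    long double F = static_cast<long double>(V); // what FILD does
    int64_t Back = static_cast<int64_t>(F);      // what FIST/FISTP does
    assert(Back == V);                           // no rounding, no loss
  }
  return 0;
}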