Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
 llvm/lib/Target/X86/X86ISelLowering.cpp | 49 ++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 44 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5de8ea9c934..dc3292eed8c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -485,6 +485,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
}
+ if (!Subtarget.is64Bit())
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
+
if (Subtarget.hasCmpxchg16b()) {
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
}
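
The hunk above is what routes the new case: on 32-bit targets i64 is not a legal integer type, so marking ISD::ATOMIC_LOAD on MVT::i64 as Custom makes type legalization call ReplaceNodeResults (changed further down) instead of unconditionally expanding the load. As a rough user-level sketch (not part of the patch; flags assumed to be -m32 -msse2 or an equivalent i686 triple), this is the kind of source that now reaches the new path:

#include <atomic>
#include <cstdint>

std::atomic<uint64_t> Counter;

uint64_t readCounter() {
  // On a 32-bit x86 target this is an i64 `load atomic`; with SSE2 it can
  // now be selected as a single MOVQ instead of a LOCK CMPXCHG8B loop.
  return Counter.load(std::memory_order_acquire);
}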
@@ -25494,11 +25497,22 @@ bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
}
// Note: this turns large loads into lock cmpxchg8b/16b.
-// FIXME: On 32 bits x86, fild/movq might be faster than lock cmpxchg8b.
+// TODO: In 32-bit mode, use MOVLPS when SSE1 is available?
+// TODO: In 32-bit mode, use FILD/FISTP when X87 is available?
TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
- return needsCmpXchgNb(LI->getType()) ? AtomicExpansionKind::CmpXChg
- : AtomicExpansionKind::None;
+ Type *MemType = LI->getType();
+
+ // If this is a 64 bit atomic load on a 32-bit target and SSE2 is enabled, we
+ // can use movq to do the load.
+ bool NoImplicitFloatOps =
+ LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
+ if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
+ !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2())
+ return AtomicExpansionKind::None;
+
+ return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
+ : AtomicExpansionKind::None;
}
TargetLowering::AtomicExpansionKind
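
For context (a simplified illustration, not code from this patch): AtomicExpandPass consults shouldExpandAtomicLoadInIR for every atomic load. Returning AtomicExpansionKind::None leaves the `load atomic` intact for SelectionDAG, which is why the SSE2 case above must bail out before the needsCmpXchgNb check; returning CmpXChg rewrites the load in IR as a compare-exchange that swaps in the value already present. A self-contained C++ sketch of what that CmpXChg expansion means semantically:

#include <atomic>
#include <cstdint>
#include <cstdio>

// An atomic 64-bit "load" built from compare-exchange, semantically what the
// lock cmpxchg8b expansion does. Note that it performs a store on success,
// dirtying the cache line; the MOVQ path avoids that entirely.
uint64_t loadViaCmpXchg(std::atomic<uint64_t> &A) {
  uint64_t Expected = 0;
  // If A != 0 the exchange fails and writes A's current value into Expected;
  // if A == 0 it stores 0 over 0. Either way Expected ends up holding an
  // atomic snapshot of A.
  A.compare_exchange_strong(Expected, Expected);
  return Expected;
}

int main() {
  std::atomic<uint64_t> A{0x123456789abcdef0ULL};
  std::printf("%llx\n", (unsigned long long)loadViaCmpXchg(A));
}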
@@ -27312,6 +27326,32 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(EFLAGS.getValue(1));
return;
}
+ case ISD::ATOMIC_LOAD: {
+ assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
+ bool NoImplicitFloatOps =
+ DAG.getMachineFunction().getFunction().hasFnAttribute(
+ Attribute::NoImplicitFloat);
+ if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
+ Subtarget.hasSSE2()) {
+ auto *Node = cast<AtomicSDNode>(N);
+ // Use a VZEXT_LOAD which will be selected as MOVQ. Then extract the lower
+ // 64-bits.
+ SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+ SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
+ SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
+ MVT::i64, Node->getMemOperand());
+ SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
+ DAG.getIntPtrConstant(0, dl));
+ Results.push_back(Res);
+ Results.push_back(Ld.getValue(1));
+ return;
+ }
+ // TODO: Use MOVLPS when SSE1 is available?
+ // TODO: Use FILD/FISTP when X87 is available?
+ // Delegate to generic TypeLegalization. Situations we can really handle
+ // should have already been dealt with by AtomicExpandPass.cpp.
+ break;
+ }
case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
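
The reason the i64 case cannot simply be split into two 32-bit loads, and why the single-instruction MOVQ selected from the VZEXT_LOAD above is safe, is tearing: an aligned 8-byte load executes as one atomic access on x86, while two 4-byte loads could observe halves of two different stores. A small illustrative test (an assumption-laden sketch, not from the patch; build flags assumed to be -m32 -msse2 -pthread) that would flag torn reads:

#include <atomic>
#include <cstdint>
#include <cstdio>
#include <thread>

std::atomic<uint64_t> Shared{0};

int main() {
  // The writer flips between two patterns whose 32-bit halves never mix.
  std::thread Writer([] {
    for (int I = 0; I < 10000000; ++I)
      Shared.store(I % 2 ? ~uint64_t{0} : uint64_t{0},
                   std::memory_order_relaxed);
  });
  uint64_t Torn = 0;
  for (int I = 0; I < 10000000; ++I) {
    // With this patch on -m32 -msse2 the load below is a single MOVQ.
    uint64_t V = Shared.load(std::memory_order_relaxed);
    if (V != 0 && V != ~uint64_t{0})
      ++Torn; // a value mixing the two patterns would be a torn read
  }
  Writer.join();
  std::printf("torn reads: %llu\n", (unsigned long long)Torn);
  return Torn != 0;
}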
@@ -27323,11 +27363,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_LOAD_UMAX:
- case ISD::ATOMIC_LOAD: {
// Delegate to generic TypeLegalization. Situations we can really handle
// should have already been dealt with by AtomicExpandPass.cpp.
break;
- }
+
case ISD::BITCAST: {
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
EVT DstVT = N->getValueType(0);