summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/X86/X86CallingConv.td 12
-rw-r--r-- llvm/lib/Target/X86/X86FloatingPoint.cpp 45
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 2
-rw-r--r-- llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll 38
4 files changed, 87 insertions, 10 deletions
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index 4737728e4fa..eab11196c82 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -24,6 +24,8 @@ class RC_X86_RegCall {
list<Register> GPR_16 = [];
list<Register> GPR_32 = [];
list<Register> GPR_64 = [];
+ list<Register> FP_CALL = [FP0];
+ list<Register> FP_RET = [FP0, FP1];
list<Register> XMM = [];
list<Register> YMM = [];
list<Register> ZMM = [];
@@ -90,14 +92,14 @@ def CC_#NAME : CallingConv<[
// TODO: Handle the case of mask types (v*i1)
CCIfType<[v8i1, v16i1, v32i1], CCCustom<"CC_X86_RegCall_Error">>,
- // TODO: Handle the case of long double (f80)
- CCIfType<[f80], CCCustom<"CC_X86_RegCall_Error">>,
-
// float, double, float128 --> XMM
// In the case of SSE disabled --> save to stack
CCIfType<[f32, f64, f128],
CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
+ // long double --> FP
+ CCIfType<[f80], CCAssignToReg<RC.FP_CALL>>,
+
// __m128, __m128i, __m128d --> XMM
// In the case of SSE disabled --> save to stack
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
@@ -129,7 +131,7 @@ def CC_#NAME : CallingConv<[
// float 128 get stack slots whose size and alignment depends
// on the subtarget.
- CCIfType<[f128], CCAssignToStack<0, 0>>,
+ CCIfType<[f80, f128], CCAssignToStack<0, 0>>,
// Vectors get 16-byte stack slots that are 16-byte aligned.
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
@@ -166,7 +168,7 @@ def RetCC_#NAME : CallingConv<[
CCIfType<[v8i1, v16i1, v32i1], CCCustom<"CC_X86_RegCall_Error">>,
// long double --> FP
- CCIfType<[f80], CCAssignToReg<[FP0]>>,
+ CCIfType<[f80], CCAssignToReg<RC.FP_RET>>,
// float, double, float128 --> XMM
CCIfType<[f32, f64, f128],
diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp
index 3daa56b4db2..a5489b9aa8b 100644
--- a/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -206,6 +206,13 @@ namespace {
RegMap[Reg] = StackTop++;
}
+ // popReg - Pop a register from the stack.
+ void popReg() {
+ if (StackTop == 0)
+ report_fatal_error("Cannot pop empty stack!");
+ RegMap[Stack[--StackTop]] = ~0; // Update state
+ }
+
bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
@@ -329,6 +336,25 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
df_iterator_default_set<MachineBasicBlock*> Processed;
MachineBasicBlock *Entry = &MF.front();
+ LiveBundle &Bundle =
+ LiveBundles[Bundles->getBundle(Entry->getNumber(), false)];
+
+ // In the regcall convention, some FP registers may not be passed through
+ // the stack, so they will need to be assigned to the stack first.
+ if ((Entry->getParent()->getFunction()->getCallingConv() ==
+ CallingConv::X86_RegCall) && (Bundle.Mask && !Bundle.FixCount)) {
+ // In the register calling convention, up to one FP argument could be
+ // saved in the first FP register.
+ // If Bundle.Mask is non-zero and Bundle.FixCount is zero, it means
+ // that the FP registers contain arguments.
+ // The actual value is passed in FP0.
+ // Here we fix the stack and mark FP0 as pre-assigned register.
+ assert((Bundle.Mask & 0xFE) == 0 &&
+ "Only FP0 could be passed as an argument");
+ Bundle.FixCount = 1;
+ Bundle.FixStack[0] = 0;
+ }
+
bool Changed = false;
for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed))
Changed |= processBasicBlock(MF, *BB);
@@ -791,9 +817,8 @@ void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
MachineInstr &MI = *I;
const DebugLoc &dl = MI.getDebugLoc();
ASSERT_SORTED(PopTable);
- if (StackTop == 0)
- report_fatal_error("Cannot pop empty stack!");
- RegMap[Stack[--StackTop]] = ~0; // Update state
+
+ popReg();
// Check to see if there is a popping version of this instruction...
int Opcode = Lookup(PopTable, I->getOpcode());
@@ -929,6 +954,7 @@ void FPS::shuffleStackTop(const unsigned char *FixStack,
void FPS::handleCall(MachineBasicBlock::iterator &I) {
unsigned STReturns = 0;
+ const MachineFunction* MF = I->getParent()->getParent();
for (const auto &MO : I->operands()) {
if (!MO.isReg())
@@ -937,7 +963,10 @@ void FPS::handleCall(MachineBasicBlock::iterator &I) {
unsigned R = MO.getReg() - X86::FP0;
if (R < 8) {
- assert(MO.isDef() && MO.isImplicit());
+ if (MF->getFunction()->getCallingConv() != CallingConv::X86_RegCall) {
+ assert(MO.isDef() && MO.isImplicit());
+ }
+
STReturns |= 1 << R;
}
}
@@ -945,9 +974,15 @@ void FPS::handleCall(MachineBasicBlock::iterator &I) {
unsigned N = countTrailingOnes(STReturns);
// FP registers used for function return must be consecutive starting at
- // FP0.
+ // FP0
assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2));
+ // Reset the FP stack - this is required because of possible leftovers from
+ // passed arguments. The caller should assume that the FP stack is
+ // returned empty (unless the callee returns values on the FP stack).
+ while (StackTop > 0)
+ popReg();
+
for (unsigned I = 0; I < N; ++I)
pushReg(N - I - 1);
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index cd6d9a402e0..377ec636a66 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2816,6 +2816,8 @@ SDValue X86TargetLowering::LowerFormalArguments(
RC = &X86::FR32RegClass;
else if (RegVT == MVT::f64)
RC = &X86::FR64RegClass;
+ else if (RegVT == MVT::f80)
+ RC = &X86::RFP80RegClass;
else if (RegVT == MVT::f128)
RC = &X86::FR128RegClass;
else if (RegVT.is512BitVector())
diff --git a/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll b/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll
index 9d6a88d5b78..ce8fca036c9 100644
--- a/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll
+++ b/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll
@@ -246,6 +246,44 @@ define x86_regcallcc double @test_CallargRetDouble(double %a) {
ret double %d
}
+; X32: test_argRetf80
+; X32-NOT: fldt
+; X32: fadd %st(0), %st(0)
+; X32: retl
+
+; WIN64: test_argRetf80
+; WIN64-NOT: fldt
+; WIN64: fadd %st(0), %st(0)
+; WIN64: retq
+
+; Test regcall when receiving/returning long double
+define x86_regcallcc x86_fp80 @test_argRetf80(x86_fp80 %a0) nounwind {
+ %r0 = fadd x86_fp80 %a0, %a0
+ ret x86_fp80 %r0
+}
+
+; X32: test_CallargRetf80
+; X32-NOT: fldt
+; X32: fadd %st({{[0-7]}}), %st({{[0-7]}})
+; X32: call{{.*}} {{.*}}test_argRetf80
+; X32: fadd{{.*}} %st({{[0-7]}})
+; X32: retl
+
+; WIN64: test_CallargRetf80
+; WIN64-NOT: fldt
+; WIN64: fadd %st({{[0-7]}}), %st({{[0-7]}})
+; WIN64: call{{.*}} {{.*}}test_argRetf80
+; WIN64: fadd{{.*}} %st({{[0-7]}})
+; WIN64: retq
+
+; Test regcall when passing/retrieving long double
+define x86_regcallcc x86_fp80 @test_CallargRetf80(x86_fp80 %a) {
+ %b = fadd x86_fp80 %a, %a
+ %c = call x86_regcallcc x86_fp80 @test_argRetf80(x86_fp80 %b)
+ %d = fadd x86_fp80 %c, %c
+ ret x86_fp80 %d
+}
+
; X32-LABEL: test_argRetPointer:
; X32: incl %eax
; X32: ret{{.*}}
OpenPOWER on IntegriCloud