author    | Reid Kleckner <rnk@google.com> | 2019-11-19 14:33:47 -0800
committer | Reid Kleckner <rnk@google.com> | 2020-01-14 17:19:35 -0800
commit    | 40cd26c7008183e01d8276396339aea2a99d83d7 (patch)
tree      | 7b863cc490d6fd1adaad11f1ee332d420348eb26 /llvm
parent    | 65c8abb14e77b28d8357c52dddb8e0a6b12b4ba2 (diff)
[Win64] Handle FP arguments more gracefully under -mno-sse
Pass small FP values in GPRs or stack memory according to the normal
convention. This is what gcc -mno-sse does on Win64.
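
For illustration, the following is condensed from the new no-sse-win64.ll test added below: under -mattr=-sse on Win64, a double argument arrives as raw bits in the first GPR slot (RCX), and a double return value travels back in RAX.

```llvm
; Condensed from llvm/test/CodeGen/X86/no-sse-win64.ll (added below).
; RUN: llc -mtriple=x86_64-windows-msvc -mattr=-sse < %s | FileCheck %s

define void @recv_double(double %v, double* %p) {
; CHECK-LABEL: recv_double:
; CHECK: movq %rcx, (%rdx)
  store double %v, double* %p
  ret void
}

define dso_local double @ret_double(double* %p) {
; CHECK-LABEL: ret_double:
; CHECK: movq (%rcx), %rax
  %v = load double, double* %p
  ret double %v
}
```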
I adjusted the conditions under which we emit an error to check if the
argument or return value would be passed in an XMM register when SSE is
disabled. This has a side effect of no longer emitting an error for FP
arguments marked 'inreg' when targeting x86 with SSE disabled. Our
calling convention logic was already assigning it to FP0/FP1, and then
we emitted this error. That seems unnecessary; we can ignore 'inreg' and
compile it without SSE.
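
A minimal sketch of that case, condensed from the new no-sse-x86.ll test below: the same 'inreg' FP call that the deleted nosse-error2.ll test expected to fail now compiles, with values flowing through the x87 stack instead of XMM registers.

```llvm
; Condensed from llvm/test/CodeGen/X86/no-sse-x86.ll (added below); this
; module previously triggered "SSE register return with SSE disabled".
; RUN: llc < %s -mcpu=i686 -mattr=-sse | FileCheck --check-prefix NOSSE %s
target triple = "i386-unknown-linux-gnu"

@f = external global float

declare inreg float @foo1(float inreg)

define void @caller() nounwind {
; NOSSE-LABEL: caller:
; NOSSE: calll foo1
  %v = load float, float* @f, align 4
  %r = tail call inreg float @foo1(float inreg %v) nounwind
  store float %r, float* @f, align 4
  ret void
}
```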
Reviewers: jyknight, aemerson
Differential Revision: https://reviews.llvm.org/D70465
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/X86/X86CallingConv.td   |  20
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp |  33
-rw-r--r-- | llvm/test/CodeGen/X86/no-sse-win64.ll   | 129
-rw-r--r-- | llvm/test/CodeGen/X86/no-sse-x86.ll     |  76
-rw-r--r-- | llvm/test/CodeGen/X86/nosse-error2.ll   |  36
5 files changed, 236 insertions, 58 deletions
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index 30d05c63814..db1aef2fd09 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -346,6 +346,10 @@ def RetCC_X86_Win64_C : CallingConv<[
   // The X86-Win64 calling convention always returns __m64 values in RAX.
   CCIfType<[x86mmx], CCBitConvertToType<i64>>,
 
+  // GCC returns FP values in RAX on Win64.
+  CCIfType<[f32], CCIfNotSubtarget<"hasSSE1()", CCBitConvertToType<i32>>>,
+  CCIfType<[f64], CCIfNotSubtarget<"hasSSE1()", CCBitConvertToType<i64>>>,
+
   // Otherwise, everything is the same as 'normal' X86-64 C CC.
   CCDelegateTo<RetCC_X86_64_C>
 ]>;
@@ -613,7 +617,6 @@ def CC_X86_Win64_C : CallingConv<[
   // 128 bit vectors are passed by pointer
   CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>,
 
-
   // 256 bit vectors are passed by pointer
   CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], CCPassIndirect<i64>>,
 
@@ -626,6 +629,16 @@ def CC_X86_Win64_C : CallingConv<[
   // The first 4 MMX vector arguments are passed in GPRs.
   CCIfType<[x86mmx], CCBitConvertToType<i64>>,
 
+  // If SSE was disabled, pass FP values smaller than 64-bits as integers in
+  // GPRs or on the stack.
+  CCIfType<[f32], CCIfNotSubtarget<"hasSSE1()", CCBitConvertToType<i32>>>,
+  CCIfType<[f64], CCIfNotSubtarget<"hasSSE1()", CCBitConvertToType<i64>>>,
+
+  // The first 4 FP/Vector arguments are passed in XMM registers.
+  CCIfType<[f32, f64],
+           CCAssignToRegWithShadow<[XMM0, XMM1, XMM2, XMM3],
+                                   [RCX , RDX , R8  , R9  ]>>,
+
   // The first 4 integer arguments are passed in integer registers.
   CCIfType<[i8 ], CCAssignToRegWithShadow<[CL  , DL  , R8B , R9B ],
                                           [XMM0, XMM1, XMM2, XMM3]>>,
@@ -643,11 +656,6 @@ def CC_X86_Win64_C : CallingConv<[
   CCIfType<[i64], CCAssignToRegWithShadow<[RCX , RDX , R8  , R9  ],
                                           [XMM0, XMM1, XMM2, XMM3]>>,
 
-  // The first 4 FP/Vector arguments are passed in XMM registers.
-  CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
-           CCAssignToRegWithShadow<[XMM0, XMM1, XMM2, XMM3],
-                                   [RCX , RDX , R8  , R9  ]>>,
-
   // Integer/FP values get stored in stack slots that are 8 bytes in size and
   // 8-byte aligned if there are no more registers to hold them.
   CCIfType<[i8, i16, i32, i64, f32, f64], CCAssignToStack<8, 8>>
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 664c6b9af76..d7593f52573 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2693,18 +2693,16 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
     assert(VA.getLocInfo() != CCValAssign::FPExt &&
            "Unexpected FP-extend for return value.");
 
-    // If this is x86-64, and we disabled SSE, we can't return FP values,
-    // or SSE or MMX vectors.
-    if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
-         VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
-        (Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
+    // Report an error if we have attempted to return a value via an XMM
+    // register and SSE was disabled.
+    if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
       errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
       VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
-    } else if (ValVT == MVT::f64 &&
-               (Subtarget.is64Bit() && !Subtarget.hasSSE2())) {
-      // Likewise we can't return F64 values with SSE1 only.  gcc does so, but
-      // llvm-gcc has never done it right and no one has noticed, so this
-      // should be OK for now.
+    } else if (!Subtarget.hasSSE2() &&
+               X86::FR64XRegClass.contains(VA.getLocReg()) &&
+               ValVT == MVT::f64) {
+      // When returning a double via an XMM register, report an error if SSE2
+      // is not enabled.
       errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
       VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
     }
@@ -2999,7 +2997,6 @@ SDValue X86TargetLowering::LowerCallResult(
   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
   // Assign locations to each value returned by this call.
   SmallVector<CCValAssign, 16> RVLocs;
-  bool Is64Bit = Subtarget.is64Bit();
   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                  *DAG.getContext());
   CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
@@ -3018,16 +3015,17 @@ SDValue X86TargetLowering::LowerCallResult(
         RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
     }
 
-    // If this is x86-64, and we disabled SSE, we can't return FP values
-    if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
-        ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
+    // Report an error if there was an attempt to return FP values via XMM
+    // registers.
+    if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
       errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
       if (VA.getLocReg() == X86::XMM1)
         VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
       else
         VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
-    } else if (CopyVT == MVT::f64 &&
-               (Is64Bit && !Subtarget.hasSSE2())) {
+    } else if (!Subtarget.hasSSE2() &&
+               X86::FR64XRegClass.contains(VA.getLocReg()) &&
+               CopyVT == MVT::f64) {
       errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
       if (VA.getLocReg() == X86::XMM1)
         VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
@@ -3074,6 +3072,9 @@ SDValue X86TargetLowering::LowerCallResult(
       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
     }
 
+    if (VA.getLocInfo() == CCValAssign::BCvt)
+      Val = DAG.getBitcast(VA.getValVT(), Val);
+
     InVals.push_back(Val);
   }
 
diff --git a/llvm/test/CodeGen/X86/no-sse-win64.ll b/llvm/test/CodeGen/X86/no-sse-win64.ll
new file mode 100644
index 00000000000..c220b960612
--- /dev/null
+++ b/llvm/test/CodeGen/X86/no-sse-win64.ll
@@ -0,0 +1,129 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-windows-msvc < %s -mattr=-sse | FileCheck %s
+; RUN: llc -mtriple=x86_64-windows-gnu < %s -mattr=-sse | FileCheck %s
+
+define void @recv_double(double %v, double* %p) {
+; CHECK-LABEL: recv_double:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rcx, (%rdx)
+; CHECK-NEXT:    retq
+  store double %v, double* %p
+  ret void
+}
+
+define void @recv_float(float %v, float* %p) {
+; CHECK-LABEL: recv_float:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %ecx, (%rdx)
+; CHECK-NEXT:    retq
+  store float %v, float* %p
+  ret void
+}
+
+define dso_local double @ret_double(double* %p) {
+; CHECK-LABEL: ret_double:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq (%rcx), %rax
+; CHECK-NEXT:    retq
+entry:
+  %v = load double, double* %p
+  ret double %v
+}
+
+define dso_local float @ret_float(float* %p) {
+; CHECK-LABEL: ret_float:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl (%rcx), %eax
+; CHECK-NEXT:    retq
+entry:
+  %v = load float, float* %p
+  ret float %v
+}
+
+declare void @take_double(double)
+declare void @take_float(float)
+
+define void @pass_double(double* %p) {
+; CHECK-LABEL: pass_double:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    .seh_stackalloc 40
+; CHECK-NEXT:    .seh_endprologue
+; CHECK-NEXT:    movq (%rcx), %rcx
+; CHECK-NEXT:    callq take_double
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    retq
+; CHECK-NEXT:    .seh_handlerdata
+; CHECK-NEXT:    .text
+; CHECK-NEXT:    .seh_endproc
+  %v = load double, double* %p
+  call void @take_double(double %v)
+  ret void
+}
+
+define void @pass_float(float* %p) {
+; CHECK-LABEL: pass_float:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    .seh_stackalloc 40
+; CHECK-NEXT:    .seh_endprologue
+; CHECK-NEXT:    movl (%rcx), %ecx
+; CHECK-NEXT:    callq take_float
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    retq
+; CHECK-NEXT:    .seh_handlerdata
+; CHECK-NEXT:    .text
+; CHECK-NEXT:    .seh_endproc
+  %v = load float, float* %p
+  call void @take_float(float %v)
+  ret void
+}
+
+declare double @produce_double()
+declare float @produce_float()
+
+define void @call_double(double* %p) {
+; CHECK-LABEL: call_double:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rsi
+; CHECK-NEXT:    .seh_pushreg %rsi
+; CHECK-NEXT:    subq $32, %rsp
+; CHECK-NEXT:    .seh_stackalloc 32
+; CHECK-NEXT:    .seh_endprologue
+; CHECK-NEXT:    movq %rcx, %rsi
+; CHECK-NEXT:    callq produce_double
+; CHECK-NEXT:    movq %rax, (%rsi)
+; CHECK-NEXT:    addq $32, %rsp
+; CHECK-NEXT:    popq %rsi
+; CHECK-NEXT:    retq
+; CHECK-NEXT:    .seh_handlerdata
+; CHECK-NEXT:    .text
+; CHECK-NEXT:    .seh_endproc
+  %v = call double @produce_double()
+  store double %v, double* %p
+  ret void
+}
+
+define void @call_float(float* %p) {
+; CHECK-LABEL: call_float:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rsi
+; CHECK-NEXT:    .seh_pushreg %rsi
+; CHECK-NEXT:    subq $32, %rsp
+; CHECK-NEXT:    .seh_stackalloc 32
+; CHECK-NEXT:    .seh_endprologue
+; CHECK-NEXT:    movq %rcx, %rsi
+; CHECK-NEXT:    callq produce_float
+; CHECK-NEXT:    movl %eax, (%rsi)
+; CHECK-NEXT:    addq $32, %rsp
+; CHECK-NEXT:    popq %rsi
+; CHECK-NEXT:    retq
+; CHECK-NEXT:    .seh_handlerdata
+; CHECK-NEXT:    .text
+; CHECK-NEXT:    .seh_endproc
+  %v = call float @produce_float()
+  store float %v, float* %p
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/no-sse-x86.ll b/llvm/test/CodeGen/X86/no-sse-x86.ll
new file mode 100644
index 00000000000..45fea53af7f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/no-sse-x86.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mcpu=i686 -mattr=+sse | FileCheck %s
+; RUN: llc < %s -mcpu=i686 -mattr=-sse 2>&1 | FileCheck --check-prefix NOSSE %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-unknown-linux-gnu"
+@f = external global float
+@d = external global double
+
+define void @test() nounwind {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subl $12, %esp
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    movss %xmm0, (%esp)
+; CHECK-NEXT:    calll foo1
+; CHECK-NEXT:    fstps f
+; CHECK-NEXT:    fldl d
+; CHECK-NEXT:    fstpl (%esp)
+; CHECK-NEXT:    calll foo2
+; CHECK-NEXT:    fstpl d
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    movss %xmm0, (%esp)
+; CHECK-NEXT:    calll foo3
+; CHECK-NEXT:    fstps f
+; CHECK-NEXT:    fldl d
+; CHECK-NEXT:    fstpl (%esp)
+; CHECK-NEXT:    calll foo4
+; CHECK-NEXT:    fstpl d
+; CHECK-NEXT:    addl $12, %esp
+; CHECK-NEXT:    retl
+;
+; NOSSE-LABEL: test:
+; NOSSE:       # %bb.0: # %entry
+; NOSSE-NEXT:    subl $12, %esp
+; NOSSE-NEXT:    flds f
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll foo1
+; NOSSE-NEXT:    fstps f
+; NOSSE-NEXT:    fldl d
+; NOSSE-NEXT:    fstpl (%esp)
+; NOSSE-NEXT:    calll foo2
+; NOSSE-NEXT:    fstpl d
+; NOSSE-NEXT:    flds f
+; NOSSE-NEXT:    fstps (%esp)
+; NOSSE-NEXT:    calll foo3
+; NOSSE-NEXT:    fstps f
+; NOSSE-NEXT:    fldl d
+; NOSSE-NEXT:    fstpl (%esp)
+; NOSSE-NEXT:    calll foo4
+; NOSSE-NEXT:    fstpl d
+; NOSSE-NEXT:    addl $12, %esp
+; NOSSE-NEXT:    retl
+entry:
+  %0 = load float, float* @f, align 4
+  %1 = tail call inreg float @foo1(float inreg %0) nounwind
+  store float %1, float* @f, align 4
+  %2 = load double, double* @d, align 8
+  %3 = tail call inreg double @foo2(double inreg %2) nounwind
+  store double %3, double* @d, align 8
+  %4 = load float, float* @f, align 4
+  %5 = tail call inreg float @foo3(float inreg %4) nounwind
+  store float %5, float* @f, align 4
+  %6 = load double, double* @d, align 8
+  %7 = tail call inreg double @foo4(double inreg %6) nounwind
+  store double %7, double* @d, align 8
+  ret void
+}
+
+declare inreg float @foo1(float inreg)
+
+declare inreg double @foo2(double inreg)
+
+declare inreg float @foo3(float inreg)
+
+declare inreg double @foo4(double inreg)
diff --git a/llvm/test/CodeGen/X86/nosse-error2.ll b/llvm/test/CodeGen/X86/nosse-error2.ll
deleted file mode 100644
index b88ddf85e0e..00000000000
--- a/llvm/test/CodeGen/X86/nosse-error2.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; RUN: not llc < %s -mcpu=i686 -mattr=-sse 2>&1 | FileCheck --check-prefix NOSSE %s
-; RUN: llc < %s -mcpu=i686 -mattr=+sse | FileCheck %s
-
-; NOSSE: {{SSE register return with SSE disabled}}
-
-; CHECK: xmm
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-unknown-linux-gnu"
-@f = external global float             ; <float*> [#uses=4]
-@d = external global double            ; <double*> [#uses=4]
-
-define void @test() nounwind {
-entry:
-  %0 = load float, float* @f, align 4            ; <float> [#uses=1]
-  %1 = tail call inreg float @foo1(float inreg %0) nounwind     ; <float> [#uses=1]
-  store float %1, float* @f, align 4
-  %2 = load double, double* @d, align 8          ; <double> [#uses=1]
-  %3 = tail call inreg double @foo2(double inreg %2) nounwind   ; <double> [#uses=1]
-  store double %3, double* @d, align 8
-  %4 = load float, float* @f, align 4            ; <float> [#uses=1]
-  %5 = tail call inreg float @foo3(float inreg %4) nounwind     ; <float> [#uses=1]
-  store float %5, float* @f, align 4
-  %6 = load double, double* @d, align 8          ; <double> [#uses=1]
-  %7 = tail call inreg double @foo4(double inreg %6) nounwind   ; <double> [#uses=1]
-  store double %7, double* @d, align 8
-  ret void
-}
-
-declare inreg float @foo1(float inreg)
-
-declare inreg double @foo2(double inreg)
-
-declare inreg float @foo3(float inreg)
-
-declare inreg double @foo4(double inreg)