-rw-r--r-- | llvm/lib/Target/X86/X86CallingConv.td     |  2 |
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp      | 28 |
-rw-r--r-- | llvm/lib/Target/X86/X86RegisterInfo.cpp   |  5 |
-rw-r--r-- | llvm/test/CodeGen/X86/x86-interrupt_cc.ll | 19 |
4 files changed, 44 insertions, 10 deletions
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index 47e581fa1cd..b8c473dcbac 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -899,6 +899,8 @@ def CSR_64_AllRegs : CalleeSavedRegs<(add CSR_64_MostRegs, RAX, RSP,
 def CSR_64_AllRegs_AVX : CalleeSavedRegs<(sub (add CSR_64_MostRegs, RAX, RSP,
                                                (sequence "YMM%u", 0, 15)),
                                               (sequence "XMM%u", 0, 15))>;
+def CSR_64_AllRegs_AVX512 : CalleeSavedRegs<(add CSR_64_AllRegs_AVX,
+                                                 (sequence "YMM%u", 16, 31))>;
 
 // Standard C + YMM6-15
 def CSR_Win64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12,
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 7208f5a2f5a..4b507022868 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4645,23 +4645,35 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
     assert((X86::VR128RegClass.hasSubClassEq(RC) ||
             X86::VR128XRegClass.hasSubClassEq(RC))&& "Unknown 16-byte regclass");
     // If stack is realigned we can use aligned stores.
+    if (X86::VR128RegClass.hasSubClassEq(RC)) {
+      if (isStackAligned)
+        return load ? (HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm)
+                    : (HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr);
+      else
+        return load ? (HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm)
+                    : (HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
+    }
+    assert(STI.hasAVX512() && "Using extended register requires AVX512");
     if (isStackAligned)
-      return load ?
-        (HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm) :
-        (HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr);
+      return load ? X86::VMOVAPSZ128rm : X86::VMOVAPSZ128mr;
     else
-      return load ?
-        (HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm) :
-        (HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
+      return load ? X86::VMOVUPSZ128rm : X86::VMOVUPSZ128mr;
   case 32:
     assert((X86::VR256RegClass.hasSubClassEq(RC) ||
             X86::VR256XRegClass.hasSubClassEq(RC)) && "Unknown 32-byte regclass");
     // If stack is realigned we can use aligned stores.
+    if (X86::VR256RegClass.hasSubClassEq(RC)) {
+      if (isStackAligned)
+        return load ? X86::VMOVAPSYrm : X86::VMOVAPSYmr;
+      else
+        return load ? X86::VMOVUPSYrm : X86::VMOVUPSYmr;
+    }
+    assert(STI.hasAVX512() && "Using extended register requires AVX512");
     if (isStackAligned)
-      return load ? X86::VMOVAPSYrm : X86::VMOVAPSYmr;
+      return load ? X86::VMOVAPSZ256rm : X86::VMOVAPSZ256mr;
     else
-      return load ? X86::VMOVUPSYrm : X86::VMOVUPSYmr;
+      return load ? X86::VMOVUPSZ256rm : X86::VMOVUPSZ256mr;
   case 64:
     assert(X86::VR512RegClass.hasSubClassEq(RC) && "Unknown 64-byte regclass");
     if (isStackAligned)
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index cb4170cc623..7b1059ee32d 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -294,10 +294,11 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
     return CSR_64_SaveList;
   case CallingConv::X86_INTR:
     if (Is64Bit) {
+      if (HasAVX512)
+        return CSR_64_AllRegs_AVX512_SaveList;
       if (HasAVX)
         return CSR_64_AllRegs_AVX_SaveList;
-      else
-        return CSR_64_AllRegs_SaveList;
+      return CSR_64_AllRegs_SaveList;
     } else {
       if (HasSSE)
         return CSR_32_AllRegs_SSE_SaveList;
diff --git a/llvm/test/CodeGen/X86/x86-interrupt_cc.ll b/llvm/test/CodeGen/X86/x86-interrupt_cc.ll
new file mode 100644
index 00000000000..f5d2b39fc63
--- /dev/null
+++ b/llvm/test/CodeGen/X86/x86-interrupt_cc.ll
@@ -0,0 +1,19 @@
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-apple-macosx -show-mc-encoding -mattr=+avx512f < %s | FileCheck %s
+
+
+; Make sure we spill the high numbered YMM registers with the right encoding.
+; CHECK-LABEL: foo
+; CHECK: movups %ymm31, {{.+}}
+; CHECK: encoding: [0x62,0x61,0x7c,0x28,0x11,0xbc,0x24,0xf0,0x03,0x00,0x00]
+; ymm30 is used as an anchor for the previous regexp.
+; CHECK-NEXT: movups %ymm30
+; CHECK: call
+; CHECK: iret
+
+define x86_intrcc void @foo(i8* %frame) {
+  call void @bar()
+  ret void
+}
+
+declare void @bar()
+
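
As a reading aid (not part of the patch), the sketch below restates the selection rule that the
X86InstrInfo.cpp hunk implements: plain VR128/VR256 registers keep the SSE/VEX move forms, while
spills of the extended-only registers (XMM16-31 / YMM16-31, i.e. the VR128X/VR256X cases that are
not also VR128/VR256) must use the EVEX-encoded Z128/Z256 forms and therefore require AVX-512. The
enum values and the helper name getVecLoadOpcode are illustrative placeholders, not the real LLVM
opcode enums or API, and only the load direction is modeled; the store ("mr") forms follow the same
pattern.

    // Minimal standalone sketch (assumptions noted above), mirroring the patched logic.
    #include <cassert>

    enum Opcode {
      MOVAPSrm, MOVUPSrm, VMOVAPSrm, VMOVUPSrm,   // SSE / VEX 128-bit loads
      VMOVAPSYrm, VMOVUPSYrm,                     // VEX 256-bit loads
      VMOVAPSZ128rm, VMOVUPSZ128rm,               // EVEX 128-bit loads
      VMOVAPSZ256rm, VMOVUPSZ256rm                // EVEX 256-bit loads
    };

    // RegWidth is the spill size in bytes; IsExtended marks an XMM16-31/YMM16-31
    // register (the VR128X/VR256X-only case in the real code).
    Opcode getVecLoadOpcode(unsigned RegWidth, bool IsExtended,
                            bool IsStackAligned, bool HasAVX, bool HasAVX512) {
      switch (RegWidth) {
      case 16:
        if (!IsExtended)
          return IsStackAligned ? (HasAVX ? VMOVAPSrm : MOVAPSrm)
                                : (HasAVX ? VMOVUPSrm : MOVUPSrm);
        assert(HasAVX512 && "Using extended register requires AVX512");
        return IsStackAligned ? VMOVAPSZ128rm : VMOVUPSZ128rm;
      case 32:
        if (!IsExtended)
          return IsStackAligned ? VMOVAPSYrm : VMOVUPSYrm;
        assert(HasAVX512 && "Using extended register requires AVX512");
        return IsStackAligned ? VMOVAPSZ256rm : VMOVUPSZ256rm;
      default:
        assert(false && "spill size not covered by this sketch");
        return MOVUPSrm;
      }
    }

This is also why the new CSR_64_AllRegs_AVX512 save list and the test's encoding check go together:
YMM16-31 have no VEX encoding, so saving them around an x86 interrupt handler only works if the
EVEX forms are selected, which the leading 0x62 (EVEX prefix) byte in the CHECK encoding line
confirms.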