summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorReid Kleckner <reid@kleckner.net>2014-10-28 01:29:26 +0000
committerReid Kleckner <reid@kleckner.net>2014-10-28 01:29:26 +0000
commit9ccce99e1d02975afe3c4d4cf2c154c6f154f3ff (patch)
treee817bf79e4eec6eaaead78448151a5717f143ee4 /llvm/lib
parent00917897b246cef0d5fa5fda28a03679788e224b (diff)
downloadbcm5719-llvm-9ccce99e1d02975afe3c4d4cf2c154c6f154f3ff.tar.gz
bcm5719-llvm-9ccce99e1d02975afe3c4d4cf2c154c6f154f3ff.zip
X86: Implement the vectorcall calling convention
This is a Microsoft calling convention that supports both x86 and x86_64 subtargets. It passes vector and floating point arguments in XMM0-XMM5, and passes them indirectly once they are consumed. Homogenous vector aggregates of up to four elements can be passed in sequential vector registers, but this part is not implemented in LLVM and will be handled in Clang. On 32-bit x86, it is similar to fastcall in that it uses ecx:edx as integer register parameters and is callee cleanup. On x86_64, it delegates to the normal win64 calling convention. Reviewers: majnemer Differential Revision: http://reviews.llvm.org/D5943 llvm-svn: 220745
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/AsmParser/LLLexer.cpp1
-rw-r--r--llvm/lib/AsmParser/LLParser.cpp2
-rw-r--r--llvm/lib/AsmParser/LLToken.h2
-rw-r--r--llvm/lib/IR/AsmWriter.cpp1
-rw-r--r--llvm/lib/IR/Mangler.cpp81
-rw-r--r--llvm/lib/Target/X86/X86CallingConv.h13
-rw-r--r--llvm/lib/Target/X86/X86CallingConv.td64
7 files changed, 129 insertions, 35 deletions
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index e205a7abe4e..6523bcee060 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -580,6 +580,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(x86_stdcallcc);
KEYWORD(x86_fastcallcc);
KEYWORD(x86_thiscallcc);
+ KEYWORD(x86_vectorcallcc);
KEYWORD(arm_apcscc);
KEYWORD(arm_aapcscc);
KEYWORD(arm_aapcs_vfpcc);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index cfdb1d4742f..b7818bbf527 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -1448,6 +1448,7 @@ bool LLParser::ParseOptionalDLLStorageClass(unsigned &Res) {
/// ::= 'x86_stdcallcc'
/// ::= 'x86_fastcallcc'
/// ::= 'x86_thiscallcc'
+/// ::= 'x86_vectorcallcc'
/// ::= 'arm_apcscc'
/// ::= 'arm_aapcscc'
/// ::= 'arm_aapcs_vfpcc'
@@ -1473,6 +1474,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
case lltok::kw_x86_stdcallcc: CC = CallingConv::X86_StdCall; break;
case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break;
case lltok::kw_x86_thiscallcc: CC = CallingConv::X86_ThisCall; break;
+ case lltok::kw_x86_vectorcallcc:CC = CallingConv::X86_VectorCall; break;
case lltok::kw_arm_apcscc: CC = CallingConv::ARM_APCS; break;
case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break;
case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break;
diff --git a/llvm/lib/AsmParser/LLToken.h b/llvm/lib/AsmParser/LLToken.h
index 2b39ebd2b57..f9821f76efe 100644
--- a/llvm/lib/AsmParser/LLToken.h
+++ b/llvm/lib/AsmParser/LLToken.h
@@ -87,7 +87,7 @@ namespace lltok {
kw_cc, kw_ccc, kw_fastcc, kw_coldcc,
kw_intel_ocl_bicc,
- kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc,
+ kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc, kw_x86_vectorcallcc,
kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc,
kw_msp430_intrcc,
kw_ptx_kernel, kw_ptx_device,
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 31d960e96cb..449225ae8c3 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -285,6 +285,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break;
case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break;
case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break;
+ case CallingConv::X86_VectorCall:Out << "x86_vectorcallcc"; break;
case CallingConv::Intel_OCL_BI: Out << "intel_ocl_bicc"; break;
case CallingConv::ARM_APCS: Out << "arm_apcscc"; break;
case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break;
diff --git a/llvm/lib/IR/Mangler.cpp b/llvm/lib/IR/Mangler.cpp
index bfed3e39f4e..c7eb666ee0f 100644
--- a/llvm/lib/IR/Mangler.cpp
+++ b/llvm/lib/IR/Mangler.cpp
@@ -22,7 +22,7 @@ using namespace llvm;
static void getNameWithPrefixx(raw_ostream &OS, const Twine &GVName,
Mangler::ManglerPrefixTy PrefixTy,
- const DataLayout &DL, bool UseAt) {
+ const DataLayout &DL, char Prefix) {
SmallString<256> TmpData;
StringRef Name = GVName.toStringRef(TmpData);
assert(!Name.empty() && "getNameWithPrefix requires non-empty name");
@@ -39,13 +39,8 @@ static void getNameWithPrefixx(raw_ostream &OS, const Twine &GVName,
else if (PrefixTy == Mangler::LinkerPrivate)
OS << DL.getLinkerPrivateGlobalPrefix();
- if (UseAt) {
- OS << '@';
- } else {
- char Prefix = DL.getGlobalPrefix();
- if (Prefix != '\0')
- OS << Prefix;
- }
+ if (Prefix != '\0')
+ OS << Prefix;
// If this is a simple string that doesn't need escaping, just append it.
OS << Name;
@@ -53,7 +48,8 @@ static void getNameWithPrefixx(raw_ostream &OS, const Twine &GVName,
void Mangler::getNameWithPrefix(raw_ostream &OS, const Twine &GVName,
ManglerPrefixTy PrefixTy) const {
- return getNameWithPrefixx(OS, GVName, PrefixTy, *DL, false);
+ char Prefix = DL->getGlobalPrefix();
+ return getNameWithPrefixx(OS, GVName, PrefixTy, *DL, Prefix);
}
void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
@@ -63,11 +59,21 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
return getNameWithPrefix(OS, GVName, PrefixTy);
}
-/// AddFastCallStdCallSuffix - Microsoft fastcall and stdcall functions require
-/// a suffix on their name indicating the number of words of arguments they
-/// take.
-static void AddFastCallStdCallSuffix(raw_ostream &OS, const Function *F,
- const DataLayout &TD) {
+static bool hasByteCountSuffix(CallingConv::ID CC) {
+ switch (CC) {
+ case CallingConv::X86_FastCall:
+ case CallingConv::X86_StdCall:
+ case CallingConv::X86_VectorCall:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// Microsoft fastcall and stdcall functions require a suffix on their name
+/// indicating the number of words of arguments they take.
+static void addByteCountSuffix(raw_ostream &OS, const Function *F,
+ const DataLayout &TD) {
// Calculate arguments size total.
unsigned ArgWords = 0;
for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
@@ -76,8 +82,9 @@ static void AddFastCallStdCallSuffix(raw_ostream &OS, const Function *F,
// 'Dereference' type in case of byval or inalloca parameter attribute.
if (AI->hasByValOrInAllocaAttr())
Ty = cast<PointerType>(Ty)->getElementType();
- // Size should be aligned to DWORD boundary
- ArgWords += ((TD.getTypeAllocSize(Ty) + 3)/4)*4;
+ // Size should be aligned to pointer size.
+ unsigned PtrSize = TD.getPointerSize();
+ ArgWords += RoundUpToAlignment(TD.getTypeAllocSize(Ty), PtrSize);
}
OS << '@' << ArgWords;
@@ -106,34 +113,40 @@ void Mangler::getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV,
}
StringRef Name = GV->getName();
-
- bool UseAt = false;
- const Function *MSFunc = nullptr;
- CallingConv::ID CC;
- if (Name[0] != '\1' && DL->hasMicrosoftFastStdCallMangling()) {
- if ((MSFunc = dyn_cast<Function>(GV))) {
- CC = MSFunc->getCallingConv();
- // fastcall functions need to start with @ instead of _.
- if (CC == CallingConv::X86_FastCall)
- UseAt = true;
- }
+ char Prefix = DL->getGlobalPrefix();
+
+ // Mangle functions with Microsoft calling conventions specially. Only do
+ // this mangling for x86_64 vectorcall and 32-bit x86.
+ const Function *MSFunc = dyn_cast<Function>(GV);
+ if (Name.startswith("\01"))
+ MSFunc = nullptr; // Don't mangle when \01 is present.
+ CallingConv::ID CC = MSFunc ? MSFunc->getCallingConv() : CallingConv::C;
+ if (!DL->hasMicrosoftFastStdCallMangling() &&
+ CC != CallingConv::X86_VectorCall)
+ MSFunc = nullptr;
+ if (MSFunc) {
+ if (CC == CallingConv::X86_FastCall)
+ Prefix = '@'; // fastcall functions have an @ prefix instead of _.
+ else if (CC == CallingConv::X86_VectorCall)
+ Prefix = '\0'; // vectorcall functions have no prefix.
}
- getNameWithPrefixx(OS, Name, PrefixTy, *DL, UseAt);
+ getNameWithPrefixx(OS, Name, PrefixTy, *DL, Prefix);
if (!MSFunc)
return;
- // If we are supposed to add a microsoft-style suffix for stdcall/fastcall,
- // add it.
- // fastcall and stdcall functions usually need @42 at the end to specify
- // the argument info.
+ // If we are supposed to add a microsoft-style suffix for stdcall, fastcall,
+ // or vectorcall, add it. These functions have a suffix of @N where N is the
+ // cumulative byte size of all of the parameters to the function in decimal.
+ if (CC == CallingConv::X86_VectorCall)
+ OS << '@'; // vectorcall functions use a double @ suffix.
FunctionType *FT = MSFunc->getFunctionType();
- if ((CC == CallingConv::X86_FastCall || CC == CallingConv::X86_StdCall) &&
+ if (hasByteCountSuffix(CC) &&
// "Pure" variadic functions do not receive @0 suffix.
(!FT->isVarArg() || FT->getNumParams() == 0 ||
(FT->getNumParams() == 1 && MSFunc->hasStructRetAttr())))
- AddFastCallStdCallSuffix(OS, MSFunc, *DL);
+ addByteCountSuffix(OS, MSFunc, *DL);
}
void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
diff --git a/llvm/lib/Target/X86/X86CallingConv.h b/llvm/lib/Target/X86/X86CallingConv.h
index 15a455ae29b..0eb2494f1d6 100644
--- a/llvm/lib/Target/X86/X86CallingConv.h
+++ b/llvm/lib/Target/X86/X86CallingConv.h
@@ -20,6 +20,19 @@
namespace llvm {
+inline bool CC_X86_32_VectorCallIndirect(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ // Similar to CCPassIndirect, with the addition of inreg.
+ LocVT = MVT::i32;
+ LocInfo = CCValAssign::Indirect;
+ ArgFlags.setInReg();
+ return false; // Continue the search, but now for i32.
+}
+
+
inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
CCState &) {
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index dec73eac606..75a2ec00468 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -124,6 +124,24 @@ def RetCC_X86_32_HiPE : CallingConv<[
CCIfType<[i32], CCAssignToReg<[ESI, EBP, EAX, EDX]>>
]>;
+// X86-32 HiPE return-value convention.
+def RetCC_X86_32_VectorCall : CallingConv<[
+ // Vector types are returned in XMM0,XMM1,XMMM2 and XMM3.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
+
+ // 256-bit FP vectors
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,
+
+ // 512-bit FP vectors
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCAssignToReg<[ZMM0,ZMM1,ZMM2,ZMM3]>>,
+
+ // Return integers in the standard way.
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
// X86-64 C return-value convention.
def RetCC_X86_64_C : CallingConv<[
// The X86-64 calling convention always returns FP values in XMM0.
@@ -179,6 +197,7 @@ def RetCC_X86_32 : CallingConv<[
CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
// If HiPE, use RetCC_X86_32_HiPE.
CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_32_HiPE>>,
+ CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<RetCC_X86_32_VectorCall>>,
// Otherwise, use RetCC_X86_32_C.
CCDelegateTo<RetCC_X86_32_C>
@@ -330,6 +349,25 @@ def CC_X86_Win64_C : CallingConv<[
CCIfType<[f80], CCAssignToStack<0, 0>>
]>;
+def CC_X86_Win64_VectorCall : CallingConv<[
+ // The first 6 floating point and vector types of 128 bits or less use
+ // XMM0-XMM5.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5]>>,
+
+ // 256-bit vectors use YMM registers.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5]>>,
+
+ // 512-bit vectors use ZMM registers.
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5]>>,
+
+ // Delegate to fastcall to handle integer types.
+ CCDelegateTo<CC_X86_Win64_C>
+]>;
+
+
def CC_X86_64_GHC : CallingConv<[
// Promote i8/i16/i32 arguments to i64.
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
@@ -463,6 +501,30 @@ def CC_X86_32_FastCall : CallingConv<[
CCDelegateTo<CC_X86_32_Common>
]>;
+def CC_X86_32_VectorCall : CallingConv<[
+ // The first 6 floating point and vector types of 128 bits or less use
+ // XMM0-XMM5.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5]>>,
+
+ // 256-bit vectors use YMM registers.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5]>>,
+
+ // 512-bit vectors use ZMM registers.
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5]>>,
+
+ // Otherwise, pass it indirectly.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64,
+ v32i8, v16i16, v8i32, v4i64, v8f32, v4f64,
+ v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCCustom<"CC_X86_32_VectorCallIndirect">>,
+
+ // Delegate to fastcall to handle integer types.
+ CCDelegateTo<CC_X86_32_FastCall>
+]>;
+
def CC_X86_32_ThisCall_Common : CallingConv<[
// The first integer argument is passed in ECX
CCIfType<[i32], CCAssignToReg<[ECX]>>,
@@ -576,6 +638,7 @@ def CC_Intel_OCL_BI : CallingConv<[
// This is the root argument convention for the X86-32 backend.
def CC_X86_32 : CallingConv<[
CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
+ CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_32_VectorCall>>,
CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
@@ -593,6 +656,7 @@ def CC_X86_64 : CallingConv<[
CCIfCC<"CallingConv::AnyReg", CCDelegateTo<CC_X86_64_AnyReg>>,
CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo<CC_X86_Win64_C>>,
CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo<CC_X86_64_C>>,
+ CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win64_VectorCall>>,
// Mingw64 and native Win64 use Win64 CC
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
OpenPOWER on IntegriCloud