Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r--  clang/lib/CodeGen/CGCall.cpp           |   8
-rw-r--r--  clang/lib/CodeGen/MicrosoftCXXABI.cpp  |  11
-rw-r--r--  clang/lib/CodeGen/TargetInfo.cpp       | 152
3 files changed, 143 insertions(+), 28 deletions(-)
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index b31cdf3a909..564a754a81c 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -50,7 +50,7 @@ static unsigned ClangCallConvToLLVMCallConv(CallingConv CC) {
// TODO: Add support for __pascal to LLVM.
case CC_X86Pascal: return llvm::CallingConv::C;
// TODO: Add support for __vectorcall to LLVM.
- case CC_X86VectorCall: return llvm::CallingConv::C;
+ case CC_X86VectorCall: return llvm::CallingConv::X86_VectorCall;
}
}
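With this mapping in place, a __vectorcall function now lowers to LLVM's x86_vectorcallcc convention instead of silently falling back to the default C convention. A minimal sketch of the effect, assuming an MSVC-style target where __vectorcall and __m128 are available (the declaration is a hypothetical example, not part of the patch):

#include <immintrin.h>  // for __m128

// Hypothetical example showing the effect of the mapping change above.
__m128 __vectorcall hadd(__m128 a, __m128 b);
// Before: declare <4 x float> @...(...)                 ; default C convention
// After:  declare x86_vectorcallcc <4 x float> @...(...)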
@@ -603,6 +603,9 @@ getTypeExpansion(QualType Ty, const ASTContext &Context) {
CharUnits UnionSize = CharUnits::Zero();
for (const auto *FD : RD->fields()) {
+ // Skip zero length bitfields.
+ if (FD->isBitField() && FD->getBitWidthValue(Context) == 0)
+ continue;
assert(!FD->isBitField() &&
"Cannot expand structure with bit-field members.");
CharUnits FieldSize = Context.getTypeSizeInChars(FD->getType());
@@ -622,6 +625,9 @@ getTypeExpansion(QualType Ty, const ASTContext &Context) {
}
for (const auto *FD : RD->fields()) {
+ // Skip zero length bitfields.
+ if (FD->isBitField() && FD->getBitWidthValue(Context) == 0)
+ continue;
assert(!FD->isBitField() &&
"Cannot expand structure with bit-field members.");
Fields.push_back(FD);
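The two hunks above let getTypeExpansion tolerate unnamed zero-length bit-fields, which affect layout only and carry no data; any other bit-field still trips the assert. A minimal sketch of the kind of record this now accepts (hypothetical example):

// Hypothetical example: expandable despite the unnamed zero-length bit-field.
struct Expandable {
  float x;
  int : 0;   // zero-length bit-field: skipped by getTypeExpansion
  float y;
};
// A named bit-field such as 'int b : 3;' would still hit the assert.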
diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
index 49907c965d3..428c7b11a47 100644
--- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -617,8 +617,15 @@ MicrosoftCXXABI::getRecordArgABI(const CXXRecordDecl *RD) const {
if (RD->hasNonTrivialCopyConstructor())
return RAA_Indirect;
- // Win64 passes objects larger than 8 bytes indirectly.
- if (getContext().getTypeSize(RD->getTypeForDecl()) > 64)
+ // If an object has a destructor, we'd really like to pass it indirectly
+ // because it allows us to elide copies. Unfortunately, MSVC makes that
+ // impossible for small types, which it will pass in a single register or
+ // stack slot. Most objects with dtors are large-ish, so handle that early.
+ // We can't call out all large objects as being indirect because there are
+ // multiple x64 calling conventions and the C++ ABI code shouldn't dictate
+ // how we pass large POD types.
+ if (RD->hasNonTrivialDestructor() &&
+ getContext().getTypeSize(RD->getTypeForDecl()) > 64)
return RAA_Indirect;
// We have a trivial copy constructor or no copy constructors, but we have
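The net effect: the C++ ABI layer only forces indirection for records that both have a non-trivial destructor and exceed 8 bytes; size-based rules for POD types are left to the calling-convention logic in TargetInfo.cpp. A hypothetical illustration (type names are invented):

// Hypothetical illustration of the revised getRecordArgABI rule:
struct BigDtor   { int a, b, c; ~BigDtor(); };  // dtor + 12 bytes -> RAA_Indirect here
struct SmallDtor { int a; ~SmallDtor(); };      // 4 bytes: not forced indirect by this check
struct BigPOD    { int a, b, c, d; };           // large POD: left to the per-CC x64 logic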
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index ed9e83fda3b..c776db61049 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -508,18 +508,39 @@ static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
return Ty;
}
+/// Returns true if this type can be passed in SSE registers with the
+/// X86_VectorCall calling convention. Shared between x86_32 and x86_64.
+static bool isX86VectorTypeForVectorCall(ASTContext &Context, QualType Ty) {
+ if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
+ if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half)
+ return true;
+ } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
+ // vectorcall can pass XMM, YMM, and ZMM vectors. We don't pass SSE1/MMX
+ // registers specially.
+ unsigned VecSize = Context.getTypeSize(VT);
+ if (VecSize == 128 || VecSize == 256 || VecSize == 512)
+ return true;
+ }
+ return false;
+}
+
+/// Returns true if this aggregate is small enough to be passed in SSE registers
+/// in the X86_VectorCall calling convention. Shared between x86_32 and x86_64.
+static bool isX86VectorCallAggregateSmallEnough(uint64_t NumMembers) {
+ return NumMembers <= 4;
+}
+
//===----------------------------------------------------------------------===//
// X86-32 ABI Implementation
//===----------------------------------------------------------------------===//
/// \brief Similar to llvm::CCState, but for Clang.
struct CCState {
- CCState(unsigned CC) : CC(CC), FreeRegs(0) {}
+ CCState(unsigned CC) : CC(CC), FreeRegs(0), FreeSSERegs(0) {}
unsigned CC;
unsigned FreeRegs;
- unsigned StackOffset;
- bool UseInAlloca;
+ unsigned FreeSSERegs;
};
/// X86_32ABIInfo - The X86-32 ABI information.
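The two helpers above define what can ride in a vectorcall SSE register: scalar floating-point types other than half, and 128/256/512-bit vectors, with at most four members per homogeneous aggregate. A hypothetical illustration of how a few types classify, assuming the usual intrinsic vector types:

#include <immintrin.h>

// Hypothetical illustration of the helper classifications above:
struct HVA4 { __m128 w, x, y, z; };  // four 128-bit members -> qualifies (NumMembers <= 4)
struct HVA5 { __m128 v[5]; };        // five members -> isX86VectorCallAggregateSmallEnough fails
struct HFA2 { double a, b; };        // float and double base types also qualify
// half and 64-bit MMX vectors are rejected as base types.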
@@ -540,6 +561,17 @@ class X86_32ABIInfo : public ABIInfo {
return (Size == 8 || Size == 16 || Size == 32 || Size == 64);
}
+ bool isHomogeneousAggregateBaseType(QualType Ty) const override {
+ // FIXME: Assumes vectorcall is in use.
+ return isX86VectorTypeForVectorCall(getContext(), Ty);
+ }
+
+ bool isHomogeneousAggregateSmallEnough(const Type *Ty,
+ uint64_t NumMembers) const override {
+ // FIXME: Assumes vectorcall is in use.
+ return isX86VectorCallAggregateSmallEnough(NumMembers);
+ }
+
bool shouldReturnTypeInRegister(QualType Ty, ASTContext &Context) const;
/// getIndirectResult - Given a source type \arg Ty, return a suitable result
@@ -767,6 +799,14 @@ ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, CCState &State) con
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
+ const Type *Base = nullptr;
+ uint64_t NumElts = 0;
+ if (State.CC == llvm::CallingConv::X86_VectorCall &&
+ isHomogeneousAggregate(RetTy, Base, NumElts)) {
+ // The LLVM struct type for such an aggregate should lower properly.
+ return ABIArgInfo::getDirect();
+ }
+
if (const VectorType *VT = RetTy->getAs<VectorType>()) {
// On Darwin, some vectors are returned in registers.
if (IsDarwinVectorABI) {
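With this early check, a vectorcall function returning a homogeneous aggregate no longer falls through to the indirect paths below. A hypothetical illustration:

// Hypothetical illustration: HVA returns become direct under vectorcall.
struct Vec2 { __m128 lo, hi; };
Vec2 __vectorcall make_vec2();  // returned directly (e.g. in XMM registers), not via sret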
@@ -939,7 +979,8 @@ bool X86_32ABIInfo::shouldUseInReg(QualType Ty, CCState &State,
State.FreeRegs -= SizeInRegs;
- if (State.CC == llvm::CallingConv::X86_FastCall) {
+ if (State.CC == llvm::CallingConv::X86_FastCall ||
+ State.CC == llvm::CallingConv::X86_VectorCall) {
if (Size > 32)
return false;
@@ -964,17 +1005,36 @@ bool X86_32ABIInfo::shouldUseInReg(QualType Ty, CCState &State,
ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
CCState &State) const {
// FIXME: Set alignment on indirect arguments.
- if (isAggregateTypeForABI(Ty)) {
- if (const RecordType *RT = Ty->getAs<RecordType>()) {
- // Check with the C++ ABI first.
- CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
- if (RAA == CGCXXABI::RAA_Indirect) {
- return getIndirectResult(Ty, false, State);
- } else if (RAA == CGCXXABI::RAA_DirectInMemory) {
- // The field index doesn't matter, we'll fix it up later.
- return ABIArgInfo::getInAlloca(/*FieldIndex=*/0);
- }
+ // Check with the C++ ABI first.
+ const RecordType *RT = Ty->getAs<RecordType>();
+ if (RT) {
+ CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
+ if (RAA == CGCXXABI::RAA_Indirect) {
+ return getIndirectResult(Ty, false, State);
+ } else if (RAA == CGCXXABI::RAA_DirectInMemory) {
+ // The field index doesn't matter, we'll fix it up later.
+ return ABIArgInfo::getInAlloca(/*FieldIndex=*/0);
+ }
+ }
+
+ // vectorcall adds the concept of a homogeneous vector aggregate, similar
+ // to other targets.
+ const Type *Base = nullptr;
+ uint64_t NumElts = 0;
+ if (State.CC == llvm::CallingConv::X86_VectorCall &&
+ isHomogeneousAggregate(Ty, Base, NumElts)) {
+ if (State.FreeSSERegs >= NumElts) {
+ State.FreeSSERegs -= NumElts;
+ if (Ty->isBuiltinType() || Ty->isVectorType())
+ return ABIArgInfo::getDirect();
+ return ABIArgInfo::getExpand();
+ }
+ return getIndirectResult(Ty, /*ByVal=*/false, State);
+ }
+
+ if (isAggregateTypeForABI(Ty)) {
+ if (RT) {
// Structs are always byval on win32, regardless of what they contain.
if (IsWin32StructABI)
return getIndirectResult(Ty, true, State);
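On x86-32, the new HVA path consumes one SSE register per element: qualifying aggregates are expanded so each member lands in its own XMM register, and an HVA that does not fit is passed indirectly without byval. A hypothetical illustration:

// Hypothetical illustration of the x86-32 vectorcall HVA path above:
struct Pair { __m128 lo, hi; };
void __vectorcall take(Pair p);
// With >= 2 free SSE registers: getExpand(), one XMM register per member.
// With fewer free registers:    passed indirectly (ByVal = false).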
@@ -1006,7 +1066,9 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
if (getContext().getTypeSize(Ty) <= 4*32 &&
canExpandIndirectArgument(Ty, getContext()))
return ABIArgInfo::getExpandWithPadding(
- State.CC == llvm::CallingConv::X86_FastCall, PaddingType);
+ State.CC == llvm::CallingConv::X86_FastCall ||
+ State.CC == llvm::CallingConv::X86_VectorCall,
+ PaddingType);
return getIndirectResult(Ty, true, State);
}
@@ -1049,7 +1111,10 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
CCState State(FI.getCallingConvention());
if (State.CC == llvm::CallingConv::X86_FastCall)
State.FreeRegs = 2;
- else if (FI.getHasRegParm())
+ else if (State.CC == llvm::CallingConv::X86_VectorCall) {
+ State.FreeRegs = 2;
+ State.FreeSSERegs = 6;
+ } else if (FI.getHasRegParm())
State.FreeRegs = FI.getRegParm();
else
State.FreeRegs = DefaultNumRegisterParameters;
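These budgets mirror MSVC's x86 vectorcall rules: two integer registers, as with fastcall, plus six SSE registers. A hypothetical illustration (register assignments assume the MSVC convention):

// Hypothetical illustration of the x86-32 vectorcall budgets above:
void __vectorcall f(int i, int j, __m128 a, __m128 b);
// i and j consume the 2 free integer registers (ECX, EDX under MSVC);
// a and b consume 2 of the 6 free SSE registers (XMM0, XMM1).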
@@ -1434,7 +1499,8 @@ public:
/// WinX86_64ABIInfo - The Windows X86_64 ABI information.
class WinX86_64ABIInfo : public ABIInfo {
- ABIArgInfo classify(QualType Ty, bool IsReturnType) const;
+ ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs,
+ bool IsReturnType) const;
public:
WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT) {}
@@ -1443,6 +1509,17 @@ public:
llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
CodeGenFunction &CGF) const override;
+
+ bool isHomogeneousAggregateBaseType(QualType Ty) const override {
+ // FIXME: Assumes vectorcall is in use.
+ return isX86VectorTypeForVectorCall(getContext(), Ty);
+ }
+
+ bool isHomogeneousAggregateSmallEnough(const Type *Ty,
+ uint64_t NumMembers) const override {
+ // FIXME: Assumes vectorcall is in use.
+ return isX86VectorCallAggregateSmallEnough(NumMembers);
+ }
};
class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
@@ -2844,7 +2921,8 @@ llvm::Value *X86_64ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
return ResAddr;
}
-ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, bool IsReturnType) const {
+ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
+ bool IsReturnType) const {
if (Ty->isVoidType())
return ABIArgInfo::getIgnore();
@@ -2852,7 +2930,9 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, bool IsReturnType) const {
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
- uint64_t Size = getContext().getTypeSize(Ty);
+ TypeInfo Info = getContext().getTypeInfo(Ty);
+ uint64_t Width = Info.Width;
+ unsigned Align = getContext().toCharUnitsFromBits(Info.Align).getQuantity();
const RecordType *RT = Ty->getAs<RecordType>();
if (RT) {
@@ -2865,11 +2945,26 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, bool IsReturnType) const {
return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
// FIXME: mingw-w64-gcc emits 128-bit struct as i128
- if (Size == 128 && getTarget().getTriple().isWindowsGNUEnvironment())
+ if (Width == 128 && getTarget().getTriple().isWindowsGNUEnvironment())
return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
- Size));
+ Width));
+ }
+
+ // vectorcall adds the concept of a homogeneous vector aggregate, similar to
+ // other targets.
+ const Type *Base = nullptr;
+ uint64_t NumElts = 0;
+ if (FreeSSERegs && isHomogeneousAggregate(Ty, Base, NumElts)) {
+ if (FreeSSERegs >= NumElts) {
+ FreeSSERegs -= NumElts;
+ if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())
+ return ABIArgInfo::getDirect();
+ return ABIArgInfo::getExpand();
+ }
+ return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
}
+
if (Ty->isMemberPointerType()) {
// If the member pointer is represented by an LLVM int or ptr, pass it
// directly.
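Note the asymmetry baked in above: HVA return values (and plain vectors and scalars) stay direct, while HVA arguments are expanded so each element is passed separately. A hypothetical illustration:

// Hypothetical illustration of direct-vs-expand above (Win64 vectorcall):
struct HVA2 { __m128 x, y; };
HVA2 __vectorcall id(HVA2 v);
// Return value: getDirect() -> lowered as one aggregate in SSE registers.
// Argument v:   getExpand() -> passed as two separate <4 x float> values.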
@@ -2881,11 +2976,11 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, bool IsReturnType) const {
if (RT || Ty->isMemberPointerType()) {
// MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
// not 1, 2, 4, or 8 bytes, must be passed by reference."
- if (Size > 64 || !llvm::isPowerOf2_64(Size))
+ if (Width > 64 || !llvm::isPowerOf2_64(Width))
return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
// Otherwise, coerce it to a small integer.
- return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
+ return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Width));
}
// Bool type is always extended to the ABI, other builtin types are not
@@ -2898,11 +2993,18 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, bool IsReturnType) const {
}
void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
+ bool IsVectorCall =
+ FI.getCallingConvention() == llvm::CallingConv::X86_VectorCall;
+
+ // We can use up to 4 SSE return registers with vectorcall.
+ unsigned FreeSSERegs = IsVectorCall ? 4 : 0;
if (!getCXXABI().classifyReturnType(FI))
- FI.getReturnInfo() = classify(FI.getReturnType(), true);
+ FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true);
+ // We can use up to 6 SSE register parameters with vectorcall.
+ FreeSSERegs = IsVectorCall ? 6 : 0;
for (auto &I : FI.arguments())
- I.info = classify(I.type, false);
+ I.info = classify(I.type, FreeSSERegs, false);
}
llvm::Value *WinX86_64ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
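On Win64 the SSE budget is threaded through classify() by reference: up to four SSE registers for a vectorcall return value, then a fresh budget of six for the parameter list. A hypothetical illustration:

// Hypothetical illustration of the Win64 vectorcall budgets above:
struct Quad { __m128 a, b, c, d; };
void __vectorcall g(Quad p, Quad q);
// p consumes 4 of the 6 free SSE registers; q needs 4 more but only 2 remain,
// so q is passed indirectly (FreeSSERegs is checked before subtracting).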