summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp')
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp86
1 files changed, 49 insertions, 37 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index eb4c1202586..ac82737e777 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -1059,19 +1059,41 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall(
emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
}
+/// Cast value to the specified type.
+static llvm::Value *
+castValueToType(CodeGenFunction &CGF, llvm::Value *Val, llvm::Type *CastTy,
+ llvm::Optional<bool> IsSigned = llvm::None) {
+ if (Val->getType() == CastTy)
+ return Val;
+ if (Val->getType()->getPrimitiveSizeInBits() > 0 &&
+ CastTy->getPrimitiveSizeInBits() > 0 &&
+ Val->getType()->getPrimitiveSizeInBits() ==
+ CastTy->getPrimitiveSizeInBits())
+ return CGF.Builder.CreateBitCast(Val, CastTy);
+ if (IsSigned.hasValue() && CastTy->isIntegerTy() &&
+ Val->getType()->isIntegerTy())
+ return CGF.Builder.CreateIntCast(Val, CastTy, *IsSigned);
+ Address CastItem = CGF.CreateTempAlloca(
+ CastTy,
+ CharUnits::fromQuantity(
+ CGF.CGM.getDataLayout().getPrefTypeAlignment(Val->getType())));
+ Address ValCastItem = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CastItem, Val->getType()->getPointerTo(CastItem.getAddressSpace()));
+ CGF.Builder.CreateStore(Val, ValCastItem);
+ return CGF.Builder.CreateLoad(CastItem);
+}
+
/// This function creates calls to one of two shuffle functions to copy
/// variables between lanes in a warp.
static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF,
- QualType ElemTy,
llvm::Value *Elem,
llvm::Value *Offset) {
auto &CGM = CGF.CGM;
- auto &C = CGM.getContext();
auto &Bld = CGF.Builder;
CGOpenMPRuntimeNVPTX &RT =
*(static_cast<CGOpenMPRuntimeNVPTX *>(&CGM.getOpenMPRuntime()));
- unsigned Size = CGM.getContext().getTypeSizeInChars(ElemTy).getQuantity();
+ unsigned Size = CGM.getDataLayout().getTypeStoreSize(Elem->getType());
assert(Size <= 8 && "Unsupported bitwidth in shuffle instruction.");
OpenMPRTLFunctionNVPTX ShuffleFn = Size <= 4
@@ -1079,17 +1101,16 @@ static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF,
: OMPRTL_NVPTX__kmpc_shuffle_int64;
// Cast all types to 32- or 64-bit values before calling shuffle routines.
- auto CastTy = Size <= 4 ? CGM.Int32Ty : CGM.Int64Ty;
- auto *ElemCast = Bld.CreateSExtOrBitCast(Elem, CastTy);
- auto *WarpSize = CGF.EmitScalarConversion(
- getNVPTXWarpSize(CGF), C.getIntTypeForBitwidth(32, /* Signed */ true),
- C.getIntTypeForBitwidth(16, /* Signed */ true), SourceLocation());
+ llvm::Type *CastTy = Size <= 4 ? CGM.Int32Ty : CGM.Int64Ty;
+ llvm::Value *ElemCast = castValueToType(CGF, Elem, CastTy, /*isSigned=*/true);
+ auto *WarpSize =
+ Bld.CreateIntCast(getNVPTXWarpSize(CGF), CGM.Int16Ty, /*isSigned=*/true);
auto *ShuffledVal =
CGF.EmitRuntimeCall(RT.createNVPTXRuntimeFunction(ShuffleFn),
{ElemCast, Offset, WarpSize});
- return Bld.CreateTruncOrBitCast(ShuffledVal, CGF.ConvertTypeForMem(ElemTy));
+ return castValueToType(CGF, ShuffledVal, Elem->getType(), /*isSigned=*/true);
}
namespace {
@@ -1151,10 +1172,9 @@ static void emitReductionListCopy(
// Step 1.1: Get the address for the src element in the Reduce list.
Address SrcElementPtrAddr =
Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize());
- llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar(
- SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
- SrcElementAddr =
- Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType()));
+ SrcElementAddr = CGF.EmitLoadOfPointer(
+ SrcElementPtrAddr,
+ C.getPointerType(Private->getType())->castAs<PointerType>());
// Step 1.2: Create a temporary to store the element in the destination
// Reduce list.
@@ -1170,32 +1190,26 @@ static void emitReductionListCopy(
// Step 1.1: Get the address for the src element in the Reduce list.
Address SrcElementPtrAddr =
Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize());
- llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar(
- SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
- SrcElementAddr =
- Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType()));
+ SrcElementAddr = CGF.EmitLoadOfPointer(
+ SrcElementPtrAddr,
+ C.getPointerType(Private->getType())->castAs<PointerType>());
// Step 1.2: Get the address for dest element. The destination
// element has already been created on the thread's stack.
DestElementPtrAddr =
Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize());
- llvm::Value *DestElementPtr =
- CGF.EmitLoadOfScalar(DestElementPtrAddr, /*Volatile=*/false,
- C.VoidPtrTy, SourceLocation());
- Address DestElemAddr =
- Address(DestElementPtr, C.getTypeAlignInChars(Private->getType()));
- DestElementAddr = Bld.CreateElementBitCast(
- DestElemAddr, CGF.ConvertTypeForMem(Private->getType()));
+ DestElementAddr = CGF.EmitLoadOfPointer(
+ DestElementPtrAddr,
+ C.getPointerType(Private->getType())->castAs<PointerType>());
break;
}
case ThreadToScratchpad: {
// Step 1.1: Get the address for the src element in the Reduce list.
Address SrcElementPtrAddr =
Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize());
- llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar(
- SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
- SrcElementAddr =
- Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType()));
+ SrcElementAddr = CGF.EmitLoadOfPointer(
+ SrcElementPtrAddr,
+ C.getPointerType(Private->getType())->castAs<PointerType>());
// Step 1.2: Get the address for dest element:
// address = base + index * ElementSizeInChars.
@@ -1208,11 +1222,8 @@ static void emitReductionListCopy(
Bld.CreateAdd(DestBase.getPointer(), CurrentOffset);
ScratchPadElemAbsolutePtrVal =
Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
- Address ScratchpadPtr =
- Address(ScratchPadElemAbsolutePtrVal,
- C.getTypeAlignInChars(Private->getType()));
- DestElementAddr = Bld.CreateElementBitCast(
- ScratchpadPtr, CGF.ConvertTypeForMem(Private->getType()));
+ DestElementAddr = Address(ScratchPadElemAbsolutePtrVal,
+ C.getTypeAlignInChars(Private->getType()));
IncrScratchpadDest = true;
break;
}
@@ -1253,10 +1264,11 @@ static void emitReductionListCopy(
// Now that all active lanes have read the element in the
// Reduce list, shuffle over the value from the remote lane.
- if (ShuffleInElement) {
- Elem = createRuntimeShuffleFunction(CGF, Private->getType(), Elem,
- RemoteLaneOffset);
- }
+ if (ShuffleInElement)
+ Elem = createRuntimeShuffleFunction(CGF, Elem, RemoteLaneOffset);
+
+ DestElementAddr = Bld.CreateElementBitCast(DestElementAddr,
+ SrcElementAddr.getElementType());
// Store the source element value to the dest element address.
CGF.EmitStoreOfScalar(Elem, DestElementAddr, /*Volatile=*/false,
OpenPOWER on IntegriCloud