summaryrefslogtreecommitdiffstats
path: root/clang
diff options
context:
space:
mode:
Diffstat (limited to 'clang')
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp86
-rw-r--r--clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp8
-rw-r--r--clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp43
3 files changed, 65 insertions, 72 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index eb4c1202586..ac82737e777 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -1059,19 +1059,41 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall(
emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
}
+/// Cast value to the specified type.
+static llvm::Value *
+castValueToType(CodeGenFunction &CGF, llvm::Value *Val, llvm::Type *CastTy,
+ llvm::Optional<bool> IsSigned = llvm::None) {
+ if (Val->getType() == CastTy)
+ return Val;
+ if (Val->getType()->getPrimitiveSizeInBits() > 0 &&
+ CastTy->getPrimitiveSizeInBits() > 0 &&
+ Val->getType()->getPrimitiveSizeInBits() ==
+ CastTy->getPrimitiveSizeInBits())
+ return CGF.Builder.CreateBitCast(Val, CastTy);
+ if (IsSigned.hasValue() && CastTy->isIntegerTy() &&
+ Val->getType()->isIntegerTy())
+ return CGF.Builder.CreateIntCast(Val, CastTy, *IsSigned);
+ Address CastItem = CGF.CreateTempAlloca(
+ CastTy,
+ CharUnits::fromQuantity(
+ CGF.CGM.getDataLayout().getPrefTypeAlignment(Val->getType())));
+ Address ValCastItem = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CastItem, Val->getType()->getPointerTo(CastItem.getAddressSpace()));
+ CGF.Builder.CreateStore(Val, ValCastItem);
+ return CGF.Builder.CreateLoad(CastItem);
+}
+
/// This function creates calls to one of two shuffle functions to copy
/// variables between lanes in a warp.
static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF,
- QualType ElemTy,
llvm::Value *Elem,
llvm::Value *Offset) {
auto &CGM = CGF.CGM;
- auto &C = CGM.getContext();
auto &Bld = CGF.Builder;
CGOpenMPRuntimeNVPTX &RT =
*(static_cast<CGOpenMPRuntimeNVPTX *>(&CGM.getOpenMPRuntime()));
- unsigned Size = CGM.getContext().getTypeSizeInChars(ElemTy).getQuantity();
+ unsigned Size = CGM.getDataLayout().getTypeStoreSize(Elem->getType());
assert(Size <= 8 && "Unsupported bitwidth in shuffle instruction.");
OpenMPRTLFunctionNVPTX ShuffleFn = Size <= 4
@@ -1079,17 +1101,16 @@ static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF,
: OMPRTL_NVPTX__kmpc_shuffle_int64;
// Cast all types to 32- or 64-bit values before calling shuffle routines.
- auto CastTy = Size <= 4 ? CGM.Int32Ty : CGM.Int64Ty;
- auto *ElemCast = Bld.CreateSExtOrBitCast(Elem, CastTy);
- auto *WarpSize = CGF.EmitScalarConversion(
- getNVPTXWarpSize(CGF), C.getIntTypeForBitwidth(32, /* Signed */ true),
- C.getIntTypeForBitwidth(16, /* Signed */ true), SourceLocation());
+ llvm::Type *CastTy = Size <= 4 ? CGM.Int32Ty : CGM.Int64Ty;
+ llvm::Value *ElemCast = castValueToType(CGF, Elem, CastTy, /*isSigned=*/true);
+ auto *WarpSize =
+ Bld.CreateIntCast(getNVPTXWarpSize(CGF), CGM.Int16Ty, /*isSigned=*/true);
auto *ShuffledVal =
CGF.EmitRuntimeCall(RT.createNVPTXRuntimeFunction(ShuffleFn),
{ElemCast, Offset, WarpSize});
- return Bld.CreateTruncOrBitCast(ShuffledVal, CGF.ConvertTypeForMem(ElemTy));
+ return castValueToType(CGF, ShuffledVal, Elem->getType(), /*isSigned=*/true);
}
namespace {
@@ -1151,10 +1172,9 @@ static void emitReductionListCopy(
// Step 1.1: Get the address for the src element in the Reduce list.
Address SrcElementPtrAddr =
Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize());
- llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar(
- SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
- SrcElementAddr =
- Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType()));
+ SrcElementAddr = CGF.EmitLoadOfPointer(
+ SrcElementPtrAddr,
+ C.getPointerType(Private->getType())->castAs<PointerType>());
// Step 1.2: Create a temporary to store the element in the destination
// Reduce list.
@@ -1170,32 +1190,26 @@ static void emitReductionListCopy(
// Step 1.1: Get the address for the src element in the Reduce list.
Address SrcElementPtrAddr =
Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize());
- llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar(
- SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
- SrcElementAddr =
- Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType()));
+ SrcElementAddr = CGF.EmitLoadOfPointer(
+ SrcElementPtrAddr,
+ C.getPointerType(Private->getType())->castAs<PointerType>());
// Step 1.2: Get the address for dest element. The destination
// element has already been created on the thread's stack.
DestElementPtrAddr =
Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize());
- llvm::Value *DestElementPtr =
- CGF.EmitLoadOfScalar(DestElementPtrAddr, /*Volatile=*/false,
- C.VoidPtrTy, SourceLocation());
- Address DestElemAddr =
- Address(DestElementPtr, C.getTypeAlignInChars(Private->getType()));
- DestElementAddr = Bld.CreateElementBitCast(
- DestElemAddr, CGF.ConvertTypeForMem(Private->getType()));
+ DestElementAddr = CGF.EmitLoadOfPointer(
+ DestElementPtrAddr,
+ C.getPointerType(Private->getType())->castAs<PointerType>());
break;
}
case ThreadToScratchpad: {
// Step 1.1: Get the address for the src element in the Reduce list.
Address SrcElementPtrAddr =
Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize());
- llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar(
- SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
- SrcElementAddr =
- Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType()));
+ SrcElementAddr = CGF.EmitLoadOfPointer(
+ SrcElementPtrAddr,
+ C.getPointerType(Private->getType())->castAs<PointerType>());
// Step 1.2: Get the address for dest element:
// address = base + index * ElementSizeInChars.
@@ -1208,11 +1222,8 @@ static void emitReductionListCopy(
Bld.CreateAdd(DestBase.getPointer(), CurrentOffset);
ScratchPadElemAbsolutePtrVal =
Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
- Address ScratchpadPtr =
- Address(ScratchPadElemAbsolutePtrVal,
- C.getTypeAlignInChars(Private->getType()));
- DestElementAddr = Bld.CreateElementBitCast(
- ScratchpadPtr, CGF.ConvertTypeForMem(Private->getType()));
+ DestElementAddr = Address(ScratchPadElemAbsolutePtrVal,
+ C.getTypeAlignInChars(Private->getType()));
IncrScratchpadDest = true;
break;
}
@@ -1253,10 +1264,11 @@ static void emitReductionListCopy(
// Now that all active lanes have read the element in the
// Reduce list, shuffle over the value from the remote lane.
- if (ShuffleInElement) {
- Elem = createRuntimeShuffleFunction(CGF, Private->getType(), Elem,
- RemoteLaneOffset);
- }
+ if (ShuffleInElement)
+ Elem = createRuntimeShuffleFunction(CGF, Elem, RemoteLaneOffset);
+
+ DestElementAddr = Bld.CreateElementBitCast(DestElementAddr,
+ SrcElementAddr.getElementType());
// Store the source element value to the dest element address.
CGF.EmitStoreOfScalar(Elem, DestElementAddr, /*Volatile=*/false,
diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
index c4c3e977b0e..d636240f44d 100644
--- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
@@ -168,9 +168,9 @@ int bar(int n){
// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
- // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double*
// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to double*
// CHECK: [[REMOTE_ELT_VAL:%.+]] = load double, double* [[REMOTE_ELT]], align
+ // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double*
// CHECK: store double [[REMOTE_ELT_VAL]], double* [[ELT]], align
// CHECK: br label {{%?}}[[COPY_CONT:.+]]
//
@@ -405,9 +405,9 @@ int bar(int n){
// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
- // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float*
// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to float*
// CHECK: [[REMOTE_ELT_VAL:%.+]] = load float, float* [[REMOTE_ELT]], align
+ // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float*
// CHECK: store float [[REMOTE_ELT_VAL]], float* [[ELT]], align
// CHECK: br label {{%?}}[[COPY_CONT:.+]]
//
@@ -714,18 +714,18 @@ int bar(int n){
// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
- // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i32*
// CHECK: [[REMOTE_ELT_VAL:%.+]] = load i32, i32* [[REMOTE_ELT]], align
+ // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
// CHECK: store i32 [[REMOTE_ELT_VAL]], i32* [[ELT]], align
//
// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 1
// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
- // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16*
// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i16*
// CHECK: [[REMOTE_ELT_VAL:%.+]] = load i16, i16* [[REMOTE_ELT]], align
+ // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16*
// CHECK: store i16 [[REMOTE_ELT_VAL]], i16* [[ELT]], align
// CHECK: br label {{%?}}[[COPY_CONT:.+]]
//
diff --git a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
index d77231807fb..696940bcf14 100644
--- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
@@ -168,9 +168,9 @@ int bar(int n){
// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 0
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
- // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double*
// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to double*
// CHECK: [[REMOTE_ELT_VAL:%.+]] = load double, double* [[REMOTE_ELT]], align
+ // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double*
// CHECK: store double [[REMOTE_ELT_VAL]], double* [[ELT]], align
// CHECK: br label {{%?}}[[COPY_CONT:.+]]
//
@@ -249,9 +249,9 @@ int bar(int n){
// CHECK: [[P:%.+]] = mul i[[SZ]] 8, [[TEAM]]
// CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
- // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to double*
// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double*
// CHECK: [[ELT_VAL:%.+]] = load double, double* [[ELT]], align
+ // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to double*
// CHECK: store double [[ELT_VAL]], double* [[SCRATCHPAD_ELT_PTR]], align
//
// CHECK: ret
@@ -298,25 +298,15 @@ int bar(int n){
// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 0
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
- // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double*
// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to double*
// CHECK: [[REMOTE_ELT_VAL:%.+]] = load double, double* [[REMOTE_ELT]], align
+ // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double*
// CHECK: store double [[REMOTE_ELT_VAL]], double* [[ELT]], align
// CHECK: br label {{%?}}[[REDUCE_CONT]]
//
// CHECK: [[REDUCE_CONT]]
// CHECK: ret
-
-
-
-
-
-
-
-
-
-
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l33}}_worker()
// CHECK: define {{.*}}void [[T2:@__omp_offloading_.+template.+l33]](
@@ -480,9 +470,9 @@ int bar(int n){
// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 1
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
- // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float*
// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to float*
// CHECK: [[REMOTE_ELT_VAL:%.+]] = load float, float* [[REMOTE_ELT]], align
+ // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float*
// CHECK: store float [[REMOTE_ELT_VAL]], float* [[ELT]], align
// CHECK: br label {{%?}}[[COPY_CONT:.+]]
//
@@ -617,9 +607,9 @@ int bar(int n){
// CHECK: [[P:%.+]] = mul i[[SZ]] 4, [[TEAM]]
// CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
- // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to float*
// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float*
// CHECK: [[ELT_VAL:%.+]] = load float, float* [[ELT]], align
+ // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to float*
// CHECK: store float [[ELT_VAL]], float* [[SCRATCHPAD_ELT_PTR]], align
//
// CHECK: ret
@@ -690,24 +680,15 @@ int bar(int n){
// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 1
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
- // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float*
// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to float*
// CHECK: [[REMOTE_ELT_VAL:%.+]] = load float, float* [[REMOTE_ELT]], align
+ // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float*
// CHECK: store float [[REMOTE_ELT_VAL]], float* [[ELT]], align
// CHECK: br label {{%?}}[[REDUCE_CONT]]
//
// CHECK: [[REDUCE_CONT]]
// CHECK: ret
-
-
-
-
-
-
-
-
-
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l40}}_worker()
// CHECK: define {{.*}}void [[T3:@__omp_offloading_.+template.+l40]](
@@ -903,18 +884,18 @@ int bar(int n){
// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 0
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
- // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i32*
// CHECK: [[REMOTE_ELT_VAL:%.+]] = load i32, i32* [[REMOTE_ELT]], align
+ // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
// CHECK: store i32 [[REMOTE_ELT_VAL]], i32* [[ELT]], align
//
// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i[[SZ]] 0, i[[SZ]] 1
// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 1
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
- // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16*
// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i16*
// CHECK: [[REMOTE_ELT_VAL:%.+]] = load i16, i16* [[REMOTE_ELT]], align
+ // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16*
// CHECK: store i16 [[REMOTE_ELT_VAL]], i16* [[ELT]], align
// CHECK: br label {{%?}}[[COPY_CONT:.+]]
//
@@ -1035,9 +1016,9 @@ int bar(int n){
// CHECK: [[P:%.+]] = mul i[[SZ]] 4, [[TEAM]]
// CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
- // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to i32*
// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
// CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], align
+ // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to i32*
// CHECK: store i32 [[ELT_VAL]], i32* [[SCRATCHPAD_ELT_PTR]], align
//
// CHECK: [[OF:%.+]] = mul i[[SZ]] [[NUM_TEAMS]], 4
@@ -1053,9 +1034,9 @@ int bar(int n){
// CHECK: [[P:%.+]] = mul i[[SZ]] 2, [[TEAM]]
// CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
- // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to i16*
// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16*
// CHECK: [[ELT_VAL:%.+]] = load i16, i16* [[ELT]], align
+ // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to i16*
// CHECK: store i16 [[ELT_VAL]], i16* [[SCRATCHPAD_ELT_PTR]], align
//
// CHECK: ret
@@ -1121,18 +1102,18 @@ int bar(int n){
// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 0
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
- // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i32*
// CHECK: [[REMOTE_ELT_VAL:%.+]] = load i32, i32* [[REMOTE_ELT]], align
+ // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
// CHECK: store i32 [[REMOTE_ELT_VAL]], i32* [[ELT]], align
//
// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 1
// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 1
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
- // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16*
// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i16*
// CHECK: [[REMOTE_ELT_VAL:%.+]] = load i16, i16* [[REMOTE_ELT]], align
+ // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16*
// CHECK: store i16 [[REMOTE_ELT_VAL]], i16* [[ELT]], align
// CHECK: br label {{%?}}[[REDUCE_CONT]]
//
OpenPOWER on IntegriCloud