diff options
Diffstat (limited to 'clang')
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 86 | ||||
-rw-r--r-- | clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp | 8 | ||||
-rw-r--r-- | clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp | 43 |
3 files changed, 65 insertions, 72 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index eb4c1202586..ac82737e777 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -1059,19 +1059,41 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall( emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); } +/// Cast value to the specified type. +static llvm::Value * +castValueToType(CodeGenFunction &CGF, llvm::Value *Val, llvm::Type *CastTy, + llvm::Optional<bool> IsSigned = llvm::None) { + if (Val->getType() == CastTy) + return Val; + if (Val->getType()->getPrimitiveSizeInBits() > 0 && + CastTy->getPrimitiveSizeInBits() > 0 && + Val->getType()->getPrimitiveSizeInBits() == + CastTy->getPrimitiveSizeInBits()) + return CGF.Builder.CreateBitCast(Val, CastTy); + if (IsSigned.hasValue() && CastTy->isIntegerTy() && + Val->getType()->isIntegerTy()) + return CGF.Builder.CreateIntCast(Val, CastTy, *IsSigned); + Address CastItem = CGF.CreateTempAlloca( + CastTy, + CharUnits::fromQuantity( + CGF.CGM.getDataLayout().getPrefTypeAlignment(Val->getType()))); + Address ValCastItem = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CastItem, Val->getType()->getPointerTo(CastItem.getAddressSpace())); + CGF.Builder.CreateStore(Val, ValCastItem); + return CGF.Builder.CreateLoad(CastItem); +} + /// This function creates calls to one of two shuffle functions to copy /// variables between lanes in a warp. static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF, - QualType ElemTy, llvm::Value *Elem, llvm::Value *Offset) { auto &CGM = CGF.CGM; - auto &C = CGM.getContext(); auto &Bld = CGF.Builder; CGOpenMPRuntimeNVPTX &RT = *(static_cast<CGOpenMPRuntimeNVPTX *>(&CGM.getOpenMPRuntime())); - unsigned Size = CGM.getContext().getTypeSizeInChars(ElemTy).getQuantity(); + unsigned Size = CGM.getDataLayout().getTypeStoreSize(Elem->getType()); assert(Size <= 8 && "Unsupported bitwidth in shuffle instruction."); OpenMPRTLFunctionNVPTX ShuffleFn = Size <= 4 @@ -1079,17 +1101,16 @@ static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF, : OMPRTL_NVPTX__kmpc_shuffle_int64; // Cast all types to 32- or 64-bit values before calling shuffle routines. - auto CastTy = Size <= 4 ? CGM.Int32Ty : CGM.Int64Ty; - auto *ElemCast = Bld.CreateSExtOrBitCast(Elem, CastTy); - auto *WarpSize = CGF.EmitScalarConversion( - getNVPTXWarpSize(CGF), C.getIntTypeForBitwidth(32, /* Signed */ true), - C.getIntTypeForBitwidth(16, /* Signed */ true), SourceLocation()); + llvm::Type *CastTy = Size <= 4 ? CGM.Int32Ty : CGM.Int64Ty; + llvm::Value *ElemCast = castValueToType(CGF, Elem, CastTy, /*isSigned=*/true); + auto *WarpSize = + Bld.CreateIntCast(getNVPTXWarpSize(CGF), CGM.Int16Ty, /*isSigned=*/true); auto *ShuffledVal = CGF.EmitRuntimeCall(RT.createNVPTXRuntimeFunction(ShuffleFn), {ElemCast, Offset, WarpSize}); - return Bld.CreateTruncOrBitCast(ShuffledVal, CGF.ConvertTypeForMem(ElemTy)); + return castValueToType(CGF, ShuffledVal, Elem->getType(), /*isSigned=*/true); } namespace { @@ -1151,10 +1172,9 @@ static void emitReductionListCopy( // Step 1.1: Get the address for the src element in the Reduce list. Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize()); - llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar( - SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); - SrcElementAddr = - Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType())); + SrcElementAddr = CGF.EmitLoadOfPointer( + SrcElementPtrAddr, + C.getPointerType(Private->getType())->castAs<PointerType>()); // Step 1.2: Create a temporary to store the element in the destination // Reduce list. @@ -1170,32 +1190,26 @@ static void emitReductionListCopy( // Step 1.1: Get the address for the src element in the Reduce list. Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize()); - llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar( - SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); - SrcElementAddr = - Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType())); + SrcElementAddr = CGF.EmitLoadOfPointer( + SrcElementPtrAddr, + C.getPointerType(Private->getType())->castAs<PointerType>()); // Step 1.2: Get the address for dest element. The destination // element has already been created on the thread's stack. DestElementPtrAddr = Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize()); - llvm::Value *DestElementPtr = - CGF.EmitLoadOfScalar(DestElementPtrAddr, /*Volatile=*/false, - C.VoidPtrTy, SourceLocation()); - Address DestElemAddr = - Address(DestElementPtr, C.getTypeAlignInChars(Private->getType())); - DestElementAddr = Bld.CreateElementBitCast( - DestElemAddr, CGF.ConvertTypeForMem(Private->getType())); + DestElementAddr = CGF.EmitLoadOfPointer( + DestElementPtrAddr, + C.getPointerType(Private->getType())->castAs<PointerType>()); break; } case ThreadToScratchpad: { // Step 1.1: Get the address for the src element in the Reduce list. Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize()); - llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar( - SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); - SrcElementAddr = - Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType())); + SrcElementAddr = CGF.EmitLoadOfPointer( + SrcElementPtrAddr, + C.getPointerType(Private->getType())->castAs<PointerType>()); // Step 1.2: Get the address for dest element: // address = base + index * ElementSizeInChars. @@ -1208,11 +1222,8 @@ static void emitReductionListCopy( Bld.CreateAdd(DestBase.getPointer(), CurrentOffset); ScratchPadElemAbsolutePtrVal = Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy); - Address ScratchpadPtr = - Address(ScratchPadElemAbsolutePtrVal, - C.getTypeAlignInChars(Private->getType())); - DestElementAddr = Bld.CreateElementBitCast( - ScratchpadPtr, CGF.ConvertTypeForMem(Private->getType())); + DestElementAddr = Address(ScratchPadElemAbsolutePtrVal, + C.getTypeAlignInChars(Private->getType())); IncrScratchpadDest = true; break; } @@ -1253,10 +1264,11 @@ static void emitReductionListCopy( // Now that all active lanes have read the element in the // Reduce list, shuffle over the value from the remote lane. - if (ShuffleInElement) { - Elem = createRuntimeShuffleFunction(CGF, Private->getType(), Elem, - RemoteLaneOffset); - } + if (ShuffleInElement) + Elem = createRuntimeShuffleFunction(CGF, Elem, RemoteLaneOffset); + + DestElementAddr = Bld.CreateElementBitCast(DestElementAddr, + SrcElementAddr.getElementType()); // Store the source element value to the dest element address. CGF.EmitStoreOfScalar(Elem, DestElementAddr, /*Volatile=*/false, diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp index c4c3e977b0e..d636240f44d 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp @@ -168,9 +168,9 @@ int bar(int n){ // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double* // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to double* // CHECK: [[REMOTE_ELT_VAL:%.+]] = load double, double* [[REMOTE_ELT]], align + // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double* // CHECK: store double [[REMOTE_ELT_VAL]], double* [[ELT]], align // CHECK: br label {{%?}}[[COPY_CONT:.+]] // @@ -405,9 +405,9 @@ int bar(int n){ // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float* // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to float* // CHECK: [[REMOTE_ELT_VAL:%.+]] = load float, float* [[REMOTE_ELT]], align + // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float* // CHECK: store float [[REMOTE_ELT_VAL]], float* [[ELT]], align // CHECK: br label {{%?}}[[COPY_CONT:.+]] // @@ -714,18 +714,18 @@ int bar(int n){ // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i32* // CHECK: [[REMOTE_ELT_VAL:%.+]] = load i32, i32* [[REMOTE_ELT]], align + // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* // CHECK: store i32 [[REMOTE_ELT_VAL]], i32* [[ELT]], align // // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i16* // CHECK: [[REMOTE_ELT_VAL:%.+]] = load i16, i16* [[REMOTE_ELT]], align + // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* // CHECK: store i16 [[REMOTE_ELT_VAL]], i16* [[ELT]], align // CHECK: br label {{%?}}[[COPY_CONT:.+]] // diff --git a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp index d77231807fb..696940bcf14 100644 --- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp @@ -168,9 +168,9 @@ int bar(int n){ // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 0 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double* // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to double* // CHECK: [[REMOTE_ELT_VAL:%.+]] = load double, double* [[REMOTE_ELT]], align + // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double* // CHECK: store double [[REMOTE_ELT_VAL]], double* [[ELT]], align // CHECK: br label {{%?}}[[COPY_CONT:.+]] // @@ -249,9 +249,9 @@ int bar(int n){ // CHECK: [[P:%.+]] = mul i[[SZ]] 8, [[TEAM]] // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]] // CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8* - // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to double* // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double* // CHECK: [[ELT_VAL:%.+]] = load double, double* [[ELT]], align + // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to double* // CHECK: store double [[ELT_VAL]], double* [[SCRATCHPAD_ELT_PTR]], align // // CHECK: ret @@ -298,25 +298,15 @@ int bar(int n){ // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 0 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double* // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to double* // CHECK: [[REMOTE_ELT_VAL:%.+]] = load double, double* [[REMOTE_ELT]], align + // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double* // CHECK: store double [[REMOTE_ELT_VAL]], double* [[ELT]], align // CHECK: br label {{%?}}[[REDUCE_CONT]] // // CHECK: [[REDUCE_CONT]] // CHECK: ret - - - - - - - - - - // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l33}}_worker() // CHECK: define {{.*}}void [[T2:@__omp_offloading_.+template.+l33]]( @@ -480,9 +470,9 @@ int bar(int n){ // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 1 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float* // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to float* // CHECK: [[REMOTE_ELT_VAL:%.+]] = load float, float* [[REMOTE_ELT]], align + // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float* // CHECK: store float [[REMOTE_ELT_VAL]], float* [[ELT]], align // CHECK: br label {{%?}}[[COPY_CONT:.+]] // @@ -617,9 +607,9 @@ int bar(int n){ // CHECK: [[P:%.+]] = mul i[[SZ]] 4, [[TEAM]] // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]] // CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8* - // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to float* // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float* // CHECK: [[ELT_VAL:%.+]] = load float, float* [[ELT]], align + // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to float* // CHECK: store float [[ELT_VAL]], float* [[SCRATCHPAD_ELT_PTR]], align // // CHECK: ret @@ -690,24 +680,15 @@ int bar(int n){ // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 1 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float* // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to float* // CHECK: [[REMOTE_ELT_VAL:%.+]] = load float, float* [[REMOTE_ELT]], align + // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float* // CHECK: store float [[REMOTE_ELT_VAL]], float* [[ELT]], align // CHECK: br label {{%?}}[[REDUCE_CONT]] // // CHECK: [[REDUCE_CONT]] // CHECK: ret - - - - - - - - - // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l40}}_worker() // CHECK: define {{.*}}void [[T3:@__omp_offloading_.+template.+l40]]( @@ -903,18 +884,18 @@ int bar(int n){ // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 0 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i32* // CHECK: [[REMOTE_ELT_VAL:%.+]] = load i32, i32* [[REMOTE_ELT]], align + // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* // CHECK: store i32 [[REMOTE_ELT_VAL]], i32* [[ELT]], align // // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i[[SZ]] 0, i[[SZ]] 1 // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 1 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i16* // CHECK: [[REMOTE_ELT_VAL:%.+]] = load i16, i16* [[REMOTE_ELT]], align + // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* // CHECK: store i16 [[REMOTE_ELT_VAL]], i16* [[ELT]], align // CHECK: br label {{%?}}[[COPY_CONT:.+]] // @@ -1035,9 +1016,9 @@ int bar(int n){ // CHECK: [[P:%.+]] = mul i[[SZ]] 4, [[TEAM]] // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]] // CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8* - // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to i32* // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* // CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], align + // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to i32* // CHECK: store i32 [[ELT_VAL]], i32* [[SCRATCHPAD_ELT_PTR]], align // // CHECK: [[OF:%.+]] = mul i[[SZ]] [[NUM_TEAMS]], 4 @@ -1053,9 +1034,9 @@ int bar(int n){ // CHECK: [[P:%.+]] = mul i[[SZ]] 2, [[TEAM]] // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]] // CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8* - // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to i16* // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* // CHECK: [[ELT_VAL:%.+]] = load i16, i16* [[ELT]], align + // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to i16* // CHECK: store i16 [[ELT_VAL]], i16* [[SCRATCHPAD_ELT_PTR]], align // // CHECK: ret @@ -1121,18 +1102,18 @@ int bar(int n){ // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 0 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i32* // CHECK: [[REMOTE_ELT_VAL:%.+]] = load i32, i32* [[REMOTE_ELT]], align + // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* // CHECK: store i32 [[REMOTE_ELT_VAL]], i32* [[ELT]], align // // CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 1 // CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]], // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 1 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], - // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* // CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i16* // CHECK: [[REMOTE_ELT_VAL:%.+]] = load i16, i16* [[REMOTE_ELT]], align + // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* // CHECK: store i16 [[REMOTE_ELT_VAL]], i16* [[ELT]], align // CHECK: br label {{%?}}[[REDUCE_CONT]] // |