summary | refs | log | tree | commit | diff | stats
path: root/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
diff options
context:
space:
mode:
author Alexey Bataev <a.bataev@hotmail.com> 2018-04-06 16:03:36 +0000
committer Alexey Bataev <a.bataev@hotmail.com> 2018-04-06 16:03:36 +0000
commit e290ec02c7b38929c6d78d5c026a1db312641add (patch)
tree 77d75026c4c3bc59a488274be2da33199a3d662c /clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
parent 63ae5579e7cabf519db0e5c8cd69d4683b34b6e8 (diff)
download bcm5719-llvm-e290ec02c7b38929c6d78d5c026a1db312641add.tar.gz
bcm5719-llvm-e290ec02c7b38929c6d78d5c026a1db312641add.zip
[OPENMP, NVPTX] Fix codegen for the teams reduction.
Added NUW (no unsigned wrap) flags to all the add|mul|sub operations and replaced sdiv with udiv, since we operate on unsigned values only (addresses converted to integers).
llvm-svn: 329411
Diffstat (limited to 'clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp')
-rw-r--r-- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp 44
1 file changed, 19 insertions, 25 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 75d3ba75b34..0d7386490df 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -529,8 +529,8 @@ static llvm::Value *getThreadLimit(CodeGenFunction &CGF,
CGBuilderTy &Bld = CGF.Builder;
return IsInSpmdExecutionMode
? getNVPTXNumThreads(CGF)
- : Bld.CreateSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF),
- "thread_limit");
+ : Bld.CreateNUWSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF),
+ "thread_limit");
}
/// Get the thread id of the OMP master thread.
@@ -545,9 +545,9 @@ static llvm::Value *getMasterThreadID(CodeGenFunction &CGF) {
llvm::Value *NumThreads = getNVPTXNumThreads(CGF);
// We assume that the warp size is a power of 2.
- llvm::Value *Mask = Bld.CreateSub(getNVPTXWarpSize(CGF), Bld.getInt32(1));
+ llvm::Value *Mask = Bld.CreateNUWSub(getNVPTXWarpSize(CGF), Bld.getInt32(1));
- return Bld.CreateAnd(Bld.CreateSub(NumThreads, Bld.getInt32(1)),
+ return Bld.CreateAnd(Bld.CreateNUWSub(NumThreads, Bld.getInt32(1)),
Bld.CreateNot(Mask), "master_tid");
}
@@ -1714,13 +1714,11 @@ static void emitReductionListCopy(
// Step 1.2: Get the address for dest element:
// address = base + index * ElementSizeInChars.
- unsigned ElementSizeInChars =
- C.getTypeSizeInChars(Private->getType()).getQuantity();
+ llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
auto *CurrentOffset =
- Bld.CreateMul(llvm::ConstantInt::get(CGM.SizeTy, ElementSizeInChars),
- ScratchpadIndex);
+ Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
auto *ScratchPadElemAbsolutePtrVal =
- Bld.CreateAdd(DestBase.getPointer(), CurrentOffset);
+ Bld.CreateNUWAdd(DestBase.getPointer(), CurrentOffset);
ScratchPadElemAbsolutePtrVal =
Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
DestElementAddr = Address(ScratchPadElemAbsolutePtrVal,
@@ -1731,13 +1729,11 @@ static void emitReductionListCopy(
case ScratchpadToThread: {
// Step 1.1: Get the address for the src element in the scratchpad.
// address = base + index * ElementSizeInChars.
- unsigned ElementSizeInChars =
- C.getTypeSizeInChars(Private->getType()).getQuantity();
+ llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
auto *CurrentOffset =
- Bld.CreateMul(llvm::ConstantInt::get(CGM.SizeTy, ElementSizeInChars),
- ScratchpadIndex);
+ Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
auto *ScratchPadElemAbsolutePtrVal =
- Bld.CreateAdd(SrcBase.getPointer(), CurrentOffset);
+ Bld.CreateNUWAdd(SrcBase.getPointer(), CurrentOffset);
ScratchPadElemAbsolutePtrVal =
Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
SrcElementAddr = Address(ScratchPadElemAbsolutePtrVal,
@@ -1796,22 +1792,20 @@ static void emitReductionListCopy(
if ((IncrScratchpadDest || IncrScratchpadSrc) && (Idx + 1 < Size)) {
llvm::Value *ScratchpadBasePtr =
IncrScratchpadDest ? DestBase.getPointer() : SrcBase.getPointer();
- unsigned ElementSizeInChars =
- C.getTypeSizeInChars(Private->getType()).getQuantity();
- ScratchpadBasePtr = Bld.CreateAdd(
+ llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
+ ScratchpadBasePtr = Bld.CreateNUWAdd(
ScratchpadBasePtr,
- Bld.CreateMul(ScratchpadWidth, llvm::ConstantInt::get(
- CGM.SizeTy, ElementSizeInChars)));
+ Bld.CreateNUWMul(ScratchpadWidth, ElementSizeInChars));
// Take care of global memory alignment for performance
- ScratchpadBasePtr = Bld.CreateSub(ScratchpadBasePtr,
- llvm::ConstantInt::get(CGM.SizeTy, 1));
- ScratchpadBasePtr = Bld.CreateSDiv(
+ ScratchpadBasePtr = Bld.CreateNUWSub(
+ ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, 1));
+ ScratchpadBasePtr = Bld.CreateUDiv(
ScratchpadBasePtr,
llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment));
- ScratchpadBasePtr = Bld.CreateAdd(ScratchpadBasePtr,
- llvm::ConstantInt::get(CGM.SizeTy, 1));
- ScratchpadBasePtr = Bld.CreateMul(
+ ScratchpadBasePtr = Bld.CreateNUWAdd(
+ ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, 1));
+ ScratchpadBasePtr = Bld.CreateNUWMul(
ScratchpadBasePtr,
llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment));
OpenPOWER on IntegriCloud