-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp    60
-rw-r--r--  llvm/test/CodeGen/NVPTX/access-non-generic.ll     7
-rw-r--r--  llvm/test/CodeGen/NVPTX/generic-to-nvvm-ir.ll     6
-rw-r--r--  llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll       11
4 files changed, 19 insertions(+), 65 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 916b0e11566..fd63fdbaced 100644
--- a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -45,8 +45,6 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {}
private:
- Value *getOrInsertCVTA(Module *M, Function *F, GlobalVariable *GV,
- IRBuilder<> &Builder);
Value *remapConstant(Module *M, Function *F, Constant *C,
IRBuilder<> &Builder);
Value *remapConstantVectorOrConstantAggregate(Module *M, Function *F,
@@ -156,46 +154,6 @@ bool GenericToNVVM::runOnModule(Module &M) {
return true;
}
-Value *GenericToNVVM::getOrInsertCVTA(Module *M, Function *F,
- GlobalVariable *GV,
- IRBuilder<> &Builder) {
- PointerType *GVType = GV->getType();
- Value *CVTA = nullptr;
-
- // See if the address space conversion requires the operand to be bitcast
- // to i8 addrspace(n)* first.
- EVT ExtendedGVType = EVT::getEVT(GV->getValueType(), true);
- if (!ExtendedGVType.isInteger() && !ExtendedGVType.isFloatingPoint()) {
- // A bitcast to i8 addrspace(n)* on the operand is needed.
- LLVMContext &Context = M->getContext();
- unsigned int AddrSpace = GVType->getAddressSpace();
- Type *DestTy = PointerType::get(Type::getInt8Ty(Context), AddrSpace);
- CVTA = Builder.CreateBitCast(GV, DestTy, "cvta");
- // Insert the address space conversion.
- Type *ResultType =
- PointerType::get(Type::getInt8Ty(Context), llvm::ADDRESS_SPACE_GENERIC);
- Function *CVTAFunction = Intrinsic::getDeclaration(
- M, Intrinsic::nvvm_ptr_global_to_gen, {ResultType, DestTy});
- CVTA = Builder.CreateCall(CVTAFunction, CVTA, "cvta");
- // Another bitcast from i8 * to <the element type of GVType> * is
- // required.
- DestTy =
- PointerType::get(GV->getValueType(), llvm::ADDRESS_SPACE_GENERIC);
- CVTA = Builder.CreateBitCast(CVTA, DestTy, "cvta");
- } else {
- // A simple CVTA is enough.
- SmallVector<Type *, 2> ParamTypes;
- ParamTypes.push_back(PointerType::get(GV->getValueType(),
- llvm::ADDRESS_SPACE_GENERIC));
- ParamTypes.push_back(GVType);
- Function *CVTAFunction = Intrinsic::getDeclaration(
- M, Intrinsic::nvvm_ptr_global_to_gen, ParamTypes);
- CVTA = Builder.CreateCall(CVTAFunction, GV, "cvta");
- }
-
- return CVTA;
-}
-
Value *GenericToNVVM::remapConstant(Module *M, Function *F, Constant *C,
IRBuilder<> &Builder) {
// If the constant C has been converted already in the given function F, just
@@ -207,17 +165,17 @@ Value *GenericToNVVM::remapConstant(Module *M, Function *F, Constant *C,
Value *NewValue = C;
if (isa<GlobalVariable>(C)) {
- // If the constant C is a global variable and is found in GVMap, generate a
- // set set of instructions that convert the clone of C with the global
- // address space specifier to a generic pointer.
- // The constant C cannot be used here, as it will be erased from the
- // module eventually. And the clone of C with the global address space
- // specifier cannot be used here either, as it will affect the types of
- // other instructions in the function. Hence, this address space conversion
- // is required.
+ // If the constant C is a global variable and is found in GVMap, substitute
+ //
+ // addrspacecast GVMap[C] to addrspace(0)
+ //
+ // for our use of C.
GVMapTy::iterator I = GVMap.find(cast<GlobalVariable>(C));
if (I != GVMap.end()) {
- NewValue = getOrInsertCVTA(M, F, I->second, Builder);
+ GlobalVariable *GV = I->second;
+ NewValue = Builder.CreateAddrSpaceCast(
+ GV,
+ PointerType::get(GV->getValueType(), llvm::ADDRESS_SPACE_GENERIC));
}
} else if (isa<ConstantAggregate>(C)) {
// If any element in the constant vector or aggregate C is or uses a global
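For context, the following is a minimal sketch, not taken from the commit, of how this change affects the IR the pass emits for a use of a global it has cloned into addrspace(1). The global @g and function @use are hypothetical, written with typed pointers as in the tests below.

; @g stands for a module-level global that GenericToNVVM has already
; cloned from the generic address space into addrspace(1).
@g = internal addrspace(1) global i32 0, align 4

define i32 @use() {
  ; Before this change, remapConstant() called getOrInsertCVTA(), which
  ; emitted an intrinsic call such as
  ;   %p = call i32* @llvm.nvvm.ptr.global.to.gen.p0i32.p1i32(i32 addrspace(1)* @g)
  ; (with extra bitcasts through i8* for aggregate-typed globals).
  ; After this change, the use is rewritten with a plain addrspacecast
  ; constant expression instead:
  %v = load i32, i32* addrspacecast (i32 addrspace(1)* @g to i32*)
  ret i32 %v
}

Emitting a constant expression rather than an intrinsic call keeps the converted use a constant that later passes can fold, and it avoids the i8* bitcasts the removed getOrInsertCVTA helper needed for aggregate-typed globals.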
diff --git a/llvm/test/CodeGen/NVPTX/access-non-generic.ll b/llvm/test/CodeGen/NVPTX/access-non-generic.ll
index d5776d77b10..62520be2cf5 100644
--- a/llvm/test/CodeGen/NVPTX/access-non-generic.ll
+++ b/llvm/test/CodeGen/NVPTX/access-non-generic.ll
@@ -5,13 +5,6 @@
@array = internal addrspace(3) global [10 x float] zeroinitializer, align 4
@scalar = internal addrspace(3) global float 0.000000e+00, align 4
-@generic_scalar = internal global float 0.000000e+00, align 4
-
-define float @ld_from_shared() {
- %1 = addrspacecast float* @generic_scalar to float addrspace(3)*
- %2 = load float, float addrspace(3)* %1
- ret float %2
-}
; Verifies nvptx-favor-non-generic correctly optimizes generic address space
; usage to non-generic address space usage for the patterns we claim to handle:
diff --git a/llvm/test/CodeGen/NVPTX/generic-to-nvvm-ir.ll b/llvm/test/CodeGen/NVPTX/generic-to-nvvm-ir.ll
index 5df5183dc2f..568208d5775 100644
--- a/llvm/test/CodeGen/NVPTX/generic-to-nvvm-ir.ll
+++ b/llvm/test/CodeGen/NVPTX/generic-to-nvvm-ir.ll
@@ -16,11 +16,11 @@ define void @func() !dbg !8 {
;CHECK-LABEL: @func()
;CHECK-SAME: !dbg [[FUNCNODE:![0-9]+]]
entry:
-; References to the variables must be converted back to generic address space via llvm intrinsic call
-; CHECK-DAG: call i8* @llvm.nvvm.ptr.global.to.gen.p0i8.p1i8({{.*}} addrspace(1)* @.str
+; References to the variables must be converted back to generic address space.
+; CHECK-DAG: addrspacecast ([4 x i8] addrspace(1)* @.str to [4 x i8]*)
%0 = load i8, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), align 1
call void @extfunc(i8 signext %0)
-; CHECK-DAG: call i8* @llvm.nvvm.ptr.global.to.gen.p0i8.p1i8(i8 addrspace(1)* @static_var
+; CHECK-DAG: addrspacecast (i8 addrspace(1)* @static_var to i8*)
%1 = load i8, i8* @static_var, align 1
call void @extfunc(i8 signext %1)
ret void
diff --git a/llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll b/llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll
index 66917d5cb18..5b29b219e9f 100644
--- a/llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll
+++ b/llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll
@@ -7,14 +7,17 @@ target triple = "nvptx-nvidia-cuda"
; CHECK: .global .align 4 .u32 myglobal = 42;
@myglobal = internal global i32 42, align 4
-; CHECK: .global .align 4 .u32 myconst = 42;
-@myconst = internal constant i32 42, align 4
+; CHECK: .global .align 4 .u32 myconst = 420;
+@myconst = internal constant i32 420, align 4
define void @foo(i32* %a, i32* %b) {
-; CHECK: cvta.global.u32
+; Expect one load -- @myconst isn't loaded from, because we know its value
+; statically.
+; CHECK: ld.global.u32
+; CHECK: st.global.u32
+; CHECK: st.global.u32
%ld1 = load i32, i32* @myglobal
-; CHECK: cvta.global.u32
%ld2 = load i32, i32* @myconst
store i32 %ld1, i32* %a
store i32 %ld2, i32* %b
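To illustrate the updated CHECK lines in this test, here is a rough sketch of what @foo might look like after GenericToNVVM with this change applied, again using hypothetical typed-pointer IR rather than output captured from the commit:

; Both globals are cloned into addrspace(1) by the pass; their uses are
; rewritten with addrspacecast constant expressions.
@myglobal = internal addrspace(1) global i32 42, align 4
@myconst = internal addrspace(1) constant i32 420, align 4

define void @foo(i32* %a, i32* %b) {
  %ld1 = load i32, i32* addrspacecast (i32 addrspace(1)* @myglobal to i32*)
  ; Because @myconst is a constant with a known initializer, this load is
  ; expected to be folded to 420 before PTX emission, which is why the
  ; test checks for a single ld.global followed by two st.global.
  %ld2 = load i32, i32* addrspacecast (i32 addrspace(1)* @myconst to i32*)
  store i32 %ld1, i32* %a
  store i32 %ld2, i32* %b
  ret void
}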