 clang/include/clang/Basic/BuiltinsNVPTX.def |  2 +-
 clang/lib/CodeGen/CGBuiltin.cpp             | 10 ++++++++++
 clang/test/CodeGen/builtins-nvptx-ptx50.cu  | 23 +++++++++++++++++++++++
 llvm/include/llvm/IR/IntrinsicsNVVM.td      |  7 ++++++-
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp |  1 +
 llvm/lib/Target/NVPTX/NVPTXIntrinsics.td    | 13 +++++++++++++
 llvm/test/CodeGen/NVPTX/atomics-sm60.ll     | 19 +++++++++++++++++++
 7 files changed, 73 insertions(+), 2 deletions(-)
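
What this patch does: __nvvm_atom_add_gen_d becomes a target builtin gated on the "satom" feature (sm_60 and newer), clang lowers it to a new llvm.nvvm.atomic.load.add.f64 intrinsic (atomicrmw could not express floating-point add at the time), and the NVPTX backend selects that intrinsic to atom.add.f64 in the generic, global, and shared address spaces.

As a rough illustration of what this enables in device code, here is a minimal CUDA sketch in the same bare-builtin style as the clang test below. The kernel name and indexing are invented for the example; only the __nvvm_* builtins are from the tree:

  // Minimal sketch, assuming an sm_60 target compiled -fcuda-is-device as in
  // the test below. sum_f64 and its index math are illustrative.
  #define __device__ __attribute__((device))
  #define __global__ __attribute__((global))

  __global__ void sum_f64(const double *in, double *sum, int n) {
    int i = __nvvm_read_ptx_sreg_ctaid_x() * __nvvm_read_ptx_sreg_ntid_x() +
            __nvvm_read_ptx_sreg_tid_x();
    if (i < n)
      __nvvm_atom_add_gen_d(sum, in[i]); // lowers to atom.add.f64
  }
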
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def
index caa860480f7..b596793c9c1 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.def
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -481,7 +481,7 @@ TARGET_BUILTIN(__nvvm_atom_cta_add_gen_f, "ffD*f", "n", "satom")
 TARGET_BUILTIN(__nvvm_atom_sys_add_gen_f, "ffD*f", "n", "satom")
 BUILTIN(__nvvm_atom_add_g_d, "ddD*1d", "n")
 BUILTIN(__nvvm_atom_add_s_d, "ddD*3d", "n")
-BUILTIN(__nvvm_atom_add_gen_d, "ddD*d", "n")
+TARGET_BUILTIN(__nvvm_atom_add_gen_d, "ddD*d", "n", "satom")
 TARGET_BUILTIN(__nvvm_atom_cta_add_gen_d, "ddD*d", "n", "satom")
 TARGET_BUILTIN(__nvvm_atom_sys_add_gen_d, "ddD*d", "n", "satom")
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index f1b8e2e6110..369240f316c 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9554,6 +9554,16 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
     return Builder.CreateCall(FnALAF32, {Ptr, Val});
   }
 
+  case NVPTX::BI__nvvm_atom_add_gen_d: {
+    Value *Ptr = EmitScalarExpr(E->getArg(0));
+    Value *Val = EmitScalarExpr(E->getArg(1));
+    // atomicrmw only deals with integer arguments, so we need to use
+    // LLVM's nvvm_atomic_load_add_f64 intrinsic.
+    Value *FnALAF64 =
+        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f64, Ptr->getType());
+    return Builder.CreateCall(FnALAF64, {Ptr, Val});
+  }
+
   case NVPTX::BI__nvvm_atom_inc_gen_ui: {
     Value *Ptr = EmitScalarExpr(E->getArg(0));
     Value *Val = EmitScalarExpr(E->getArg(1));
diff --git a/clang/test/CodeGen/builtins-nvptx-ptx50.cu b/clang/test/CodeGen/builtins-nvptx-ptx50.cu
new file mode 100644
index 00000000000..e85be442eb4
--- /dev/null
+++ b/clang/test/CodeGen/builtins-nvptx-ptx50.cu
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_60 \
+// RUN:   -fcuda-is-device -S -emit-llvm -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=CHECK %s
+//
+// RUN: %clang_cc1 -triple nvptx-unknown-unknown -target-cpu sm_50 \
+// RUN:   -fcuda-is-device -S -o /dev/null -x cuda -verify %s
+
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __shared__ __attribute__((shared))
+#define __constant__ __attribute__((constant))
+
+// We have to keep all builtins that depend on a particular target feature in
+// the same function, because the codegen will stop after the very first
+// function that encounters an error, so -verify will not be able to find
+// errors in subsequent functions.
+
+// CHECK-LABEL: test_fn
+__device__ void test_fn(double d, double* double_ptr) {
+  // CHECK: call double @llvm.nvvm.atomic.load.add.f64.p0f64
+  // expected-error@+1 {{'__nvvm_atom_add_gen_d' needs target feature satom}}
+  __nvvm_atom_add_gen_d(double_ptr, d);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 7ba1a3eb2e5..249419d15d3 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -683,10 +683,15 @@ let TargetPrefix = "nvvm" in {
       Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
 
-// Atomic not available as an llvm intrinsic.
+// Atomics not available as llvm intrinsics.
   def int_nvvm_atomic_load_add_f32 : Intrinsic<[llvm_float_ty],
       [LLVMAnyPointerType<llvm_float_ty>, llvm_float_ty],
       [IntrArgMemOnly, NoCapture<0>]>;
 
+  // Atomic add of f64 requires sm_60.
+  def int_nvvm_atomic_load_add_f64 : Intrinsic<[llvm_double_ty],
+      [LLVMAnyPointerType<llvm_double_ty>, llvm_double_ty],
+      [IntrArgMemOnly, NoCapture<0>]>;
+
   def int_nvvm_atomic_load_inc_32 : Intrinsic<[llvm_i32_ty],
       [LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty],
       [IntrArgMemOnly, NoCapture<0>]>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 7b9acb20b75..ac4f2544fc3 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -3449,6 +3449,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
   }
 
   case Intrinsic::nvvm_atomic_load_add_f32:
+  case Intrinsic::nvvm_atomic_load_add_f64:
   case Intrinsic::nvvm_atomic_load_inc_32:
   case Intrinsic::nvvm_atomic_load_dec_32:
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index f745b6f6635..478f3e9d057 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1095,6 +1095,12 @@ def atomic_load_add_f32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
   (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
 def atomic_load_add_f32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
+def atomic_load_add_f64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+  (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
+def atomic_load_add_f64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+  (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
+def atomic_load_add_f64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+  (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
 
 defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
   atomic_load_add_32_g, i32imm, imm, hasAtomRedG32>;
@@ -1121,6 +1127,13 @@ defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
   atomic_load_add_f32_s, f32imm, fpimm, hasAtomAddF32>;
 defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
   atomic_load_add_f32_gen, f32imm, fpimm, hasAtomAddF32>;
+defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
+  atomic_load_add_f64_g, f64imm, fpimm, hasAtomAddF64>;
+defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
+  atomic_load_add_f64_s, f64imm, fpimm, hasAtomAddF64>;
+defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
+  atomic_load_add_f64_gen, f64imm, fpimm, hasAtomAddF64>;
+
 // atom_sub
 def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
   (int_nvvm_atomic_load_sub_32 node:$a, node:$b)>;
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm60.ll b/llvm/test/CodeGen/NVPTX/atomics-sm60.ll
new file mode 100644
index 00000000000..0b5bafb780c
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm60.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_60 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 | FileCheck %s
+
+; CHECK-LABEL: .func test(
+define void @test(double* %dp0, double addrspace(1)* %dp1, double addrspace(3)* %dp3, double %d) {
+; CHECK: atom.add.f64
+  %r1 = call double @llvm.nvvm.atomic.load.add.f64.p0f64(double* %dp0, double %d)
+; CHECK: atom.global.add.f64
+  %r2 = call double @llvm.nvvm.atomic.load.add.f64.p1f64(double addrspace(1)* %dp1, double %d)
+; CHECK: atom.shared.add.f64
+  %ret = call double @llvm.nvvm.atomic.load.add.f64.p3f64(double addrspace(3)* %dp3, double %d)
+  ret void
+}
+
+declare double @llvm.nvvm.atomic.load.add.f64.p0f64(double* nocapture, double) #1
+declare double @llvm.nvvm.atomic.load.add.f64.p1f64(double addrspace(1)* nocapture, double) #1
+declare double @llvm.nvvm.atomic.load.add.f64.p3f64(double addrspace(3)* nocapture, double) #1
+
+attributes #1 = { argmemonly nounwind }
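
A usage note for completeness: on pre-sm_60 targets the builtin is rejected at compile time ("needs target feature satom", as the -verify run above checks), so portable CUDA code typically guards on __CUDA_ARCH__ and falls back to compare-and-swap emulation. The sketch below is that well-known fallback idiom, not part of this patch; it assumes the CUDA runtime headers provide __device__, atomicCAS, __double_as_longlong, and __longlong_as_double:

  // Hedged sketch: native f64 atomic add where available, CAS loop otherwise.
  __device__ double atomic_add_f64(double *addr, double val) {
  #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600
    return __nvvm_atom_add_gen_d(addr, val); // native atom.add.f64 (this patch)
  #else
    // Reinterpret the double as a 64-bit integer and retry the CAS until no
    // other thread modified *addr between our read and our swap.
    unsigned long long *p = (unsigned long long *)addr;
    unsigned long long old = *p, assumed;
    do {
      assumed = old;
      old = atomicCAS(p, assumed,
                      __double_as_longlong(val + __longlong_as_double(assumed)));
    } while (assumed != old);
    return __longlong_as_double(old);
  #endif
  }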