-rw-r--r-- | llvm/include/llvm/IR/IntrinsicsNVVM.td | 920
-rw-r--r-- | llvm/lib/Target/NVPTX/CMakeLists.txt | 2
-rw-r--r-- | llvm/lib/Target/NVPTX/NVPTX.h | 2
-rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 323
-rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h | 6
-rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 592
-rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 4
-rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 435
-rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 95
-rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp | 178
-rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 1823
-rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h | 46
-rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp | 357
-rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 4
-rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 8
-rw-r--r-- | llvm/test/CodeGen/NVPTX/surf-read.ll | 20
-rw-r--r-- | llvm/test/CodeGen/NVPTX/surf-write.ll | 16
-rw-r--r-- | llvm/test/CodeGen/NVPTX/tex-read.ll | 20
18 files changed, 4831 insertions, 20 deletions
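For a sense of how the new intrinsics are meant to be used, the sketch below is a minimal NVVM IR function written against the signatures this patch adds to IntrinsicsNVVM.td. It is illustrative only: the function name, parameter names, and output pointer are made up, and it is not one of the test files added by the commit (surf-read.ll, surf-write.ll, tex-read.ll); a real kernel would additionally carry the usual !nvvm.annotations kernel/image metadata.

; Hypothetical example (not part of the commit): the texture and sampler
; handles arrive as i64 values and are passed straight to the fetch
; intrinsic, which returns the four texel components as a literal struct.
target triple = "nvptx64-unknown-cuda"

declare { float, float, float, float }
    @llvm.nvvm.tex.2d.v4f32.f32(i64, i64, float, float)

define void @read_texel(i64 %tex, i64 %samp, float %u, float %v, float* %out) {
  %texel = call { float, float, float, float }
      @llvm.nvvm.tex.2d.v4f32.f32(i64 %tex, i64 %samp, float %u, float %v)
  %red = extractvalue { float, float, float, float } %texel, 0
  store float %red, float* %out
  ret void
}

On subtargets without native image-handle support, the new NVPTXReplaceImageHandles pass and the AsmPrinter changes in this patch cooperate to map handle operands back to the underlying texref/surfref/samplerref symbols, so the emitted tex/suld/sust instructions reference those symbols directly.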
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 7f72ce8b66f..26dc70acfd3 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -875,6 +875,14 @@ def int_nvvm_move_ptr : Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>], "llvm.nvvm.move.ptr">; +// For getting the handle from a texture or surface variable +def int_nvvm_texsurf_handle + : Intrinsic<[llvm_i64_ty], [llvm_metadata_ty, llvm_anyi64ptr_ty], + [IntrNoMem], "llvm.nvvm.texsurf.handle">; +def int_nvvm_texsurf_handle_internal + : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty], + [IntrNoMem], "llvm.nvvm.texsurf.handle.internal">; + /// Error / Warn def int_nvvm_compiler_error : Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.error">; @@ -882,6 +890,918 @@ def int_nvvm_compiler_warn : Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.warn">; +// Texture Fetch +def int_nvvm_tex_1d_v4f32_i32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.tex.1d.v4f32.i32">; +def int_nvvm_tex_1d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty], [], + "llvm.nvvm.tex.1d.v4f32.f32">; +def int_nvvm_tex_1d_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.1d.level.v4f32.f32">; +def int_nvvm_tex_1d_grad_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.1d.grad.v4f32.f32">; +def int_nvvm_tex_1d_v4i32_i32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.tex.1d.v4i32.i32">; +def int_nvvm_tex_1d_v4i32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty], [], + "llvm.nvvm.tex.1d.v4i32.f32">; +def int_nvvm_tex_1d_level_v4i32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.1d.level.v4i32.f32.level">; +def int_nvvm_tex_1d_grad_v4i32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.1d.grad.v4i32.f32">; + +def int_nvvm_tex_1d_array_v4f32_i32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.tex.1d.array.v4f32.i32">; +def int_nvvm_tex_1d_array_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty], [], + "llvm.nvvm.tex.1d.array.v4f32.f32">; +def int_nvvm_tex_1d_array_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.1d.array.level.v4f32.f32">; +def int_nvvm_tex_1d_array_grad_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.1d.array.grad.v4f32.f32">; +def int_nvvm_tex_1d_array_v4i32_i32 + : 
Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.tex.1d.array.v4i32.i32">; +def int_nvvm_tex_1d_array_v4i32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty], [], + "llvm.nvvm.tex.1d.array.v4i32.f32">; +def int_nvvm_tex_1d_array_level_v4i32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.1d.array.level.v4i32.f32">; +def int_nvvm_tex_1d_array_grad_v4i32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.1d.array.grad.v4i32.f32">; + +def int_nvvm_tex_2d_v4f32_i32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.tex.2d.v4f32.i32">; +def int_nvvm_tex_2d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.2d.v4f32.f32">; +def int_nvvm_tex_2d_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.2d.level.v4f32.f32">; +def int_nvvm_tex_2d_grad_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.2d.grad.v4f32.f32">; +def int_nvvm_tex_2d_v4i32_i32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.tex.2d.v4i32.i32">; +def int_nvvm_tex_2d_v4i32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.2d.v4i32.f32">; +def int_nvvm_tex_2d_level_v4i32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.2d.level.v4i32.f32">; +def int_nvvm_tex_2d_grad_v4i32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.2d.grad.v4i32.f32">; + +def int_nvvm_tex_2d_array_v4f32_i32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty], [], + "llvm.nvvm.tex.2d.array.v4f32.i32">; +def int_nvvm_tex_2d_array_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.2d.array.v4f32.f32">; +def int_nvvm_tex_2d_array_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.2d.array.level.v4f32.f32">; +def int_nvvm_tex_2d_array_grad_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + 
llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.2d.array.grad.v4f32.f32">; +def int_nvvm_tex_2d_array_v4i32_i32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty], [], + "llvm.nvvm.tex.2d.array.v4i32.i32">; +def int_nvvm_tex_2d_array_v4i32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.2d.array.v4i32.f32">; +def int_nvvm_tex_2d_array_level_v4i32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.2d.array.level.v4i32.f32">; +def int_nvvm_tex_2d_array_grad_v4i32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.2d.array.grad.v4i32.f32">; + +def int_nvvm_tex_3d_v4f32_i32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [], "llvm.nvvm.tex.3d.v4f32.i32">; +def int_nvvm_tex_3d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.3d.v4f32.f32">; +def int_nvvm_tex_3d_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.3d.level.v4f32.f32">; +def int_nvvm_tex_3d_grad_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.3d.grad.v4f32.f32">; +def int_nvvm_tex_3d_v4i32_i32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [], "llvm.nvvm.tex.3d.v4i32.i32">; +def int_nvvm_tex_3d_v4i32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], [], + "llvm.nvvm.tex.3d.v4i32.f32">; +def int_nvvm_tex_3d_level_v4i32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.3d.level.v4i32.f32">; +def int_nvvm_tex_3d_grad_v4i32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], [], + "llvm.nvvm.tex.3d.grad.v4i32.f32">; + +// Surface Load +def int_nvvm_suld_1d_i8_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.i8.trap">; +def int_nvvm_suld_1d_i16_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.i16.trap">; +def int_nvvm_suld_1d_i32_trap + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.i32.trap">; +def int_nvvm_suld_1d_v2i8_trap + : Intrinsic<[llvm_i16_ty, 
llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v2i8.trap">; +def int_nvvm_suld_1d_v2i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v2i16.trap">; +def int_nvvm_suld_1d_v2i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v2i32.trap">; +def int_nvvm_suld_1d_v4i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v4i8.trap">; +def int_nvvm_suld_1d_v4i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v4i16.trap">; +def int_nvvm_suld_1d_v4i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.v4i32.trap">; + +def int_nvvm_suld_1d_array_i8_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.i8.trap">; +def int_nvvm_suld_1d_array_i16_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.i16.trap">; +def int_nvvm_suld_1d_array_i32_trap + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.i32.trap">; +def int_nvvm_suld_1d_array_v2i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v2i8.trap">; +def int_nvvm_suld_1d_array_v2i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v2i16.trap">; +def int_nvvm_suld_1d_array_v2i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v2i32.trap">; +def int_nvvm_suld_1d_array_v4i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v4i8.trap">; +def int_nvvm_suld_1d_array_v4i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v4i16.trap">; +def int_nvvm_suld_1d_array_v4i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.1d.array.v4i32.trap">; + +def int_nvvm_suld_2d_i8_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.i8.trap">; +def int_nvvm_suld_2d_i16_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.i16.trap">; +def int_nvvm_suld_2d_i32_trap + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.i32.trap">; +def int_nvvm_suld_2d_v2i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.v2i8.trap">; +def int_nvvm_suld_2d_v2i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.v2i16.trap">; +def int_nvvm_suld_2d_v2i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.v2i32.trap">; +def int_nvvm_suld_2d_v4i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.v4i8.trap">; +def int_nvvm_suld_2d_v4i16_trap + : Intrinsic<[llvm_i16_ty, 
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.v4i16.trap">; +def int_nvvm_suld_2d_v4i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.v4i32.trap">; + +def int_nvvm_suld_2d_array_i8_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.i8.trap">; +def int_nvvm_suld_2d_array_i16_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.i16.trap">; +def int_nvvm_suld_2d_array_i32_trap + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.i32.trap">; +def int_nvvm_suld_2d_array_v2i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.v2i8.trap">; +def int_nvvm_suld_2d_array_v2i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.v2i16.trap">; +def int_nvvm_suld_2d_array_v2i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.v2i32.trap">; +def int_nvvm_suld_2d_array_v4i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.v4i8.trap">; +def int_nvvm_suld_2d_array_v4i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.v4i16.trap">; +def int_nvvm_suld_2d_array_v4i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.2d.array.v4i32.trap">; + +def int_nvvm_suld_3d_i8_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.i8.trap">; +def int_nvvm_suld_3d_i16_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.i16.trap">; +def int_nvvm_suld_3d_i32_trap + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.i32.trap">; +def int_nvvm_suld_3d_v2i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.v2i8.trap">; +def int_nvvm_suld_3d_v2i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.v2i16.trap">; +def int_nvvm_suld_3d_v2i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.v2i32.trap">; +def int_nvvm_suld_3d_v4i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.v4i8.trap">; +def int_nvvm_suld_3d_v4i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.v4i16.trap">; +def int_nvvm_suld_3d_v4i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.suld.3d.v4i32.trap">; + +//===- Texture Query 
------------------------------------------------------===// + +def int_nvvm_txq_channel_order + : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], + "llvm.nvvm.txq.channel.order">, + GCCBuiltin<"__nvvm_txq_channel_order">; +def int_nvvm_txq_channel_data_type + : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], + "llvm.nvvm.txq.channel.data.type">, + GCCBuiltin<"__nvvm_txq_channel_data_type">; +def int_nvvm_txq_width + : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], + "llvm.nvvm.txq.width">, + GCCBuiltin<"__nvvm_txq_width">; +def int_nvvm_txq_height + : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], + "llvm.nvvm.txq.height">, + GCCBuiltin<"__nvvm_txq_height">; +def int_nvvm_txq_depth + : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], + "llvm.nvvm.txq.depth">, + GCCBuiltin<"__nvvm_txq_depth">; +def int_nvvm_txq_array_size + : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], + "llvm.nvvm.txq.array.size">, + GCCBuiltin<"__nvvm_txq_array_size">; +def int_nvvm_txq_num_samples + : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], + "llvm.nvvm.txq.num.samples">, + GCCBuiltin<"__nvvm_txq_num_samples">; +def int_nvvm_txq_num_mipmap_levels + : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], + "llvm.nvvm.txq.num.mipmap.levels">, + GCCBuiltin<"__nvvm_txq_num_mipmap_levels">; + +//===- Surface Query ------------------------------------------------------===// + +def int_nvvm_suq_channel_order + : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], + "llvm.nvvm.suq.channel.order">, + GCCBuiltin<"__nvvm_suq_channel_order">; +def int_nvvm_suq_channel_data_type + : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], + "llvm.nvvm.suq.channel.data.type">, + GCCBuiltin<"__nvvm_suq_channel_data_type">; +def int_nvvm_suq_width + : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], + "llvm.nvvm.suq.width">, + GCCBuiltin<"__nvvm_suq_width">; +def int_nvvm_suq_height + : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], + "llvm.nvvm.suq.height">, + GCCBuiltin<"__nvvm_suq_height">; +def int_nvvm_suq_depth + : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], + "llvm.nvvm.suq.depth">, + GCCBuiltin<"__nvvm_suq_depth">; +def int_nvvm_suq_array_size + : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem], + "llvm.nvvm.suq.array.size">, + GCCBuiltin<"__nvvm_suq_array_size">; + + +//===- Handle Query -------------------------------------------------------===// + +def int_nvvm_istypep_sampler + : Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrNoMem], + "llvm.nvvm.istypep.sampler">, + GCCBuiltin<"__nvvm_istypep_sampler">; +def int_nvvm_istypep_surface + : Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrNoMem], + "llvm.nvvm.istypep.surface">, + GCCBuiltin<"__nvvm_istypep_surface">; +def int_nvvm_istypep_texture + : Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrNoMem], + "llvm.nvvm.istypep.texture">, + GCCBuiltin<"__nvvm_istypep_texture">; + + + +//===- Surface Stores -----------------------------------------------------===// + +// Unformatted + +def int_nvvm_sust_b_1d_i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.i8.trap">, + GCCBuiltin<"__nvvm_sust_b_1d_i8_trap">; +def int_nvvm_sust_b_1d_i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.i16.trap">, + GCCBuiltin<"__nvvm_sust_b_1d_i16_trap">; +def int_nvvm_sust_b_1d_i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.1d.i32.trap">, + GCCBuiltin<"__nvvm_sust_b_1d_i32_trap">; +def int_nvvm_sust_b_1d_v2i8_trap 
+ : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.v2i8.trap">, + GCCBuiltin<"__nvvm_sust_b_1d_v2i8_trap">; +def int_nvvm_sust_b_1d_v2i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.v2i16.trap">, + GCCBuiltin<"__nvvm_sust_b_1d_v2i16_trap">; +def int_nvvm_sust_b_1d_v2i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.1d.v2i32.trap">, + GCCBuiltin<"__nvvm_sust_b_1d_v2i32_trap">; +def int_nvvm_sust_b_1d_v4i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.v4i8.trap">, + GCCBuiltin<"__nvvm_sust_b_1d_v4i8_trap">; +def int_nvvm_sust_b_1d_v4i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.v4i16.trap">, + GCCBuiltin<"__nvvm_sust_b_1d_v4i16_trap">; +def int_nvvm_sust_b_1d_v4i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.1d.v4i32.trap">, + GCCBuiltin<"__nvvm_sust_b_1d_v4i32_trap">; + + +def int_nvvm_sust_b_1d_array_i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.array.i8.trap">, + GCCBuiltin<"__nvvm_sust_b_1d_array_i8_trap">; +def int_nvvm_sust_b_1d_array_i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.array.i16.trap">, + GCCBuiltin<"__nvvm_sust_b_1d_array_i16_trap">; +def int_nvvm_sust_b_1d_array_i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.1d.array.i32.trap">, + GCCBuiltin<"__nvvm_sust_b_1d_array_i32_trap">; +def int_nvvm_sust_b_1d_array_v2i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.array.v2i8.trap">, + GCCBuiltin<"__nvvm_sust_b_1d_array_v2i8_trap">; +def int_nvvm_sust_b_1d_array_v2i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.array.v2i16.trap">, + GCCBuiltin<"__nvvm_sust_b_1d_array_v2i16_trap">; +def int_nvvm_sust_b_1d_array_v2i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.1d.array.v2i32.trap">, + GCCBuiltin<"__nvvm_sust_b_1d_array_v2i32_trap">; +def int_nvvm_sust_b_1d_array_v4i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.array.v4i8.trap">, + GCCBuiltin<"__nvvm_sust_b_1d_array_v4i8_trap">; +def int_nvvm_sust_b_1d_array_v4i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.1d.array.v4i16.trap">, + GCCBuiltin<"__nvvm_sust_b_1d_array_v4i16_trap">; +def int_nvvm_sust_b_1d_array_v4i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.1d.array.v4i32.trap">, + GCCBuiltin<"__nvvm_sust_b_1d_array_v4i32_trap">; + + +def int_nvvm_sust_b_2d_i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.i8.trap">, + GCCBuiltin<"__nvvm_sust_b_2d_i8_trap">; +def int_nvvm_sust_b_2d_i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], + 
"llvm.nvvm.sust.b.2d.i16.trap">, + GCCBuiltin<"__nvvm_sust_b_2d_i16_trap">; +def int_nvvm_sust_b_2d_i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.2d.i32.trap">, + GCCBuiltin<"__nvvm_sust_b_2d_i32_trap">; +def int_nvvm_sust_b_2d_v2i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.v2i8.trap">, + GCCBuiltin<"__nvvm_sust_b_2d_v2i8_trap">; +def int_nvvm_sust_b_2d_v2i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.v2i16.trap">, + GCCBuiltin<"__nvvm_sust_b_2d_v2i16_trap">; +def int_nvvm_sust_b_2d_v2i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.2d.v2i32.trap">, + GCCBuiltin<"__nvvm_sust_b_2d_v2i32_trap">; +def int_nvvm_sust_b_2d_v4i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.v4i8.trap">, + GCCBuiltin<"__nvvm_sust_b_2d_v4i8_trap">; +def int_nvvm_sust_b_2d_v4i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.v4i16.trap">, + GCCBuiltin<"__nvvm_sust_b_2d_v4i16_trap">; +def int_nvvm_sust_b_2d_v4i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.2d.v4i32.trap">, + GCCBuiltin<"__nvvm_sust_b_2d_v4i32_trap">; + + +def int_nvvm_sust_b_2d_array_i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.array.i8.trap">, + GCCBuiltin<"__nvvm_sust_b_2d_array_i8_trap">; +def int_nvvm_sust_b_2d_array_i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.array.i16.trap">, + GCCBuiltin<"__nvvm_sust_b_2d_array_i16_trap">; +def int_nvvm_sust_b_2d_array_i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.2d.array.i32.trap">, + GCCBuiltin<"__nvvm_sust_b_2d_array_i32_trap">; +def int_nvvm_sust_b_2d_array_v2i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.array.v2i8.trap">, + GCCBuiltin<"__nvvm_sust_b_2d_array_v2i8_trap">; +def int_nvvm_sust_b_2d_array_v2i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.array.v2i16.trap">, + GCCBuiltin<"__nvvm_sust_b_2d_array_v2i16_trap">; +def int_nvvm_sust_b_2d_array_v2i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.2d.array.v2i32.trap">, + GCCBuiltin<"__nvvm_sust_b_2d_array_v2i32_trap">; +def int_nvvm_sust_b_2d_array_v4i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.array.v4i8.trap">, + GCCBuiltin<"__nvvm_sust_b_2d_array_v4i8_trap">; +def int_nvvm_sust_b_2d_array_v4i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.2d.array.v4i16.trap">, + GCCBuiltin<"__nvvm_sust_b_2d_array_v4i16_trap">; +def int_nvvm_sust_b_2d_array_v4i32_trap + : Intrinsic<[], 
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.2d.array.v4i32.trap">, + GCCBuiltin<"__nvvm_sust_b_2d_array_v4i32_trap">; + + +def int_nvvm_sust_b_3d_i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.3d.i8.trap">, + GCCBuiltin<"__nvvm_sust_b_3d_i8_trap">; +def int_nvvm_sust_b_3d_i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.3d.i16.trap">, + GCCBuiltin<"__nvvm_sust_b_3d_i16_trap">; +def int_nvvm_sust_b_3d_i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.3d.i32.trap">, + GCCBuiltin<"__nvvm_sust_b_3d_i32_trap">; +def int_nvvm_sust_b_3d_v2i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.3d.v2i8.trap">, + GCCBuiltin<"__nvvm_sust_b_3d_v2i8_trap">; +def int_nvvm_sust_b_3d_v2i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.3d.v2i16.trap">, + GCCBuiltin<"__nvvm_sust_b_3d_v2i16_trap">; +def int_nvvm_sust_b_3d_v2i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.3d.v2i32.trap">, + GCCBuiltin<"__nvvm_sust_b_3d_v2i32_trap">; +def int_nvvm_sust_b_3d_v4i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.3d.v4i8.trap">, + GCCBuiltin<"__nvvm_sust_b_3d_v4i8_trap">; +def int_nvvm_sust_b_3d_v4i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.b.3d.v4i16.trap">, + GCCBuiltin<"__nvvm_sust_b_3d_v4i16_trap">; +def int_nvvm_sust_b_3d_v4i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.b.3d.v4i32.trap">, + GCCBuiltin<"__nvvm_sust_b_3d_v4i32_trap">; + +// Formatted + +def int_nvvm_sust_p_1d_i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.1d.i8.trap">, + GCCBuiltin<"__nvvm_sust_p_1d_i8_trap">; +def int_nvvm_sust_p_1d_i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.1d.i16.trap">, + GCCBuiltin<"__nvvm_sust_p_1d_i16_trap">; +def int_nvvm_sust_p_1d_i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.p.1d.i32.trap">, + GCCBuiltin<"__nvvm_sust_p_1d_i32_trap">; +def int_nvvm_sust_p_1d_v2i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.1d.v2i8.trap">, + GCCBuiltin<"__nvvm_sust_p_1d_v2i8_trap">; +def int_nvvm_sust_p_1d_v2i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.1d.v2i16.trap">, + GCCBuiltin<"__nvvm_sust_p_1d_v2i16_trap">; +def int_nvvm_sust_p_1d_v2i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.p.1d.v2i32.trap">, + GCCBuiltin<"__nvvm_sust_p_1d_v2i32_trap">; +def int_nvvm_sust_p_1d_v4i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.1d.v4i8.trap">, + GCCBuiltin<"__nvvm_sust_p_1d_v4i8_trap">; +def 
int_nvvm_sust_p_1d_v4i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.1d.v4i16.trap">, + GCCBuiltin<"__nvvm_sust_p_1d_v4i16_trap">; +def int_nvvm_sust_p_1d_v4i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.p.1d.v4i32.trap">, + GCCBuiltin<"__nvvm_sust_p_1d_v4i32_trap">; + + +def int_nvvm_sust_p_1d_array_i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.1d.array.i8.trap">, + GCCBuiltin<"__nvvm_sust_p_1d_array_i8_trap">; +def int_nvvm_sust_p_1d_array_i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.1d.array.i16.trap">, + GCCBuiltin<"__nvvm_sust_p_1d_array_i16_trap">; +def int_nvvm_sust_p_1d_array_i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.p.1d.array.i32.trap">, + GCCBuiltin<"__nvvm_sust_p_1d_array_i32_trap">; +def int_nvvm_sust_p_1d_array_v2i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.1d.array.v2i8.trap">, + GCCBuiltin<"__nvvm_sust_p_1d_array_v2i8_trap">; +def int_nvvm_sust_p_1d_array_v2i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.1d.array.v2i16.trap">, + GCCBuiltin<"__nvvm_sust_p_1d_array_v2i16_trap">; +def int_nvvm_sust_p_1d_array_v2i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.p.1d.array.v2i32.trap">, + GCCBuiltin<"__nvvm_sust_p_1d_array_v2i32_trap">; +def int_nvvm_sust_p_1d_array_v4i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.1d.array.v4i8.trap">, + GCCBuiltin<"__nvvm_sust_p_1d_array_v4i8_trap">; +def int_nvvm_sust_p_1d_array_v4i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.1d.array.v4i16.trap">, + GCCBuiltin<"__nvvm_sust_p_1d_array_v4i16_trap">; +def int_nvvm_sust_p_1d_array_v4i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.p.1d.array.v4i32.trap">, + GCCBuiltin<"__nvvm_sust_p_1d_array_v4i32_trap">; + + +def int_nvvm_sust_p_2d_i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.2d.i8.trap">, + GCCBuiltin<"__nvvm_sust_p_2d_i8_trap">; +def int_nvvm_sust_p_2d_i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.2d.i16.trap">, + GCCBuiltin<"__nvvm_sust_p_2d_i16_trap">; +def int_nvvm_sust_p_2d_i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.p.2d.i32.trap">, + GCCBuiltin<"__nvvm_sust_p_2d_i32_trap">; +def int_nvvm_sust_p_2d_v2i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.2d.v2i8.trap">, + GCCBuiltin<"__nvvm_sust_p_2d_v2i8_trap">; +def int_nvvm_sust_p_2d_v2i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.2d.v2i16.trap">, + GCCBuiltin<"__nvvm_sust_p_2d_v2i16_trap">; +def int_nvvm_sust_p_2d_v2i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + 
llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.p.2d.v2i32.trap">, + GCCBuiltin<"__nvvm_sust_p_2d_v2i32_trap">; +def int_nvvm_sust_p_2d_v4i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.2d.v4i8.trap">, + GCCBuiltin<"__nvvm_sust_p_2d_v4i8_trap">; +def int_nvvm_sust_p_2d_v4i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.2d.v4i16.trap">, + GCCBuiltin<"__nvvm_sust_p_2d_v4i16_trap">; +def int_nvvm_sust_p_2d_v4i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.p.2d.v4i32.trap">, + GCCBuiltin<"__nvvm_sust_p_2d_v4i32_trap">; + + +def int_nvvm_sust_p_2d_array_i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.2d.array.i8.trap">, + GCCBuiltin<"__nvvm_sust_p_2d_array_i8_trap">; +def int_nvvm_sust_p_2d_array_i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.2d.array.i16.trap">, + GCCBuiltin<"__nvvm_sust_p_2d_array_i16_trap">; +def int_nvvm_sust_p_2d_array_i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.p.2d.array.i32.trap">, + GCCBuiltin<"__nvvm_sust_p_2d_array_i32_trap">; +def int_nvvm_sust_p_2d_array_v2i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.2d.array.v2i8.trap">, + GCCBuiltin<"__nvvm_sust_p_2d_array_v2i8_trap">; +def int_nvvm_sust_p_2d_array_v2i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.2d.array.v2i16.trap">, + GCCBuiltin<"__nvvm_sust_p_2d_array_v2i16_trap">; +def int_nvvm_sust_p_2d_array_v2i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.p.2d.array.v2i32.trap">, + GCCBuiltin<"__nvvm_sust_p_2d_array_v2i32_trap">; +def int_nvvm_sust_p_2d_array_v4i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.2d.array.v4i8.trap">, + GCCBuiltin<"__nvvm_sust_p_2d_array_v4i8_trap">; +def int_nvvm_sust_p_2d_array_v4i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.2d.array.v4i16.trap">, + GCCBuiltin<"__nvvm_sust_p_2d_array_v4i16_trap">; +def int_nvvm_sust_p_2d_array_v4i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.p.2d.array.v4i32.trap">, + GCCBuiltin<"__nvvm_sust_p_2d_array_v4i32_trap">; + + +def int_nvvm_sust_p_3d_i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.3d.i8.trap">, + GCCBuiltin<"__nvvm_sust_p_3d_i8_trap">; +def int_nvvm_sust_p_3d_i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.3d.i16.trap">, + GCCBuiltin<"__nvvm_sust_p_3d_i16_trap">; +def int_nvvm_sust_p_3d_i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.p.3d.i32.trap">, + 
GCCBuiltin<"__nvvm_sust_p_3d_i32_trap">; +def int_nvvm_sust_p_3d_v2i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.3d.v2i8.trap">, + GCCBuiltin<"__nvvm_sust_p_3d_v2i8_trap">; +def int_nvvm_sust_p_3d_v2i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.3d.v2i16.trap">, + GCCBuiltin<"__nvvm_sust_p_3d_v2i16_trap">; +def int_nvvm_sust_p_3d_v2i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.p.3d.v2i32.trap">, + GCCBuiltin<"__nvvm_sust_p_3d_v2i32_trap">; +def int_nvvm_sust_p_3d_v4i8_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.3d.v4i8.trap">, + GCCBuiltin<"__nvvm_sust_p_3d_v4i8_trap">; +def int_nvvm_sust_p_3d_v4i16_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [], + "llvm.nvvm.sust.p.3d.v4i16.trap">, + GCCBuiltin<"__nvvm_sust_p_3d_v4i16_trap">; +def int_nvvm_sust_p_3d_v4i32_trap + : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [], + "llvm.nvvm.sust.p.3d.v4i32.trap">, + GCCBuiltin<"__nvvm_sust_p_3d_v4i32_trap">; + + + // Old PTX back-end intrinsics retained here for backwards-compatibility multiclass PTXReadSpecialRegisterIntrinsic_v4i32<string prefix> { diff --git a/llvm/lib/Target/NVPTX/CMakeLists.txt b/llvm/lib/Target/NVPTX/CMakeLists.txt index 915b6ce53ed..4e35b181129 100644 --- a/llvm/lib/Target/NVPTX/CMakeLists.txt +++ b/llvm/lib/Target/NVPTX/CMakeLists.txt @@ -27,6 +27,8 @@ set(NVPTXCodeGen_sources NVPTXAssignValidGlobalNames.cpp NVPTXPrologEpilogPass.cpp NVPTXMCExpr.cpp + NVPTXReplaceImageHandles.cpp + NVPTXImageOptimizer.cpp ) add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources}) diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h index cd9f96577ba..e74c808f855 100644 --- a/llvm/lib/Target/NVPTX/NVPTX.h +++ b/llvm/lib/Target/NVPTX/NVPTX.h @@ -67,6 +67,8 @@ FunctionPass *createNVPTXFavorNonGenericAddrSpacesPass(); ModulePass *createNVVMReflectPass(); ModulePass *createNVVMReflectPass(const StringMap<int>& Mapping); MachineFunctionPass *createNVPTXPrologEpilogPass(); +MachineFunctionPass *createNVPTXReplaceImageHandlesPass(); +FunctionPass *createNVPTXImageOptimizerPass(); bool isImageOrSamplerVal(const Value *, const Module *); diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 760c59e2251..af6fc094331 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -17,6 +17,7 @@ #include "MCTargetDesc/NVPTXMCAsmInfo.h" #include "NVPTX.h" #include "NVPTXInstrInfo.h" +#include "NVPTXMachineFunctionInfo.h" #include "NVPTXMCExpr.h" #include "NVPTXRegisterInfo.h" #include "NVPTXTargetMachine.h" @@ -325,13 +326,279 @@ void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitToStreamer(OutStreamer, Inst); } +// Handle symbol backtracking for targets that do not support image handles +bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI, + unsigned OpNo, MCOperand &MCOp) { + const MachineOperand &MO = MI->getOperand(OpNo); + + switch (MI->getOpcode()) { + default: return false; + case NVPTX::TEX_1D_F32_I32: + case NVPTX::TEX_1D_F32_F32: + 
case NVPTX::TEX_1D_F32_F32_LEVEL: + case NVPTX::TEX_1D_F32_F32_GRAD: + case NVPTX::TEX_1D_I32_I32: + case NVPTX::TEX_1D_I32_F32: + case NVPTX::TEX_1D_I32_F32_LEVEL: + case NVPTX::TEX_1D_I32_F32_GRAD: + case NVPTX::TEX_1D_ARRAY_F32_I32: + case NVPTX::TEX_1D_ARRAY_F32_F32: + case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL: + case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD: + case NVPTX::TEX_1D_ARRAY_I32_I32: + case NVPTX::TEX_1D_ARRAY_I32_F32: + case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL: + case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD: + case NVPTX::TEX_2D_F32_I32: + case NVPTX::TEX_2D_F32_F32: + case NVPTX::TEX_2D_F32_F32_LEVEL: + case NVPTX::TEX_2D_F32_F32_GRAD: + case NVPTX::TEX_2D_I32_I32: + case NVPTX::TEX_2D_I32_F32: + case NVPTX::TEX_2D_I32_F32_LEVEL: + case NVPTX::TEX_2D_I32_F32_GRAD: + case NVPTX::TEX_2D_ARRAY_F32_I32: + case NVPTX::TEX_2D_ARRAY_F32_F32: + case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL: + case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD: + case NVPTX::TEX_2D_ARRAY_I32_I32: + case NVPTX::TEX_2D_ARRAY_I32_F32: + case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL: + case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD: + case NVPTX::TEX_3D_F32_I32: + case NVPTX::TEX_3D_F32_F32: + case NVPTX::TEX_3D_F32_F32_LEVEL: + case NVPTX::TEX_3D_F32_F32_GRAD: + case NVPTX::TEX_3D_I32_I32: + case NVPTX::TEX_3D_I32_F32: + case NVPTX::TEX_3D_I32_F32_LEVEL: + case NVPTX::TEX_3D_I32_F32_GRAD: + { + // This is a texture fetch, so operand 4 is a texref and operand 5 is + // a samplerref + if (OpNo == 4) { + lowerImageHandleSymbol(MO.getImm(), MCOp); + return true; + } + if (OpNo == 5) { + lowerImageHandleSymbol(MO.getImm(), MCOp); + return true; + } + + return false; + } + case NVPTX::SULD_1D_I8_TRAP: + case NVPTX::SULD_1D_I16_TRAP: + case NVPTX::SULD_1D_I32_TRAP: + case NVPTX::SULD_1D_ARRAY_I8_TRAP: + case NVPTX::SULD_1D_ARRAY_I16_TRAP: + case NVPTX::SULD_1D_ARRAY_I32_TRAP: + case NVPTX::SULD_2D_I8_TRAP: + case NVPTX::SULD_2D_I16_TRAP: + case NVPTX::SULD_2D_I32_TRAP: + case NVPTX::SULD_2D_ARRAY_I8_TRAP: + case NVPTX::SULD_2D_ARRAY_I16_TRAP: + case NVPTX::SULD_2D_ARRAY_I32_TRAP: + case NVPTX::SULD_3D_I8_TRAP: + case NVPTX::SULD_3D_I16_TRAP: + case NVPTX::SULD_3D_I32_TRAP: { + // This is a V1 surface load, so operand 1 is a surfref + if (OpNo == 1) { + lowerImageHandleSymbol(MO.getImm(), MCOp); + return true; + } + + return false; + } + case NVPTX::SULD_1D_V2I8_TRAP: + case NVPTX::SULD_1D_V2I16_TRAP: + case NVPTX::SULD_1D_V2I32_TRAP: + case NVPTX::SULD_1D_ARRAY_V2I8_TRAP: + case NVPTX::SULD_1D_ARRAY_V2I16_TRAP: + case NVPTX::SULD_1D_ARRAY_V2I32_TRAP: + case NVPTX::SULD_2D_V2I8_TRAP: + case NVPTX::SULD_2D_V2I16_TRAP: + case NVPTX::SULD_2D_V2I32_TRAP: + case NVPTX::SULD_2D_ARRAY_V2I8_TRAP: + case NVPTX::SULD_2D_ARRAY_V2I16_TRAP: + case NVPTX::SULD_2D_ARRAY_V2I32_TRAP: + case NVPTX::SULD_3D_V2I8_TRAP: + case NVPTX::SULD_3D_V2I16_TRAP: + case NVPTX::SULD_3D_V2I32_TRAP: { + // This is a V2 surface load, so operand 2 is a surfref + if (OpNo == 2) { + lowerImageHandleSymbol(MO.getImm(), MCOp); + return true; + } + + return false; + } + case NVPTX::SULD_1D_V4I8_TRAP: + case NVPTX::SULD_1D_V4I16_TRAP: + case NVPTX::SULD_1D_V4I32_TRAP: + case NVPTX::SULD_1D_ARRAY_V4I8_TRAP: + case NVPTX::SULD_1D_ARRAY_V4I16_TRAP: + case NVPTX::SULD_1D_ARRAY_V4I32_TRAP: + case NVPTX::SULD_2D_V4I8_TRAP: + case NVPTX::SULD_2D_V4I16_TRAP: + case NVPTX::SULD_2D_V4I32_TRAP: + case NVPTX::SULD_2D_ARRAY_V4I8_TRAP: + case NVPTX::SULD_2D_ARRAY_V4I16_TRAP: + case NVPTX::SULD_2D_ARRAY_V4I32_TRAP: + case NVPTX::SULD_3D_V4I8_TRAP: + case NVPTX::SULD_3D_V4I16_TRAP: + case NVPTX::SULD_3D_V4I32_TRAP: { + 
// This is a V4 surface load, so operand 4 is a surfref + if (OpNo == 4) { + lowerImageHandleSymbol(MO.getImm(), MCOp); + return true; + } + + return false; + } + case NVPTX::SUST_B_1D_B8_TRAP: + case NVPTX::SUST_B_1D_B16_TRAP: + case NVPTX::SUST_B_1D_B32_TRAP: + case NVPTX::SUST_B_1D_V2B8_TRAP: + case NVPTX::SUST_B_1D_V2B16_TRAP: + case NVPTX::SUST_B_1D_V2B32_TRAP: + case NVPTX::SUST_B_1D_V4B8_TRAP: + case NVPTX::SUST_B_1D_V4B16_TRAP: + case NVPTX::SUST_B_1D_V4B32_TRAP: + case NVPTX::SUST_B_1D_ARRAY_B8_TRAP: + case NVPTX::SUST_B_1D_ARRAY_B16_TRAP: + case NVPTX::SUST_B_1D_ARRAY_B32_TRAP: + case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP: + case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP: + case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP: + case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP: + case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP: + case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP: + case NVPTX::SUST_B_2D_B8_TRAP: + case NVPTX::SUST_B_2D_B16_TRAP: + case NVPTX::SUST_B_2D_B32_TRAP: + case NVPTX::SUST_B_2D_V2B8_TRAP: + case NVPTX::SUST_B_2D_V2B16_TRAP: + case NVPTX::SUST_B_2D_V2B32_TRAP: + case NVPTX::SUST_B_2D_V4B8_TRAP: + case NVPTX::SUST_B_2D_V4B16_TRAP: + case NVPTX::SUST_B_2D_V4B32_TRAP: + case NVPTX::SUST_B_2D_ARRAY_B8_TRAP: + case NVPTX::SUST_B_2D_ARRAY_B16_TRAP: + case NVPTX::SUST_B_2D_ARRAY_B32_TRAP: + case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP: + case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP: + case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP: + case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP: + case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP: + case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP: + case NVPTX::SUST_B_3D_B8_TRAP: + case NVPTX::SUST_B_3D_B16_TRAP: + case NVPTX::SUST_B_3D_B32_TRAP: + case NVPTX::SUST_B_3D_V2B8_TRAP: + case NVPTX::SUST_B_3D_V2B16_TRAP: + case NVPTX::SUST_B_3D_V2B32_TRAP: + case NVPTX::SUST_B_3D_V4B8_TRAP: + case NVPTX::SUST_B_3D_V4B16_TRAP: + case NVPTX::SUST_B_3D_V4B32_TRAP: + case NVPTX::SUST_P_1D_B8_TRAP: + case NVPTX::SUST_P_1D_B16_TRAP: + case NVPTX::SUST_P_1D_B32_TRAP: + case NVPTX::SUST_P_1D_V2B8_TRAP: + case NVPTX::SUST_P_1D_V2B16_TRAP: + case NVPTX::SUST_P_1D_V2B32_TRAP: + case NVPTX::SUST_P_1D_V4B8_TRAP: + case NVPTX::SUST_P_1D_V4B16_TRAP: + case NVPTX::SUST_P_1D_V4B32_TRAP: + case NVPTX::SUST_P_1D_ARRAY_B8_TRAP: + case NVPTX::SUST_P_1D_ARRAY_B16_TRAP: + case NVPTX::SUST_P_1D_ARRAY_B32_TRAP: + case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP: + case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP: + case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP: + case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP: + case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP: + case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP: + case NVPTX::SUST_P_2D_B8_TRAP: + case NVPTX::SUST_P_2D_B16_TRAP: + case NVPTX::SUST_P_2D_B32_TRAP: + case NVPTX::SUST_P_2D_V2B8_TRAP: + case NVPTX::SUST_P_2D_V2B16_TRAP: + case NVPTX::SUST_P_2D_V2B32_TRAP: + case NVPTX::SUST_P_2D_V4B8_TRAP: + case NVPTX::SUST_P_2D_V4B16_TRAP: + case NVPTX::SUST_P_2D_V4B32_TRAP: + case NVPTX::SUST_P_2D_ARRAY_B8_TRAP: + case NVPTX::SUST_P_2D_ARRAY_B16_TRAP: + case NVPTX::SUST_P_2D_ARRAY_B32_TRAP: + case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP: + case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP: + case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP: + case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP: + case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP: + case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP: + case NVPTX::SUST_P_3D_B8_TRAP: + case NVPTX::SUST_P_3D_B16_TRAP: + case NVPTX::SUST_P_3D_B32_TRAP: + case NVPTX::SUST_P_3D_V2B8_TRAP: + case NVPTX::SUST_P_3D_V2B16_TRAP: + case NVPTX::SUST_P_3D_V2B32_TRAP: + case NVPTX::SUST_P_3D_V4B8_TRAP: + case NVPTX::SUST_P_3D_V4B16_TRAP: + case NVPTX::SUST_P_3D_V4B32_TRAP: { + // This is a surface 
store, so operand 0 is a surfref + if (OpNo == 0) { + lowerImageHandleSymbol(MO.getImm(), MCOp); + return true; + } + + return false; + } + case NVPTX::TXQ_CHANNEL_ORDER: + case NVPTX::TXQ_CHANNEL_DATA_TYPE: + case NVPTX::TXQ_WIDTH: + case NVPTX::TXQ_HEIGHT: + case NVPTX::TXQ_DEPTH: + case NVPTX::TXQ_ARRAY_SIZE: + case NVPTX::TXQ_NUM_SAMPLES: + case NVPTX::TXQ_NUM_MIPMAP_LEVELS: + case NVPTX::SUQ_CHANNEL_ORDER: + case NVPTX::SUQ_CHANNEL_DATA_TYPE: + case NVPTX::SUQ_WIDTH: + case NVPTX::SUQ_HEIGHT: + case NVPTX::SUQ_DEPTH: + case NVPTX::SUQ_ARRAY_SIZE: { + // This is a query, so operand 1 is a surfref/texref + if (OpNo == 1) { + lowerImageHandleSymbol(MO.getImm(), MCOp); + return true; + } + + return false; + } + } +} + +void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) { + // Ewwww + TargetMachine &TM = const_cast<TargetMachine&>(MF->getTarget()); + NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM); + const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>(); + const char *Sym = MFI->getImageHandleSymbol(Index); + std::string *SymNamePtr = + nvTM.getManagedStrPool()->getManagedString(Sym); + MCOp = GetSymbolRef(OutContext.GetOrCreateSymbol( + StringRef(SymNamePtr->c_str()))); +} + void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) { OutMI.setOpcode(MI->getOpcode()); + const NVPTXSubtarget &ST = TM.getSubtarget<NVPTXSubtarget>(); // Special: Do not mangle symbol operand of CALL_PROTOTYPE if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) { const MachineOperand &MO = MI->getOperand(0); - OutMI.addOperand(GetSymbolRef(MO, + OutMI.addOperand(GetSymbolRef( OutContext.GetOrCreateSymbol(Twine(MO.getSymbolName())))); return; } @@ -340,6 +607,13 @@ void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) { const MachineOperand &MO = MI->getOperand(i); MCOperand MCOp; + if (!ST.hasImageHandles()) { + if (lowerImageHandleOperand(MI, i, MCOp)) { + OutMI.addOperand(MCOp); + continue; + } + } + if (lowerOperand(MO, MCOp)) OutMI.addOperand(MCOp); } @@ -360,10 +634,10 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO, MO.getMBB()->getSymbol(), OutContext)); break; case MachineOperand::MO_ExternalSymbol: - MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName())); + MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName())); break; case MachineOperand::MO_GlobalAddress: - MCOp = GetSymbolRef(MO, getSymbol(MO.getGlobal())); + MCOp = GetSymbolRef(getSymbol(MO.getGlobal())); break; case MachineOperand::MO_FPImmediate: { const ConstantFP *Cnt = MO.getFPImm(); @@ -422,8 +696,7 @@ unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) { } } -MCOperand NVPTXAsmPrinter::GetSymbolRef(const MachineOperand &MO, - const MCSymbol *Symbol) { +MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) { const MCExpr *Expr; Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, OutContext); @@ -1512,19 +1785,33 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { first = false; // Handle image/sampler parameters - if (llvm::isSampler(*I) || llvm::isImage(*I)) { - if (llvm::isImage(*I)) { - std::string sname = I->getName(); - if (llvm::isImageWriteOnly(*I)) - O << "\t.param .surfref " << *getSymbol(F) << "_param_" - << paramIndex; - else // Default image is read_only - O << "\t.param .texref " << *getSymbol(F) << "_param_" - << paramIndex; - } else // Should be llvm::isSampler(*I) - O << "\t.param .samplerref " << *getSymbol(F) << "_param_" - 
<< paramIndex; - continue; + if (isKernelFunction(*F)) { + if (isSampler(*I) || isImage(*I)) { + if (isImage(*I)) { + std::string sname = I->getName(); + if (isImageWriteOnly(*I) || isImageReadWrite(*I)) { + if (nvptxSubtarget.hasImageHandles()) + O << "\t.param .u64 .ptr .surfref "; + else + O << "\t.param .surfref "; + O << *CurrentFnSym << "_param_" << paramIndex; + } + else { // Default image is read_only + if (nvptxSubtarget.hasImageHandles()) + O << "\t.param .u64 .ptr .texref "; + else + O << "\t.param .texref "; + O << *CurrentFnSym << "_param_" << paramIndex; + } + } else { + if (nvptxSubtarget.hasImageHandles()) + O << "\t.param .u64 .ptr .samplerref "; + else + O << "\t.param .samplerref "; + O << *CurrentFnSym << "_param_" << paramIndex; + } + continue; + } } if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) { diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h index 9a9c33b6f52..c83a0f1f848 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -205,7 +205,7 @@ private: void EmitInstruction(const MachineInstr *); void lowerToMCInst(const MachineInstr *MI, MCInst &OutMI); bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); - MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol); + MCOperand GetSymbolRef(const MCSymbol *Symbol); unsigned encodeVirtualRegister(unsigned Reg); void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0) const {} @@ -287,6 +287,10 @@ private: static const char *getRegisterName(unsigned RegNo); void emitDemotedVars(const Function *, raw_ostream &); + bool lowerImageHandleOperand(const MachineInstr *MI, unsigned OpNo, + MCOperand &MCOp); + void lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp); + LineReader *reader; LineReader *getReader(std::string); diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index bd08d2d8ad9..9fd4bb1c699 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -162,6 +162,98 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) { case NVPTXISD::StoreParamU32: ResNode = SelectStoreParam(N); break; + case ISD::INTRINSIC_WO_CHAIN: + ResNode = SelectIntrinsicNoChain(N); + break; + case NVPTXISD::Tex1DFloatI32: + case NVPTXISD::Tex1DFloatFloat: + case NVPTXISD::Tex1DFloatFloatLevel: + case NVPTXISD::Tex1DFloatFloatGrad: + case NVPTXISD::Tex1DI32I32: + case NVPTXISD::Tex1DI32Float: + case NVPTXISD::Tex1DI32FloatLevel: + case NVPTXISD::Tex1DI32FloatGrad: + case NVPTXISD::Tex1DArrayFloatI32: + case NVPTXISD::Tex1DArrayFloatFloat: + case NVPTXISD::Tex1DArrayFloatFloatLevel: + case NVPTXISD::Tex1DArrayFloatFloatGrad: + case NVPTXISD::Tex1DArrayI32I32: + case NVPTXISD::Tex1DArrayI32Float: + case NVPTXISD::Tex1DArrayI32FloatLevel: + case NVPTXISD::Tex1DArrayI32FloatGrad: + case NVPTXISD::Tex2DFloatI32: + case NVPTXISD::Tex2DFloatFloat: + case NVPTXISD::Tex2DFloatFloatLevel: + case NVPTXISD::Tex2DFloatFloatGrad: + case NVPTXISD::Tex2DI32I32: + case NVPTXISD::Tex2DI32Float: + case NVPTXISD::Tex2DI32FloatLevel: + case NVPTXISD::Tex2DI32FloatGrad: + case NVPTXISD::Tex2DArrayFloatI32: + case NVPTXISD::Tex2DArrayFloatFloat: + case NVPTXISD::Tex2DArrayFloatFloatLevel: + case NVPTXISD::Tex2DArrayFloatFloatGrad: + case NVPTXISD::Tex2DArrayI32I32: + case NVPTXISD::Tex2DArrayI32Float: + case NVPTXISD::Tex2DArrayI32FloatLevel: + case NVPTXISD::Tex2DArrayI32FloatGrad: + case NVPTXISD::Tex3DFloatI32: + case 
NVPTXISD::Tex3DFloatFloat: + case NVPTXISD::Tex3DFloatFloatLevel: + case NVPTXISD::Tex3DFloatFloatGrad: + case NVPTXISD::Tex3DI32I32: + case NVPTXISD::Tex3DI32Float: + case NVPTXISD::Tex3DI32FloatLevel: + case NVPTXISD::Tex3DI32FloatGrad: + ResNode = SelectTextureIntrinsic(N); + break; + case NVPTXISD::Suld1DI8Trap: + case NVPTXISD::Suld1DI16Trap: + case NVPTXISD::Suld1DI32Trap: + case NVPTXISD::Suld1DV2I8Trap: + case NVPTXISD::Suld1DV2I16Trap: + case NVPTXISD::Suld1DV2I32Trap: + case NVPTXISD::Suld1DV4I8Trap: + case NVPTXISD::Suld1DV4I16Trap: + case NVPTXISD::Suld1DV4I32Trap: + case NVPTXISD::Suld1DArrayI8Trap: + case NVPTXISD::Suld1DArrayI16Trap: + case NVPTXISD::Suld1DArrayI32Trap: + case NVPTXISD::Suld1DArrayV2I8Trap: + case NVPTXISD::Suld1DArrayV2I16Trap: + case NVPTXISD::Suld1DArrayV2I32Trap: + case NVPTXISD::Suld1DArrayV4I8Trap: + case NVPTXISD::Suld1DArrayV4I16Trap: + case NVPTXISD::Suld1DArrayV4I32Trap: + case NVPTXISD::Suld2DI8Trap: + case NVPTXISD::Suld2DI16Trap: + case NVPTXISD::Suld2DI32Trap: + case NVPTXISD::Suld2DV2I8Trap: + case NVPTXISD::Suld2DV2I16Trap: + case NVPTXISD::Suld2DV2I32Trap: + case NVPTXISD::Suld2DV4I8Trap: + case NVPTXISD::Suld2DV4I16Trap: + case NVPTXISD::Suld2DV4I32Trap: + case NVPTXISD::Suld2DArrayI8Trap: + case NVPTXISD::Suld2DArrayI16Trap: + case NVPTXISD::Suld2DArrayI32Trap: + case NVPTXISD::Suld2DArrayV2I8Trap: + case NVPTXISD::Suld2DArrayV2I16Trap: + case NVPTXISD::Suld2DArrayV2I32Trap: + case NVPTXISD::Suld2DArrayV4I8Trap: + case NVPTXISD::Suld2DArrayV4I16Trap: + case NVPTXISD::Suld2DArrayV4I32Trap: + case NVPTXISD::Suld3DI8Trap: + case NVPTXISD::Suld3DI16Trap: + case NVPTXISD::Suld3DI32Trap: + case NVPTXISD::Suld3DV2I8Trap: + case NVPTXISD::Suld3DV2I16Trap: + case NVPTXISD::Suld3DV2I32Trap: + case NVPTXISD::Suld3DV4I8Trap: + case NVPTXISD::Suld3DV4I16Trap: + case NVPTXISD::Suld3DV4I32Trap: + ResNode = SelectSurfaceIntrinsic(N); + break; case ISD::ADDRSPACECAST: ResNode = SelectAddrSpaceCast(N); break; @@ -194,6 +286,24 @@ static unsigned int getCodeAddrSpace(MemSDNode *N, return NVPTX::PTXLdStInstCode::GENERIC; } +SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) { + unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + switch (IID) { + default: + return NULL; + case Intrinsic::nvvm_texsurf_handle_internal: + return SelectTexSurfHandle(N); + } +} + +SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) { + // Op 0 is the intrinsic ID + SDValue Wrapper = N->getOperand(1); + SDValue GlobalVal = Wrapper.getOperand(0); + return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64, + GlobalVal); +} + SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { SDValue Src = N->getOperand(0); AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N); @@ -2371,6 +2481,488 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) { return Ret; } +SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) { + SDValue Chain = N->getOperand(0); + SDValue TexRef = N->getOperand(1); + SDValue SampRef = N->getOperand(2); + SDNode *Ret = NULL; + unsigned Opc = 0; + SmallVector<SDValue, 8> Ops; + + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::Tex1DFloatI32: + Opc = NVPTX::TEX_1D_F32_I32; + break; + case NVPTXISD::Tex1DFloatFloat: + Opc = NVPTX::TEX_1D_F32_F32; + break; + case NVPTXISD::Tex1DFloatFloatLevel: + Opc = NVPTX::TEX_1D_F32_F32_LEVEL; + break; + case NVPTXISD::Tex1DFloatFloatGrad: + Opc = NVPTX::TEX_1D_F32_F32_GRAD; + break; + case NVPTXISD::Tex1DI32I32: + Opc = NVPTX::TEX_1D_I32_I32; + 
break; + case NVPTXISD::Tex1DI32Float: + Opc = NVPTX::TEX_1D_I32_F32; + break; + case NVPTXISD::Tex1DI32FloatLevel: + Opc = NVPTX::TEX_1D_I32_F32_LEVEL; + break; + case NVPTXISD::Tex1DI32FloatGrad: + Opc = NVPTX::TEX_1D_I32_F32_GRAD; + break; + case NVPTXISD::Tex1DArrayFloatI32: + Opc = NVPTX::TEX_1D_ARRAY_F32_I32; + break; + case NVPTXISD::Tex1DArrayFloatFloat: + Opc = NVPTX::TEX_1D_ARRAY_F32_F32; + break; + case NVPTXISD::Tex1DArrayFloatFloatLevel: + Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL; + break; + case NVPTXISD::Tex1DArrayFloatFloatGrad: + Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD; + break; + case NVPTXISD::Tex1DArrayI32I32: + Opc = NVPTX::TEX_1D_ARRAY_I32_I32; + break; + case NVPTXISD::Tex1DArrayI32Float: + Opc = NVPTX::TEX_1D_ARRAY_I32_F32; + break; + case NVPTXISD::Tex1DArrayI32FloatLevel: + Opc = NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL; + break; + case NVPTXISD::Tex1DArrayI32FloatGrad: + Opc = NVPTX::TEX_1D_ARRAY_I32_F32_GRAD; + break; + case NVPTXISD::Tex2DFloatI32: + Opc = NVPTX::TEX_2D_F32_I32; + break; + case NVPTXISD::Tex2DFloatFloat: + Opc = NVPTX::TEX_2D_F32_F32; + break; + case NVPTXISD::Tex2DFloatFloatLevel: + Opc = NVPTX::TEX_2D_F32_F32_LEVEL; + break; + case NVPTXISD::Tex2DFloatFloatGrad: + Opc = NVPTX::TEX_2D_F32_F32_GRAD; + break; + case NVPTXISD::Tex2DI32I32: + Opc = NVPTX::TEX_2D_I32_I32; + break; + case NVPTXISD::Tex2DI32Float: + Opc = NVPTX::TEX_2D_I32_F32; + break; + case NVPTXISD::Tex2DI32FloatLevel: + Opc = NVPTX::TEX_2D_I32_F32_LEVEL; + break; + case NVPTXISD::Tex2DI32FloatGrad: + Opc = NVPTX::TEX_2D_I32_F32_GRAD; + break; + case NVPTXISD::Tex2DArrayFloatI32: + Opc = NVPTX::TEX_2D_ARRAY_F32_I32; + break; + case NVPTXISD::Tex2DArrayFloatFloat: + Opc = NVPTX::TEX_2D_ARRAY_F32_F32; + break; + case NVPTXISD::Tex2DArrayFloatFloatLevel: + Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL; + break; + case NVPTXISD::Tex2DArrayFloatFloatGrad: + Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD; + break; + case NVPTXISD::Tex2DArrayI32I32: + Opc = NVPTX::TEX_2D_ARRAY_I32_I32; + break; + case NVPTXISD::Tex2DArrayI32Float: + Opc = NVPTX::TEX_2D_ARRAY_I32_F32; + break; + case NVPTXISD::Tex2DArrayI32FloatLevel: + Opc = NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL; + break; + case NVPTXISD::Tex2DArrayI32FloatGrad: + Opc = NVPTX::TEX_2D_ARRAY_I32_F32_GRAD; + break; + case NVPTXISD::Tex3DFloatI32: + Opc = NVPTX::TEX_3D_F32_I32; + break; + case NVPTXISD::Tex3DFloatFloat: + Opc = NVPTX::TEX_3D_F32_F32; + break; + case NVPTXISD::Tex3DFloatFloatLevel: + Opc = NVPTX::TEX_3D_F32_F32_LEVEL; + break; + case NVPTXISD::Tex3DFloatFloatGrad: + Opc = NVPTX::TEX_3D_F32_F32_GRAD; + break; + case NVPTXISD::Tex3DI32I32: + Opc = NVPTX::TEX_3D_I32_I32; + break; + case NVPTXISD::Tex3DI32Float: + Opc = NVPTX::TEX_3D_I32_F32; + break; + case NVPTXISD::Tex3DI32FloatLevel: + Opc = NVPTX::TEX_3D_I32_F32_LEVEL; + break; + case NVPTXISD::Tex3DI32FloatGrad: + Opc = NVPTX::TEX_3D_I32_F32_GRAD; + break; + } + + Ops.push_back(TexRef); + Ops.push_back(SampRef); + + // Copy over indices + for (unsigned i = 3; i < N->getNumOperands(); ++i) { + Ops.push_back(N->getOperand(i)); + } + + Ops.push_back(Chain); + Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); + return Ret; +} + +SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) { + SDValue Chain = N->getOperand(0); + SDValue TexHandle = N->getOperand(1); + SDNode *Ret = NULL; + unsigned Opc = 0; + SmallVector<SDValue, 8> Ops; + switch (N->getOpcode()) { + default: return NULL; + case NVPTXISD::Suld1DI8Trap: + Opc = NVPTX::SULD_1D_I8_TRAP; + Ops.push_back(TexHandle); + 
Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DI16Trap: + Opc = NVPTX::SULD_1D_I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DI32Trap: + Opc = NVPTX::SULD_1D_I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV2I8Trap: + Opc = NVPTX::SULD_1D_V2I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV2I16Trap: + Opc = NVPTX::SULD_1D_V2I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV2I32Trap: + Opc = NVPTX::SULD_1D_V2I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV4I8Trap: + Opc = NVPTX::SULD_1D_V4I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV4I16Trap: + Opc = NVPTX::SULD_1D_V4I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV4I32Trap: + Opc = NVPTX::SULD_1D_V4I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayI8Trap: + Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayI16Trap: + Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayI32Trap: + Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV2I8Trap: + Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV2I16Trap: + Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV2I32Trap: + Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV4I8Trap: + Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV4I16Trap: + Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV4I32Trap: + Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DI8Trap: + Opc = NVPTX::SULD_2D_I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DI16Trap: + Opc = NVPTX::SULD_2D_I16_TRAP; + Ops.push_back(TexHandle); + 
Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DI32Trap: + Opc = NVPTX::SULD_2D_I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV2I8Trap: + Opc = NVPTX::SULD_2D_V2I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV2I16Trap: + Opc = NVPTX::SULD_2D_V2I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV2I32Trap: + Opc = NVPTX::SULD_2D_V2I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV4I8Trap: + Opc = NVPTX::SULD_2D_V4I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV4I16Trap: + Opc = NVPTX::SULD_2D_V4I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV4I32Trap: + Opc = NVPTX::SULD_2D_V4I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayI8Trap: + Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayI16Trap: + Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayI32Trap: + Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV2I8Trap: + Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV2I16Trap: + Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV2I32Trap: + Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV4I8Trap: + Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV4I16Trap: + Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV4I32Trap: + Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP; + Ops.push_back(TexHandle); + 
Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DI8Trap: + Opc = NVPTX::SULD_3D_I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DI16Trap: + Opc = NVPTX::SULD_3D_I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DI32Trap: + Opc = NVPTX::SULD_3D_I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV2I8Trap: + Opc = NVPTX::SULD_3D_V2I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV2I16Trap: + Opc = NVPTX::SULD_3D_V2I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV2I32Trap: + Opc = NVPTX::SULD_3D_V2I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV4I8Trap: + Opc = NVPTX::SULD_3D_V4I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV4I16Trap: + Opc = NVPTX::SULD_3D_V4I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV4I32Trap: + Opc = NVPTX::SULD_3D_V4I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + } + Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); + return Ret; +} + // SelectDirectAddr - Match a direct address for DAG. // A direct address could be a globaladdress or externalsymbol. 
 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 93ad16911b5..fda4e71ec8d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -59,6 +59,8 @@ private:
 #include "NVPTXGenDAGISel.inc"
   SDNode *Select(SDNode *N);
+  SDNode *SelectIntrinsicNoChain(SDNode *N);
+  SDNode *SelectTexSurfHandle(SDNode *N);
   SDNode *SelectLoad(SDNode *N);
   SDNode *SelectLoadVector(SDNode *N);
   SDNode *SelectLDGLDUVector(SDNode *N);
@@ -68,6 +70,8 @@ private:
   SDNode *SelectStoreRetval(SDNode *N);
   SDNode *SelectStoreParam(SDNode *N);
   SDNode *SelectAddrSpaceCast(SDNode *N);
+  SDNode *SelectTextureIntrinsic(SDNode *N);
+  SDNode *SelectSurfaceIntrinsic(SDNode *N);
 
   inline SDValue getI32Imm(unsigned Imm) {
     return CurDAG->getTargetConstant(Imm, MVT::i32);
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 8e25a657afb..0021042d903 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -328,6 +328,116 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
     return "NVPTXISD::StoreV2";
   case NVPTXISD::StoreV4:
     return "NVPTXISD::StoreV4";
+  case NVPTXISD::Tex1DFloatI32: return "NVPTXISD::Tex1DFloatI32";
+  case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat";
+  case NVPTXISD::Tex1DFloatFloatLevel:
+    return "NVPTXISD::Tex1DFloatFloatLevel";
+  case NVPTXISD::Tex1DFloatFloatGrad:
+    return "NVPTXISD::Tex1DFloatFloatGrad";
+  case NVPTXISD::Tex1DI32I32: return "NVPTXISD::Tex1DI32I32";
+  case NVPTXISD::Tex1DI32Float: return "NVPTXISD::Tex1DI32Float";
+  case NVPTXISD::Tex1DI32FloatLevel:
+    return "NVPTXISD::Tex1DI32FloatLevel";
+  case NVPTXISD::Tex1DI32FloatGrad:
+    return "NVPTXISD::Tex1DI32FloatGrad";
+  case NVPTXISD::Tex1DArrayFloatI32: return "NVPTXISD::Tex1DArrayFloatI32";
+  case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat";
+  case NVPTXISD::Tex1DArrayFloatFloatLevel:
+    return "NVPTXISD::Tex1DArrayFloatFloatLevel";
+  case NVPTXISD::Tex1DArrayFloatFloatGrad:
+    return "NVPTXISD::Tex1DArrayFloatFloatGrad";
+  case NVPTXISD::Tex1DArrayI32I32: return "NVPTXISD::Tex1DArrayI32I32";
+  case NVPTXISD::Tex1DArrayI32Float: return "NVPTXISD::Tex1DArrayI32Float";
+  case NVPTXISD::Tex1DArrayI32FloatLevel:
+    return "NVPTXISD::Tex1DArrayI32FloatLevel";
+  case NVPTXISD::Tex1DArrayI32FloatGrad:
+    return "NVPTXISD::Tex1DArrayI32FloatGrad";
+  case NVPTXISD::Tex2DFloatI32: return "NVPTXISD::Tex2DFloatI32";
+  case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat";
+  case NVPTXISD::Tex2DFloatFloatLevel:
+    return "NVPTXISD::Tex2DFloatFloatLevel";
+  case NVPTXISD::Tex2DFloatFloatGrad:
+    return "NVPTXISD::Tex2DFloatFloatGrad";
+  case NVPTXISD::Tex2DI32I32: return "NVPTXISD::Tex2DI32I32";
+  case NVPTXISD::Tex2DI32Float: return "NVPTXISD::Tex2DI32Float";
+  case NVPTXISD::Tex2DI32FloatLevel:
+    return "NVPTXISD::Tex2DI32FloatLevel";
+  case NVPTXISD::Tex2DI32FloatGrad:
+    return "NVPTXISD::Tex2DI32FloatGrad";
+  case NVPTXISD::Tex2DArrayFloatI32: return "NVPTXISD::Tex2DArrayFloatI32";
+  case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat";
+  case NVPTXISD::Tex2DArrayFloatFloatLevel:
+    return "NVPTXISD::Tex2DArrayFloatFloatLevel";
+  case NVPTXISD::Tex2DArrayFloatFloatGrad:
+    return "NVPTXISD::Tex2DArrayFloatFloatGrad";
+  case NVPTXISD::Tex2DArrayI32I32: return "NVPTXISD::Tex2DArrayI32I32";
+ case NVPTXISD::Tex2DArrayI32Float: return "NVPTXISD::Tex2DArrayI32Float"; + case NVPTXISD::Tex2DArrayI32FloatLevel: + return "NVPTXISD::Tex2DArrayI32FloatLevel"; + case NVPTXISD::Tex2DArrayI32FloatGrad: + return "NVPTXISD::Tex2DArrayI32FloatGrad"; + case NVPTXISD::Tex3DFloatI32: return "NVPTXISD::Tex3DFloatI32"; + case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat"; + case NVPTXISD::Tex3DFloatFloatLevel: + return "NVPTXISD::Tex3DFloatFloatLevel"; + case NVPTXISD::Tex3DFloatFloatGrad: + return "NVPTXISD::Tex3DFloatFloatGrad"; + case NVPTXISD::Tex3DI32I32: return "NVPTXISD::Tex3DI32I32"; + case NVPTXISD::Tex3DI32Float: return "NVPTXISD::Tex3DI32Float"; + case NVPTXISD::Tex3DI32FloatLevel: + return "NVPTXISD::Tex3DI32FloatLevel"; + case NVPTXISD::Tex3DI32FloatGrad: + return "NVPTXISD::Tex3DI32FloatGrad"; + + case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap"; + case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap"; + case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap"; + case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap"; + case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap"; + case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap"; + case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap"; + case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap"; + case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap"; + + case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap"; + case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap"; + case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap"; + case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap"; + case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap"; + case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap"; + case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap"; + case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap"; + case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap"; + + case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap"; + case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap"; + case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap"; + case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap"; + case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap"; + case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap"; + case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap"; + case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap"; + case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap"; + + case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap"; + case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap"; + case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap"; + case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap"; + case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap"; + case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap"; + case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap"; + case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap"; + case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap"; + + case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap"; + case 
NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap"; + case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap"; + case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap"; + case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap"; + case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap"; + case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap"; + case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap"; + case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap"; } } @@ -1891,6 +2001,195 @@ bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { return false; } +static unsigned getOpcForTextureInstr(unsigned Intrinsic) { + switch (Intrinsic) { + default: + return 0; + + case Intrinsic::nvvm_tex_1d_v4f32_i32: + return NVPTXISD::Tex1DFloatI32; + case Intrinsic::nvvm_tex_1d_v4f32_f32: + return NVPTXISD::Tex1DFloatFloat; + case Intrinsic::nvvm_tex_1d_level_v4f32_f32: + return NVPTXISD::Tex1DFloatFloatLevel; + case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: + return NVPTXISD::Tex1DFloatFloatGrad; + case Intrinsic::nvvm_tex_1d_v4i32_i32: + return NVPTXISD::Tex1DI32I32; + case Intrinsic::nvvm_tex_1d_v4i32_f32: + return NVPTXISD::Tex1DI32Float; + case Intrinsic::nvvm_tex_1d_level_v4i32_f32: + return NVPTXISD::Tex1DI32FloatLevel; + case Intrinsic::nvvm_tex_1d_grad_v4i32_f32: + return NVPTXISD::Tex1DI32FloatGrad; + + case Intrinsic::nvvm_tex_1d_array_v4f32_i32: + return NVPTXISD::Tex1DArrayFloatI32; + case Intrinsic::nvvm_tex_1d_array_v4f32_f32: + return NVPTXISD::Tex1DArrayFloatFloat; + case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: + return NVPTXISD::Tex1DArrayFloatFloatLevel; + case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: + return NVPTXISD::Tex1DArrayFloatFloatGrad; + case Intrinsic::nvvm_tex_1d_array_v4i32_i32: + return NVPTXISD::Tex1DArrayI32I32; + case Intrinsic::nvvm_tex_1d_array_v4i32_f32: + return NVPTXISD::Tex1DArrayI32Float; + case Intrinsic::nvvm_tex_1d_array_level_v4i32_f32: + return NVPTXISD::Tex1DArrayI32FloatLevel; + case Intrinsic::nvvm_tex_1d_array_grad_v4i32_f32: + return NVPTXISD::Tex1DArrayI32FloatGrad; + + case Intrinsic::nvvm_tex_2d_v4f32_i32: + return NVPTXISD::Tex2DFloatI32; + case Intrinsic::nvvm_tex_2d_v4f32_f32: + return NVPTXISD::Tex2DFloatFloat; + case Intrinsic::nvvm_tex_2d_level_v4f32_f32: + return NVPTXISD::Tex2DFloatFloatLevel; + case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: + return NVPTXISD::Tex2DFloatFloatGrad; + case Intrinsic::nvvm_tex_2d_v4i32_i32: + return NVPTXISD::Tex2DI32I32; + case Intrinsic::nvvm_tex_2d_v4i32_f32: + return NVPTXISD::Tex2DI32Float; + case Intrinsic::nvvm_tex_2d_level_v4i32_f32: + return NVPTXISD::Tex2DI32FloatLevel; + case Intrinsic::nvvm_tex_2d_grad_v4i32_f32: + return NVPTXISD::Tex2DI32FloatGrad; + + case Intrinsic::nvvm_tex_2d_array_v4f32_i32: + return NVPTXISD::Tex2DArrayFloatI32; + case Intrinsic::nvvm_tex_2d_array_v4f32_f32: + return NVPTXISD::Tex2DArrayFloatFloat; + case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: + return NVPTXISD::Tex2DArrayFloatFloatLevel; + case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: + return NVPTXISD::Tex2DArrayFloatFloatGrad; + case Intrinsic::nvvm_tex_2d_array_v4i32_i32: + return NVPTXISD::Tex2DArrayI32I32; + case Intrinsic::nvvm_tex_2d_array_v4i32_f32: + return NVPTXISD::Tex2DArrayI32Float; + case Intrinsic::nvvm_tex_2d_array_level_v4i32_f32: + return NVPTXISD::Tex2DArrayI32FloatLevel; + case Intrinsic::nvvm_tex_2d_array_grad_v4i32_f32: + return NVPTXISD::Tex2DArrayI32FloatGrad; + + case 
Intrinsic::nvvm_tex_3d_v4f32_i32: + return NVPTXISD::Tex3DFloatI32; + case Intrinsic::nvvm_tex_3d_v4f32_f32: + return NVPTXISD::Tex3DFloatFloat; + case Intrinsic::nvvm_tex_3d_level_v4f32_f32: + return NVPTXISD::Tex3DFloatFloatLevel; + case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: + return NVPTXISD::Tex3DFloatFloatGrad; + case Intrinsic::nvvm_tex_3d_v4i32_i32: + return NVPTXISD::Tex3DI32I32; + case Intrinsic::nvvm_tex_3d_v4i32_f32: + return NVPTXISD::Tex3DI32Float; + case Intrinsic::nvvm_tex_3d_level_v4i32_f32: + return NVPTXISD::Tex3DI32FloatLevel; + case Intrinsic::nvvm_tex_3d_grad_v4i32_f32: + return NVPTXISD::Tex3DI32FloatGrad; + } +} + +static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { + switch (Intrinsic) { + default: + return 0; + case Intrinsic::nvvm_suld_1d_i8_trap: + return NVPTXISD::Suld1DI8Trap; + case Intrinsic::nvvm_suld_1d_i16_trap: + return NVPTXISD::Suld1DI16Trap; + case Intrinsic::nvvm_suld_1d_i32_trap: + return NVPTXISD::Suld1DI32Trap; + case Intrinsic::nvvm_suld_1d_v2i8_trap: + return NVPTXISD::Suld1DV2I8Trap; + case Intrinsic::nvvm_suld_1d_v2i16_trap: + return NVPTXISD::Suld1DV2I16Trap; + case Intrinsic::nvvm_suld_1d_v2i32_trap: + return NVPTXISD::Suld1DV2I32Trap; + case Intrinsic::nvvm_suld_1d_v4i8_trap: + return NVPTXISD::Suld1DV4I8Trap; + case Intrinsic::nvvm_suld_1d_v4i16_trap: + return NVPTXISD::Suld1DV4I16Trap; + case Intrinsic::nvvm_suld_1d_v4i32_trap: + return NVPTXISD::Suld1DV4I32Trap; + case Intrinsic::nvvm_suld_1d_array_i8_trap: + return NVPTXISD::Suld1DArrayI8Trap; + case Intrinsic::nvvm_suld_1d_array_i16_trap: + return NVPTXISD::Suld1DArrayI16Trap; + case Intrinsic::nvvm_suld_1d_array_i32_trap: + return NVPTXISD::Suld1DArrayI32Trap; + case Intrinsic::nvvm_suld_1d_array_v2i8_trap: + return NVPTXISD::Suld1DArrayV2I8Trap; + case Intrinsic::nvvm_suld_1d_array_v2i16_trap: + return NVPTXISD::Suld1DArrayV2I16Trap; + case Intrinsic::nvvm_suld_1d_array_v2i32_trap: + return NVPTXISD::Suld1DArrayV2I32Trap; + case Intrinsic::nvvm_suld_1d_array_v4i8_trap: + return NVPTXISD::Suld1DArrayV4I8Trap; + case Intrinsic::nvvm_suld_1d_array_v4i16_trap: + return NVPTXISD::Suld1DArrayV4I16Trap; + case Intrinsic::nvvm_suld_1d_array_v4i32_trap: + return NVPTXISD::Suld1DArrayV4I32Trap; + case Intrinsic::nvvm_suld_2d_i8_trap: + return NVPTXISD::Suld2DI8Trap; + case Intrinsic::nvvm_suld_2d_i16_trap: + return NVPTXISD::Suld2DI16Trap; + case Intrinsic::nvvm_suld_2d_i32_trap: + return NVPTXISD::Suld2DI32Trap; + case Intrinsic::nvvm_suld_2d_v2i8_trap: + return NVPTXISD::Suld2DV2I8Trap; + case Intrinsic::nvvm_suld_2d_v2i16_trap: + return NVPTXISD::Suld2DV2I16Trap; + case Intrinsic::nvvm_suld_2d_v2i32_trap: + return NVPTXISD::Suld2DV2I32Trap; + case Intrinsic::nvvm_suld_2d_v4i8_trap: + return NVPTXISD::Suld2DV4I8Trap; + case Intrinsic::nvvm_suld_2d_v4i16_trap: + return NVPTXISD::Suld2DV4I16Trap; + case Intrinsic::nvvm_suld_2d_v4i32_trap: + return NVPTXISD::Suld2DV4I32Trap; + case Intrinsic::nvvm_suld_2d_array_i8_trap: + return NVPTXISD::Suld2DArrayI8Trap; + case Intrinsic::nvvm_suld_2d_array_i16_trap: + return NVPTXISD::Suld2DArrayI16Trap; + case Intrinsic::nvvm_suld_2d_array_i32_trap: + return NVPTXISD::Suld2DArrayI32Trap; + case Intrinsic::nvvm_suld_2d_array_v2i8_trap: + return NVPTXISD::Suld2DArrayV2I8Trap; + case Intrinsic::nvvm_suld_2d_array_v2i16_trap: + return NVPTXISD::Suld2DArrayV2I16Trap; + case Intrinsic::nvvm_suld_2d_array_v2i32_trap: + return NVPTXISD::Suld2DArrayV2I32Trap; + case Intrinsic::nvvm_suld_2d_array_v4i8_trap: + return NVPTXISD::Suld2DArrayV4I8Trap; + case 
Intrinsic::nvvm_suld_2d_array_v4i16_trap: + return NVPTXISD::Suld2DArrayV4I16Trap; + case Intrinsic::nvvm_suld_2d_array_v4i32_trap: + return NVPTXISD::Suld2DArrayV4I32Trap; + case Intrinsic::nvvm_suld_3d_i8_trap: + return NVPTXISD::Suld3DI8Trap; + case Intrinsic::nvvm_suld_3d_i16_trap: + return NVPTXISD::Suld3DI16Trap; + case Intrinsic::nvvm_suld_3d_i32_trap: + return NVPTXISD::Suld3DI32Trap; + case Intrinsic::nvvm_suld_3d_v2i8_trap: + return NVPTXISD::Suld3DV2I8Trap; + case Intrinsic::nvvm_suld_3d_v2i16_trap: + return NVPTXISD::Suld3DV2I16Trap; + case Intrinsic::nvvm_suld_3d_v2i32_trap: + return NVPTXISD::Suld3DV2I32Trap; + case Intrinsic::nvvm_suld_3d_v4i8_trap: + return NVPTXISD::Suld3DV4I8Trap; + case Intrinsic::nvvm_suld_3d_v4i16_trap: + return NVPTXISD::Suld3DV4I16Trap; + case Intrinsic::nvvm_suld_3d_v4i32_trap: + return NVPTXISD::Suld3DV4I32Trap; + } +} + // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as // TgtMemIntrinsic // because we need the information that is only available in the "Value" type @@ -1944,6 +2243,142 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.align = 0; return true; + case Intrinsic::nvvm_tex_1d_v4f32_i32: + case Intrinsic::nvvm_tex_1d_v4f32_f32: + case Intrinsic::nvvm_tex_1d_level_v4f32_f32: + case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: + case Intrinsic::nvvm_tex_1d_array_v4f32_i32: + case Intrinsic::nvvm_tex_1d_array_v4f32_f32: + case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: + case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: + case Intrinsic::nvvm_tex_2d_v4f32_i32: + case Intrinsic::nvvm_tex_2d_v4f32_f32: + case Intrinsic::nvvm_tex_2d_level_v4f32_f32: + case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: + case Intrinsic::nvvm_tex_2d_array_v4f32_i32: + case Intrinsic::nvvm_tex_2d_array_v4f32_f32: + case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: + case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: + case Intrinsic::nvvm_tex_3d_v4f32_i32: + case Intrinsic::nvvm_tex_3d_v4f32_f32: + case Intrinsic::nvvm_tex_3d_level_v4f32_f32: + case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: { + Info.opc = getOpcForTextureInstr(Intrinsic); + Info.memVT = MVT::f32; + Info.ptrVal = NULL; + Info.offset = 0; + Info.vol = 0; + Info.readMem = true; + Info.writeMem = false; + Info.align = 16; + return true; + } + case Intrinsic::nvvm_tex_1d_v4i32_i32: + case Intrinsic::nvvm_tex_1d_v4i32_f32: + case Intrinsic::nvvm_tex_1d_level_v4i32_f32: + case Intrinsic::nvvm_tex_1d_grad_v4i32_f32: + case Intrinsic::nvvm_tex_1d_array_v4i32_i32: + case Intrinsic::nvvm_tex_1d_array_v4i32_f32: + case Intrinsic::nvvm_tex_1d_array_level_v4i32_f32: + case Intrinsic::nvvm_tex_1d_array_grad_v4i32_f32: + case Intrinsic::nvvm_tex_2d_v4i32_i32: + case Intrinsic::nvvm_tex_2d_v4i32_f32: + case Intrinsic::nvvm_tex_2d_level_v4i32_f32: + case Intrinsic::nvvm_tex_2d_grad_v4i32_f32: + case Intrinsic::nvvm_tex_2d_array_v4i32_i32: + case Intrinsic::nvvm_tex_2d_array_v4i32_f32: + case Intrinsic::nvvm_tex_2d_array_level_v4i32_f32: + case Intrinsic::nvvm_tex_2d_array_grad_v4i32_f32: + case Intrinsic::nvvm_tex_3d_v4i32_i32: + case Intrinsic::nvvm_tex_3d_v4i32_f32: + case Intrinsic::nvvm_tex_3d_level_v4i32_f32: + case Intrinsic::nvvm_tex_3d_grad_v4i32_f32: { + Info.opc = getOpcForTextureInstr(Intrinsic); + Info.memVT = MVT::i32; + Info.ptrVal = NULL; + Info.offset = 0; + Info.vol = 0; + Info.readMem = true; + Info.writeMem = false; + Info.align = 16; + return true; + } + case Intrinsic::nvvm_suld_1d_i8_trap: + case Intrinsic::nvvm_suld_1d_v2i8_trap: + case Intrinsic::nvvm_suld_1d_v4i8_trap: + 
case Intrinsic::nvvm_suld_1d_array_i8_trap: + case Intrinsic::nvvm_suld_1d_array_v2i8_trap: + case Intrinsic::nvvm_suld_1d_array_v4i8_trap: + case Intrinsic::nvvm_suld_2d_i8_trap: + case Intrinsic::nvvm_suld_2d_v2i8_trap: + case Intrinsic::nvvm_suld_2d_v4i8_trap: + case Intrinsic::nvvm_suld_2d_array_i8_trap: + case Intrinsic::nvvm_suld_2d_array_v2i8_trap: + case Intrinsic::nvvm_suld_2d_array_v4i8_trap: + case Intrinsic::nvvm_suld_3d_i8_trap: + case Intrinsic::nvvm_suld_3d_v2i8_trap: + case Intrinsic::nvvm_suld_3d_v4i8_trap: { + Info.opc = getOpcForSurfaceInstr(Intrinsic); + Info.memVT = MVT::i8; + Info.ptrVal = NULL; + Info.offset = 0; + Info.vol = 0; + Info.readMem = true; + Info.writeMem = false; + Info.align = 16; + return true; + } + case Intrinsic::nvvm_suld_1d_i16_trap: + case Intrinsic::nvvm_suld_1d_v2i16_trap: + case Intrinsic::nvvm_suld_1d_v4i16_trap: + case Intrinsic::nvvm_suld_1d_array_i16_trap: + case Intrinsic::nvvm_suld_1d_array_v2i16_trap: + case Intrinsic::nvvm_suld_1d_array_v4i16_trap: + case Intrinsic::nvvm_suld_2d_i16_trap: + case Intrinsic::nvvm_suld_2d_v2i16_trap: + case Intrinsic::nvvm_suld_2d_v4i16_trap: + case Intrinsic::nvvm_suld_2d_array_i16_trap: + case Intrinsic::nvvm_suld_2d_array_v2i16_trap: + case Intrinsic::nvvm_suld_2d_array_v4i16_trap: + case Intrinsic::nvvm_suld_3d_i16_trap: + case Intrinsic::nvvm_suld_3d_v2i16_trap: + case Intrinsic::nvvm_suld_3d_v4i16_trap: { + Info.opc = getOpcForSurfaceInstr(Intrinsic); + Info.memVT = MVT::i16; + Info.ptrVal = NULL; + Info.offset = 0; + Info.vol = 0; + Info.readMem = true; + Info.writeMem = false; + Info.align = 16; + return true; + } + case Intrinsic::nvvm_suld_1d_i32_trap: + case Intrinsic::nvvm_suld_1d_v2i32_trap: + case Intrinsic::nvvm_suld_1d_v4i32_trap: + case Intrinsic::nvvm_suld_1d_array_i32_trap: + case Intrinsic::nvvm_suld_1d_array_v2i32_trap: + case Intrinsic::nvvm_suld_1d_array_v4i32_trap: + case Intrinsic::nvvm_suld_2d_i32_trap: + case Intrinsic::nvvm_suld_2d_v2i32_trap: + case Intrinsic::nvvm_suld_2d_v4i32_trap: + case Intrinsic::nvvm_suld_2d_array_i32_trap: + case Intrinsic::nvvm_suld_2d_array_v2i32_trap: + case Intrinsic::nvvm_suld_2d_array_v4i32_trap: + case Intrinsic::nvvm_suld_3d_i32_trap: + case Intrinsic::nvvm_suld_3d_v2i32_trap: + case Intrinsic::nvvm_suld_3d_v4i32_trap: { + Info.opc = getOpcForSurfaceInstr(Intrinsic); + Info.memVT = MVT::i32; + Info.ptrVal = NULL; + Info.offset = 0; + Info.vol = 0; + Info.readMem = true; + Info.writeMem = false; + Info.align = 16; + return true; + } + } return false; } diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h index c1e8c21958c..38f64375425 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -70,7 +70,100 @@ enum NodeType { StoreParamU32, // to zext and store a <32bit value, not used currently StoreRetval, StoreRetvalV2, - StoreRetvalV4 + StoreRetvalV4, + + // Texture intrinsics + Tex1DFloatI32, + Tex1DFloatFloat, + Tex1DFloatFloatLevel, + Tex1DFloatFloatGrad, + Tex1DI32I32, + Tex1DI32Float, + Tex1DI32FloatLevel, + Tex1DI32FloatGrad, + Tex1DArrayFloatI32, + Tex1DArrayFloatFloat, + Tex1DArrayFloatFloatLevel, + Tex1DArrayFloatFloatGrad, + Tex1DArrayI32I32, + Tex1DArrayI32Float, + Tex1DArrayI32FloatLevel, + Tex1DArrayI32FloatGrad, + Tex2DFloatI32, + Tex2DFloatFloat, + Tex2DFloatFloatLevel, + Tex2DFloatFloatGrad, + Tex2DI32I32, + Tex2DI32Float, + Tex2DI32FloatLevel, + Tex2DI32FloatGrad, + Tex2DArrayFloatI32, + Tex2DArrayFloatFloat, + 
Tex2DArrayFloatFloatLevel, + Tex2DArrayFloatFloatGrad, + Tex2DArrayI32I32, + Tex2DArrayI32Float, + Tex2DArrayI32FloatLevel, + Tex2DArrayI32FloatGrad, + Tex3DFloatI32, + Tex3DFloatFloat, + Tex3DFloatFloatLevel, + Tex3DFloatFloatGrad, + Tex3DI32I32, + Tex3DI32Float, + Tex3DI32FloatLevel, + Tex3DI32FloatGrad, + + // Surface intrinsics + Suld1DI8Trap, + Suld1DI16Trap, + Suld1DI32Trap, + Suld1DV2I8Trap, + Suld1DV2I16Trap, + Suld1DV2I32Trap, + Suld1DV4I8Trap, + Suld1DV4I16Trap, + Suld1DV4I32Trap, + + Suld1DArrayI8Trap, + Suld1DArrayI16Trap, + Suld1DArrayI32Trap, + Suld1DArrayV2I8Trap, + Suld1DArrayV2I16Trap, + Suld1DArrayV2I32Trap, + Suld1DArrayV4I8Trap, + Suld1DArrayV4I16Trap, + Suld1DArrayV4I32Trap, + + Suld2DI8Trap, + Suld2DI16Trap, + Suld2DI32Trap, + Suld2DV2I8Trap, + Suld2DV2I16Trap, + Suld2DV2I32Trap, + Suld2DV4I8Trap, + Suld2DV4I16Trap, + Suld2DV4I32Trap, + + Suld2DArrayI8Trap, + Suld2DArrayI16Trap, + Suld2DArrayI32Trap, + Suld2DArrayV2I8Trap, + Suld2DArrayV2I16Trap, + Suld2DArrayV2I32Trap, + Suld2DArrayV4I8Trap, + Suld2DArrayV4I16Trap, + Suld2DArrayV4I32Trap, + + Suld3DI8Trap, + Suld3DI16Trap, + Suld3DI32Trap, + Suld3DV2I8Trap, + Suld3DV2I16Trap, + Suld3DV2I32Trap, + Suld3DV4I8Trap, + Suld3DV4I16Trap, + Suld3DV4I32Trap }; } diff --git a/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp new file mode 100644 index 00000000000..5b077638140 --- /dev/null +++ b/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp @@ -0,0 +1,178 @@ +//===-- NVPTXImageOptimizer.cpp - Image optimization pass -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements IR-level optimizations of image access code, +// including: +// +// 1. 
Eliminate istypep intrinsics when image access qualifier is known
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "NVPTXUtilities.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ConstantFolding.h"
+
+using namespace llvm;
+
+namespace {
+class NVPTXImageOptimizer : public FunctionPass {
+private:
+  static char ID;
+  SmallVector<Instruction*, 4> InstrToDelete;
+
+public:
+  NVPTXImageOptimizer();
+
+  bool runOnFunction(Function &F);
+
+private:
+  bool replaceIsTypePSampler(Instruction &I);
+  bool replaceIsTypePSurface(Instruction &I);
+  bool replaceIsTypePTexture(Instruction &I);
+  Value *cleanupValue(Value *V);
+  void replaceWith(Instruction *From, ConstantInt *To);
+};
+}
+
+char NVPTXImageOptimizer::ID = 0;
+
+NVPTXImageOptimizer::NVPTXImageOptimizer()
+  : FunctionPass(ID) {}
+
+bool NVPTXImageOptimizer::runOnFunction(Function &F) {
+  bool Changed = false;
+  InstrToDelete.clear();
+
+  // Look for call instructions in the function
+  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE;
+       ++BI) {
+    for (BasicBlock::iterator I = (*BI).begin(), E = (*BI).end();
+         I != E; ++I) {
+      Instruction &Instr = *I;
+      if (CallInst *CI = dyn_cast<CallInst>(I)) {
+        Function *CalledF = CI->getCalledFunction();
+        if (CalledF && CalledF->isIntrinsic()) {
+          // This is an intrinsic function call, check if it's an istypep
+          switch (CalledF->getIntrinsicID()) {
+          default: break;
+          case Intrinsic::nvvm_istypep_sampler:
+            Changed |= replaceIsTypePSampler(Instr);
+            break;
+          case Intrinsic::nvvm_istypep_surface:
+            Changed |= replaceIsTypePSurface(Instr);
+            break;
+          case Intrinsic::nvvm_istypep_texture:
+            Changed |= replaceIsTypePTexture(Instr);
+            break;
+          }
+        }
+      }
+    }
+  }
+
+  // Delete any istypep instances we replaced in the IR
+  for (unsigned i = 0, e = InstrToDelete.size(); i != e; ++i)
+    InstrToDelete[i]->eraseFromParent();
+
+  return Changed;
+}
+
+bool NVPTXImageOptimizer::replaceIsTypePSampler(Instruction &I) {
+  Value *TexHandle = cleanupValue(I.getOperand(0));
+  if (isSampler(*TexHandle)) {
+    // This is an OpenCL sampler, so it must be a samplerref
+    replaceWith(&I, ConstantInt::getTrue(I.getContext()));
+    return true;
+  } else if (isImageWriteOnly(*TexHandle) ||
+             isImageReadWrite(*TexHandle) ||
+             isImageReadOnly(*TexHandle)) {
+    // This is an OpenCL image, so it cannot be a samplerref
+    replaceWith(&I, ConstantInt::getFalse(I.getContext()));
+    return true;
+  } else {
+    // The image type is unknown, so we cannot eliminate the intrinsic
+    return false;
+  }
+}
+
+bool NVPTXImageOptimizer::replaceIsTypePSurface(Instruction &I) {
+  Value *TexHandle = cleanupValue(I.getOperand(0));
+  if (isImageReadWrite(*TexHandle) ||
+      isImageWriteOnly(*TexHandle)) {
+    // This is an OpenCL write-only/read-write image, so it must be a surfref
+    replaceWith(&I, ConstantInt::getTrue(I.getContext()));
+    return true;
+  } else if (isImageReadOnly(*TexHandle) ||
+             isSampler(*TexHandle)) {
+    // This is an OpenCL read-only image or a sampler, so it cannot be
+    // a surfref
+    replaceWith(&I, ConstantInt::getFalse(I.getContext()));
+    return true;
+  } else {
+    // The image type is unknown, so we cannot eliminate the intrinsic
+    return false;
+  }
+}
+
+bool NVPTXImageOptimizer::replaceIsTypePTexture(Instruction &I) {
+  Value *TexHandle = cleanupValue(I.getOperand(0));
+  if (isImageReadOnly(*TexHandle)) {
+    // This is an OpenCL read-only image, so it must be a 
texref + replaceWith(&I, ConstantInt::getTrue(I.getContext())); + return true; + } else if (isImageWriteOnly(*TexHandle) || + isImageReadWrite(*TexHandle) || + isSampler(*TexHandle)) { + // This is an OpenCL read-write/write-only image or a sampler, so it + // cannot be a texref + replaceWith(&I, ConstantInt::getFalse(I.getContext())); + return true; + } else { + // The image type is unknown, so we cannot eliminate the intrinsic + return false; + } +} + +void NVPTXImageOptimizer::replaceWith(Instruction *From, ConstantInt *To) { + // We implement "poor man's DCE" here to make sure any code that is no longer + // live is actually unreachable and can be trivially eliminated by the + // unreachable block elimiation pass. + for (CallInst::use_iterator UI = From->use_begin(), UE = From->use_end(); + UI != UE; ++UI) { + if (BranchInst *BI = dyn_cast<BranchInst>(*UI)) { + if (BI->isUnconditional()) continue; + BasicBlock *Dest; + if (To->isZero()) + // Get false block + Dest = BI->getSuccessor(1); + else + // Get true block + Dest = BI->getSuccessor(0); + BranchInst::Create(Dest, BI); + InstrToDelete.push_back(BI); + } + } + From->replaceAllUsesWith(To); + InstrToDelete.push_back(From); +} + +Value *NVPTXImageOptimizer::cleanupValue(Value *V) { + if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(V)) { + return cleanupValue(EVI->getAggregateOperand()); + } + return V; +} + +FunctionPass *llvm::createNVPTXImageOptimizerPass() { + return new NVPTXImageOptimizer(); +} diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 14049b1cf8e..5e228fc396c 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1666,6 +1666,9 @@ def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))), (nvvm_move_ptr32 texternalsym:$src)>; +def texsurf_handles + : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src), + "mov.u64 \t$result, $src;", []>; //----------------------------------- // Compiler Error Warn @@ -1686,6 +1689,1826 @@ def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a), [(int_nvvm_compiler_error Int64Regs:$a)]>; +//----------------------------------- +// Texture Intrinsics +//----------------------------------- + +// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be +// also defined in NVPTXReplaceImageHandles.cpp + + +// Texture fetch instructions using handles +def TEX_1D_F32_I32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), + "tex.1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", + []>; +def TEX_1D_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), + "tex.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", + []>; +def TEX_1D_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod), + "tex.level.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x\\}], $lod;", + []>; +def TEX_1D_F32_F32_GRAD + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, + Float32Regs:$gradx, Float32Regs:$grady), + "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; +def TEX_1D_I32_I32 + : 
NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), + "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", + []>; +def TEX_1D_I32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), + "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", + []>; +def TEX_1D_I32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, + Float32Regs:$lod), + "tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x\\}], $lod;", + []>; +def TEX_1D_I32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, + Float32Regs:$gradx, Float32Regs:$grady), + "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; + +def TEX_1D_ARRAY_F32_I32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "tex.a1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x\\}];", + []>; +def TEX_1D_ARRAY_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), + "tex.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x\\}];", + []>; +def TEX_1D_ARRAY_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$lod), + "tex.level.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x\\}], $lod;", + []>; +def TEX_1D_ARRAY_F32_F32_GRAD + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$gradx, Float32Regs:$grady), + "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; +def TEX_1D_ARRAY_I32_I32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x\\}];", + []>; +def TEX_1D_ARRAY_I32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), + "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x\\}];", + []>; +def TEX_1D_ARRAY_I32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$lod), + "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x\\}], $lod;", + []>; +def TEX_1D_ARRAY_I32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$gradx, Float32Regs:$grady), + "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; + +def TEX_2D_F32_I32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "tex.2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; 
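(For orientation, a minimal IR sketch of how these handle-based fetch instructions are reached; it is illustrative only and not part of the patch. A front end calls one of the llvm.nvvm.tex.* intrinsics with the texref and samplerref handles as i64 operands; getTgtMemIntrinsic and SelectTextureIntrinsic above then map the call onto the matching TEX_* instruction, e.g. TEX_2D_F32_F32 for the case below. The kernel and value names are made up.)

declare { float, float, float, float }
  @llvm.nvvm.tex.2d.v4f32.f32(i64, i64, float, float)

define void @sample_kernel(i64 %tex, i64 %samp, float %u, float %v, float* %out) {
  ; Fetch one v4f32 texel through the texture/sampler handles.
  %texel = tail call { float, float, float, float }
      @llvm.nvvm.tex.2d.v4f32.f32(i64 %tex, i64 %samp, float %u, float %v)
  ; Keep only the red channel for this example.
  %r = extractvalue { float, float, float, float } %texel, 0
  store float %r, float* %out
  ret void
}
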
+def TEX_2D_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tex.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TEX_2D_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$lod), + "tex.level.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y\\}], $lod;", + []>; +def TEX_2D_F32_F32_GRAD + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$grady0, Float32Regs:$grady1), + "tex.grad.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " + "\\{$grady0, $grady1\\};", + []>; +def TEX_2D_I32_I32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TEX_2D_I32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TEX_2D_I32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$lod), + "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y\\}], $lod;", + []>; +def TEX_2D_I32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$grady0, Float32Regs:$grady1), + "tex.grad.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " + "\\{$grady0, $grady1\\};", + []>; + +def TEX_2D_ARRAY_F32_I32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$y), + "tex.a2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $y\\}];", + []>; +def TEX_2D_ARRAY_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y), + "tex.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $y\\}];", + []>; +def TEX_2D_ARRAY_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y, Float32Regs:$lod), + "tex.level.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", + []>; +def TEX_2D_ARRAY_F32_F32_GRAD + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$grady0, Float32Regs:$grady1), + "tex.grad.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " + "\\{$grady0, $grady1\\};", + []>; +def TEX_2D_ARRAY_I32_I32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + 
Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$y), + "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $y\\}];", + []>; +def TEX_2D_ARRAY_I32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y), + "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $y\\}];", + []>; +def TEX_2D_ARRAY_I32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y, Float32Regs:$lod), + "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", + []>; +def TEX_2D_ARRAY_I32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$grady0, Float32Regs:$grady1), + "tex.grad.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " + "\\{$grady0, $grady1\\};", + []>; + +def TEX_3D_F32_I32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$z), + "tex.3d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_3D_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z), + "tex.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_3D_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z, Float32Regs:$lod), + "tex.level.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", + []>; +def TEX_3D_F32_F32_GRAD + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$gradx2, Float32Regs:$grady0, + Float32Regs:$grady1, Float32Regs:$grady2), + "tex.grad.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}], " + "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " + "\\{$grady0, $grady1, $grady2, $grady2\\};", + []>; +def TEX_3D_I32_I32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$z), + "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_3D_I32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z), + "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_3D_I32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z, Float32Regs:$lod), + "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", + []>; +def TEX_3D_I32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 
+ Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$gradx2, Float32Regs:$grady0, + Float32Regs:$grady1, Float32Regs:$grady2), + "tex.grad.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}], " + "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " + "\\{$grady0, $grady1, $grady2, $grady2\\};", + []>; + + +// Surface load instructions +def SULD_1D_I8_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_I16_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_I32_TRAP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V2I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V2I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V2I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V4I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V4I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V4I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; + +def SULD_1D_ARRAY_I8_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_I16_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_I32_TRAP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V2I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V2I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V2I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V4I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V4I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, 
Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V4I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x\\}];", + []>; + +def SULD_2D_I8_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_I16_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_I32_TRAP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V2I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V2I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V2I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V4I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V4I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V4I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; + +def SULD_2D_ARRAY_I8_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_I16_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_I32_TRAP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V2I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V2I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V2I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V4I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + 
"suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V4I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V4I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; + +def SULD_3D_I8_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_I16_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_I32_TRAP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V2I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V2I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V2I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V4I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V4I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V4I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$x, $y, $z, $z\\}];", + []>; + + +//----------------------------------- +// Texture Query Intrinsics +//----------------------------------- +def TXQ_CHANNEL_ORDER + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "txq.channel_order.b32 \t$d, [$a];", + []>; +def TXQ_CHANNEL_DATA_TYPE + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "txq.channel_data_type.b32 \t$d, [$a];", + []>; +def TXQ_WIDTH + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "txq.width.b32 \t$d, [$a];", + []>; +def TXQ_HEIGHT + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "txq.height.b32 \t$d, [$a];", + []>; +def TXQ_DEPTH + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "txq.depth.b32 \t$d, [$a];", + []>; +def TXQ_ARRAY_SIZE + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "txq.array_size.b32 \t$d, [$a];", + []>; +def TXQ_NUM_SAMPLES + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "txq.num_samples.b32 \t$d, [$a];", + []>; +def TXQ_NUM_MIPMAP_LEVELS + : NVPTXInst<(outs 
Int32Regs:$d), (ins Int64Regs:$a), + "txq.num_mipmap_levels.b32 \t$d, [$a];", + []>; + +def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a), + (TXQ_CHANNEL_ORDER Int64Regs:$a)>; +def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a), + (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>; +def : Pat<(int_nvvm_txq_width Int64Regs:$a), + (TXQ_WIDTH Int64Regs:$a)>; +def : Pat<(int_nvvm_txq_height Int64Regs:$a), + (TXQ_HEIGHT Int64Regs:$a)>; +def : Pat<(int_nvvm_txq_depth Int64Regs:$a), + (TXQ_DEPTH Int64Regs:$a)>; +def : Pat<(int_nvvm_txq_array_size Int64Regs:$a), + (TXQ_ARRAY_SIZE Int64Regs:$a)>; +def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a), + (TXQ_NUM_SAMPLES Int64Regs:$a)>; +def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a), + (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>; + + +//----------------------------------- +// Surface Query Intrinsics +//----------------------------------- +def SUQ_CHANNEL_ORDER + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "suq.channel_order.b32 \t$d, [$a];", + []>; +def SUQ_CHANNEL_DATA_TYPE + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "suq.channel_data_type.b32 \t$d, [$a];", + []>; +def SUQ_WIDTH + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "suq.width.b32 \t$d, [$a];", + []>; +def SUQ_HEIGHT + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "suq.height.b32 \t$d, [$a];", + []>; +def SUQ_DEPTH + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "suq.depth.b32 \t$d, [$a];", + []>; +def SUQ_ARRAY_SIZE + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "suq.array_size.b32 \t$d, [$a];", + []>; + +def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a), + (SUQ_CHANNEL_ORDER Int64Regs:$a)>; +def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a), + (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>; +def : Pat<(int_nvvm_suq_width Int64Regs:$a), + (SUQ_WIDTH Int64Regs:$a)>; +def : Pat<(int_nvvm_suq_height Int64Regs:$a), + (SUQ_HEIGHT Int64Regs:$a)>; +def : Pat<(int_nvvm_suq_depth Int64Regs:$a), + (SUQ_DEPTH Int64Regs:$a)>; +def : Pat<(int_nvvm_suq_array_size Int64Regs:$a), + (SUQ_ARRAY_SIZE Int64Regs:$a)>; + + +//===- Handle Query -------------------------------------------------------===// + +// TODO: These intrinsics are not yet finalized, pending PTX ISA design work +def ISTYPEP_SAMPLER + : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), + "istypep.samplerref \t$d, $a;", + [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>; +def ISTYPEP_SURFACE + : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), + "istypep.surfref \t$d, $a;", + [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>; +def ISTYPEP_TEXTURE + : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), + "istypep.texref \t$d, $a;", + [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>; + +//===- Surface Stores -----------------------------------------------------===// + +// Unformatted + +def SUST_B_1D_B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};", + []>; +def SUST_B_1D_B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};", + []>; +def SUST_B_1D_B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), + "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};", + []>; +def SUST_B_1D_V2B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_V2B16_TRAP + : 
NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_V2B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), + "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_V4B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, + Int16Regs:$b, Int16Regs:$a), + "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + []>; +def SUST_B_1D_V4B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, + Int16Regs:$b, Int16Regs:$a), + "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + []>; +def SUST_B_1D_V4B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + []>; + + +def SUST_B_1D_ARRAY_B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), + "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +def SUST_B_1D_ARRAY_B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), + "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +def SUST_B_1D_ARRAY_B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), + "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +def SUST_B_1D_ARRAY_V2B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g), + "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_ARRAY_V2B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g), + "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_ARRAY_V2B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, + Int32Regs:$g), + "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_ARRAY_V4B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_1D_ARRAY_V4B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_1D_ARRAY_V4B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, + Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + + +def SUST_B_2D_B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), + "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_V2B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g), + 
"sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_B_2D_V2B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g), + "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_B_2D_V2B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, + Int32Regs:$g), + "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_B_2D_V4B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_2D_V4B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_2D_V4B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, + Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + + +def SUST_B_2D_ARRAY_B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r), + "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_ARRAY_B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r), + "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_ARRAY_B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r), + "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_ARRAY_V2B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_2D_ARRAY_V2B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_2D_ARRAY_V2B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g), + "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_2D_ARRAY_V4B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_2D_ARRAY_V4B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_2D_ARRAY_V4B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + + +def SUST_B_3D_B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +def SUST_B_3D_B16_TRAP + : 
NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +def SUST_B_3D_B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r), + "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +def SUST_B_3D_V2B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_3D_V2B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_3D_V2B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g), + "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_3D_V4B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_3D_V4B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_3D_V4B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + +// Formatted + +def SUST_P_1D_B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};", + []>; +def SUST_P_1D_B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};", + []>; +def SUST_P_1D_B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), + "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};", + []>; +def SUST_P_1D_V2B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_P_1D_V2B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_P_1D_V2B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), + "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_P_1D_V4B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, + Int16Regs:$b, Int16Regs:$a), + "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + []>; +def SUST_P_1D_V4B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, + Int16Regs:$b, Int16Regs:$a), + "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + []>; +def SUST_P_1D_V4B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + []>; + + +def SUST_P_1D_ARRAY_B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, 
Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), + "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +def SUST_P_1D_ARRAY_B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), + "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +def SUST_P_1D_ARRAY_B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), + "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +def SUST_P_1D_ARRAY_V2B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g), + "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +def SUST_P_1D_ARRAY_V2B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g), + "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +def SUST_P_1D_ARRAY_V2B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, + Int32Regs:$g), + "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +def SUST_P_1D_ARRAY_V4B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_P_1D_ARRAY_V4B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_P_1D_ARRAY_V4B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, + Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + + +def SUST_P_2D_B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_P_2D_B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_P_2D_B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), + "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_P_2D_V2B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g), + "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_P_2D_V2B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g), + "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_P_2D_V2B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, + Int32Regs:$g), + "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_P_2D_V4B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_P_2D_V4B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_P_2D_V4B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, + 
Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + + +def SUST_P_2D_ARRAY_B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r), + "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_P_2D_ARRAY_B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r), + "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_P_2D_ARRAY_B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r), + "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_P_2D_ARRAY_V2B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + []>; +def SUST_P_2D_ARRAY_V2B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + []>; +def SUST_P_2D_ARRAY_V2B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g), + "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + []>; +def SUST_P_2D_ARRAY_V4B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_P_2D_ARRAY_V4B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_P_2D_ARRAY_V4B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + + +def SUST_P_3D_B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +def SUST_P_3D_B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +def SUST_P_3D_B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r), + "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +def SUST_P_3D_V2B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; +def SUST_P_3D_V2B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; +def SUST_P_3D_V2B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g), + "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; +def 
SUST_P_3D_V4B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_P_3D_V4B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_P_3D_V4B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + + +// Surface store instruction patterns +// I'm not sure why we can't just include these in the instruction definitions, +// but TableGen complains of type errors :( + +def : Pat<(int_nvvm_sust_b_1d_i8_trap + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_i16_trap + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), + (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_v2i8_trap + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_v2i16_trap + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_v2i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), + (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_v4i8_trap + Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_v4i16_trap + Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_v4i32_trap + Int64Regs:$s, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_1d_array_i8_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), + (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_array_i16_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), + (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_array_i32_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), + (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : 
Pat<(int_nvvm_sust_b_1d_array_v2i16_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), + (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_2d_i8_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_i16_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), + (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_v2i8_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), + (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_v2i16_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), + (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_v2i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), + (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_v4i8_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_v4i16_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_v4i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_2d_array_i8_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : 
Pat<(int_nvvm_sust_b_2d_array_i16_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_array_i32_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), + (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, + Int32Regs:$g), + (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_3d_i8_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + (SUST_B_3D_B8_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_3d_i16_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + (SUST_B_3D_B16_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_3d_i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r), + (SUST_B_3D_B32_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_3d_v2i8_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + (SUST_B_3D_V2B8_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_3d_v2i16_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + (SUST_B_3D_V2B16_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_3d_v2i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g), + (SUST_B_3D_V2B32_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g)>; + +def : 
Pat<(int_nvvm_sust_b_3d_v4i8_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_3D_V4B8_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_3d_v4i16_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_3D_V4B16_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_3d_v4i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_3D_V4B32_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + + +def : Pat<(int_nvvm_sust_p_1d_i8_trap + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_1d_i16_trap + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_1d_i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), + (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_1d_v2i8_trap + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_1d_v2i16_trap + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_1d_v2i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), + (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_1d_v4i8_trap + Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_1d_v4i16_trap + Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_1d_v4i32_trap + Int64Regs:$s, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_p_1d_array_i8_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), + (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_1d_array_i16_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), + (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_1d_array_i32_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), + (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap + Int64Regs:$s, Int32Regs:$l, 
Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), + (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_p_2d_i8_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_2d_i16_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_2d_i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), + (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_2d_v2i8_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), + (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_2d_v2i16_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), + (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_2d_v2i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), + (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_2d_v4i8_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_2d_v4i16_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_2d_v4i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_p_2d_array_i8_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_2d_array_i16_trap + Int64Regs:$s, Int32Regs:$l, 
Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_2d_array_i32_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), + (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, + Int32Regs:$g), + (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_p_3d_i8_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + (SUST_P_3D_B8_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_3d_i16_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + (SUST_P_3D_B16_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_3d_i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r), + (SUST_P_3D_B32_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_3d_v2i8_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + (SUST_P_3D_V2B8_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_3d_v2i16_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + (SUST_P_3D_V2B16_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_3d_v2i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g), + (SUST_P_3D_V2B32_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_3d_v4i8_trap + Int64Regs:$s, Int32Regs:$x, 
Int32Regs:$y, Int32Regs:$z,
+             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+          (SUST_P_3D_V4B8_TRAP Int64Regs:$s,
+             Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
+             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+          (SUST_P_3D_V4B16_TRAP Int64Regs:$s,
+             Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
+             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+          (SUST_P_3D_V4B32_TRAP Int64Regs:$s,
+             Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
 //===-- Old PTX Back-end Intrinsics ---------------------------------------===//
diff --git a/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
new file mode 100644
index 00000000000..67fb3905079
--- /dev/null
+++ b/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
@@ -0,0 +1,46 @@
+//===-- NVPTXMachineFunctionInfo.h - NVPTX-specific Function Info --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class is attached to a MachineFunction instance and tracks target-
+// dependent information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+class NVPTXMachineFunctionInfo : public MachineFunctionInfo {
+private:
+  /// Stores a mapping from index to symbol name for removing image handles
+  /// on Fermi.
+  SmallVector<std::string, 8> ImageHandleList;
+
+public:
+  NVPTXMachineFunctionInfo(MachineFunction &MF) {}
+
+  /// Returns the index for the symbol \p Symbol. If the symbol was previously
+  /// added, the same index is returned. Otherwise, the symbol is added and the
+  /// new index is returned.
+  unsigned getImageHandleSymbolIndex(const char *Symbol) {
+    // Is the symbol already present?
+    for (unsigned i = 0, e = ImageHandleList.size(); i != e; ++i)
+      if (ImageHandleList[i] == std::string(Symbol))
+        return i;
+    // Nope, insert it
+    ImageHandleList.push_back(Symbol);
+    return ImageHandleList.size()-1;
+  }
+
+  /// Returns the symbol name at the given index.
+  const char *getImageHandleSymbol(unsigned Idx) const {
+    assert(ImageHandleList.size() > Idx && "Bad index");
+    return ImageHandleList[Idx].c_str();
+  }
+};
+}
diff --git a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
new file mode 100644
index 00000000000..f05f18880e8
--- /dev/null
+++ b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -0,0 +1,357 @@
+//===-- NVPTXReplaceImageHandles.cpp - Replace image handles for Fermi ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// On Fermi, image handles are not supported. To work around this, we traverse
+// the machine code and replace image handles with concrete symbols.
For this +// to work reliably, inlining of all function call must be performed. +// +//===----------------------------------------------------------------------===// + +#include "NVPTX.h" +#include "NVPTXMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DenseSet.h" + +using namespace llvm; + +namespace { +class NVPTXReplaceImageHandles : public MachineFunctionPass { +private: + static char ID; + DenseSet<MachineInstr *> InstrsToRemove; + +public: + NVPTXReplaceImageHandles(); + + bool runOnMachineFunction(MachineFunction &MF); +private: + bool processInstr(MachineInstr &MI); + void replaceImageHandle(MachineOperand &Op, MachineFunction &MF); +}; +} + +char NVPTXReplaceImageHandles::ID = 0; + +NVPTXReplaceImageHandles::NVPTXReplaceImageHandles() + : MachineFunctionPass(ID) {} + +bool NVPTXReplaceImageHandles::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + InstrsToRemove.clear(); + + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; + ++BI) { + for (MachineBasicBlock::iterator I = (*BI).begin(), E = (*BI).end(); + I != E; ++I) { + MachineInstr &MI = *I; + Changed |= processInstr(MI); + } + } + + // Now clean up any handle-access instructions + // This is needed in debug mode when code cleanup passes are not executed, + // but we need the handle access to be eliminated because they are not + // valid instructions when image handles are disabled. + for (DenseSet<MachineInstr *>::iterator I = InstrsToRemove.begin(), + E = InstrsToRemove.end(); I != E; ++I) { + (*I)->eraseFromParent(); + } + + return Changed; +} + +bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) { + MachineFunction &MF = *MI.getParent()->getParent(); + // Check if we have a surface/texture instruction + switch (MI.getOpcode()) { + default: return false; + case NVPTX::TEX_1D_F32_I32: + case NVPTX::TEX_1D_F32_F32: + case NVPTX::TEX_1D_F32_F32_LEVEL: + case NVPTX::TEX_1D_F32_F32_GRAD: + case NVPTX::TEX_1D_I32_I32: + case NVPTX::TEX_1D_I32_F32: + case NVPTX::TEX_1D_I32_F32_LEVEL: + case NVPTX::TEX_1D_I32_F32_GRAD: + case NVPTX::TEX_1D_ARRAY_F32_I32: + case NVPTX::TEX_1D_ARRAY_F32_F32: + case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL: + case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD: + case NVPTX::TEX_1D_ARRAY_I32_I32: + case NVPTX::TEX_1D_ARRAY_I32_F32: + case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL: + case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD: + case NVPTX::TEX_2D_F32_I32: + case NVPTX::TEX_2D_F32_F32: + case NVPTX::TEX_2D_F32_F32_LEVEL: + case NVPTX::TEX_2D_F32_F32_GRAD: + case NVPTX::TEX_2D_I32_I32: + case NVPTX::TEX_2D_I32_F32: + case NVPTX::TEX_2D_I32_F32_LEVEL: + case NVPTX::TEX_2D_I32_F32_GRAD: + case NVPTX::TEX_2D_ARRAY_F32_I32: + case NVPTX::TEX_2D_ARRAY_F32_F32: + case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL: + case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD: + case NVPTX::TEX_2D_ARRAY_I32_I32: + case NVPTX::TEX_2D_ARRAY_I32_F32: + case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL: + case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD: + case NVPTX::TEX_3D_F32_I32: + case NVPTX::TEX_3D_F32_F32: + case NVPTX::TEX_3D_F32_F32_LEVEL: + case NVPTX::TEX_3D_F32_F32_GRAD: + case NVPTX::TEX_3D_I32_I32: + case NVPTX::TEX_3D_I32_F32: + case NVPTX::TEX_3D_I32_F32_LEVEL: + case NVPTX::TEX_3D_I32_F32_GRAD: { + // This is a texture fetch, so operand 4 is a texref and operand 5 is + // a samplerref + MachineOperand &TexHandle = MI.getOperand(4); + MachineOperand &SampHandle = 
+
+    replaceImageHandle(TexHandle, MF);
+    replaceImageHandle(SampHandle, MF);
+
+    return true;
+  }
+  case NVPTX::SULD_1D_I8_TRAP:
+  case NVPTX::SULD_1D_I16_TRAP:
+  case NVPTX::SULD_1D_I32_TRAP:
+  case NVPTX::SULD_1D_ARRAY_I8_TRAP:
+  case NVPTX::SULD_1D_ARRAY_I16_TRAP:
+  case NVPTX::SULD_1D_ARRAY_I32_TRAP:
+  case NVPTX::SULD_2D_I8_TRAP:
+  case NVPTX::SULD_2D_I16_TRAP:
+  case NVPTX::SULD_2D_I32_TRAP:
+  case NVPTX::SULD_2D_ARRAY_I8_TRAP:
+  case NVPTX::SULD_2D_ARRAY_I16_TRAP:
+  case NVPTX::SULD_2D_ARRAY_I32_TRAP:
+  case NVPTX::SULD_3D_I8_TRAP:
+  case NVPTX::SULD_3D_I16_TRAP:
+  case NVPTX::SULD_3D_I32_TRAP: {
+    // This is a V1 surface load, so operand 1 is a surfref
+    MachineOperand &SurfHandle = MI.getOperand(1);
+
+    replaceImageHandle(SurfHandle, MF);
+
+    return true;
+  }
+  case NVPTX::SULD_1D_V2I8_TRAP:
+  case NVPTX::SULD_1D_V2I16_TRAP:
+  case NVPTX::SULD_1D_V2I32_TRAP:
+  case NVPTX::SULD_1D_ARRAY_V2I8_TRAP:
+  case NVPTX::SULD_1D_ARRAY_V2I16_TRAP:
+  case NVPTX::SULD_1D_ARRAY_V2I32_TRAP:
+  case NVPTX::SULD_2D_V2I8_TRAP:
+  case NVPTX::SULD_2D_V2I16_TRAP:
+  case NVPTX::SULD_2D_V2I32_TRAP:
+  case NVPTX::SULD_2D_ARRAY_V2I8_TRAP:
+  case NVPTX::SULD_2D_ARRAY_V2I16_TRAP:
+  case NVPTX::SULD_2D_ARRAY_V2I32_TRAP:
+  case NVPTX::SULD_3D_V2I8_TRAP:
+  case NVPTX::SULD_3D_V2I16_TRAP:
+  case NVPTX::SULD_3D_V2I32_TRAP: {
+    // This is a V2 surface load, so operand 2 is a surfref
+    MachineOperand &SurfHandle = MI.getOperand(2);
+
+    replaceImageHandle(SurfHandle, MF);
+
+    return true;
+  }
+  case NVPTX::SULD_1D_V4I8_TRAP:
+  case NVPTX::SULD_1D_V4I16_TRAP:
+  case NVPTX::SULD_1D_V4I32_TRAP:
+  case NVPTX::SULD_1D_ARRAY_V4I8_TRAP:
+  case NVPTX::SULD_1D_ARRAY_V4I16_TRAP:
+  case NVPTX::SULD_1D_ARRAY_V4I32_TRAP:
+  case NVPTX::SULD_2D_V4I8_TRAP:
+  case NVPTX::SULD_2D_V4I16_TRAP:
+  case NVPTX::SULD_2D_V4I32_TRAP:
+  case NVPTX::SULD_2D_ARRAY_V4I8_TRAP:
+  case NVPTX::SULD_2D_ARRAY_V4I16_TRAP:
+  case NVPTX::SULD_2D_ARRAY_V4I32_TRAP:
+  case NVPTX::SULD_3D_V4I8_TRAP:
+  case NVPTX::SULD_3D_V4I16_TRAP:
+  case NVPTX::SULD_3D_V4I32_TRAP: {
+    // This is a V4 surface load, so operand 4 is a surfref
+    MachineOperand &SurfHandle = MI.getOperand(4);
+
+    replaceImageHandle(SurfHandle, MF);
+
+    return true;
+  }
+  case NVPTX::SUST_B_1D_B8_TRAP:
+  case NVPTX::SUST_B_1D_B16_TRAP:
+  case NVPTX::SUST_B_1D_B32_TRAP:
+  case NVPTX::SUST_B_1D_V2B8_TRAP:
+  case NVPTX::SUST_B_1D_V2B16_TRAP:
+  case NVPTX::SUST_B_1D_V2B32_TRAP:
+  case NVPTX::SUST_B_1D_V4B8_TRAP:
+  case NVPTX::SUST_B_1D_V4B16_TRAP:
+  case NVPTX::SUST_B_1D_V4B32_TRAP:
+  case NVPTX::SUST_B_1D_ARRAY_B8_TRAP:
+  case NVPTX::SUST_B_1D_ARRAY_B16_TRAP:
+  case NVPTX::SUST_B_1D_ARRAY_B32_TRAP:
+  case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP:
+  case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP:
+  case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP:
+  case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP:
+  case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP:
+  case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP:
+  case NVPTX::SUST_B_2D_B8_TRAP:
+  case NVPTX::SUST_B_2D_B16_TRAP:
+  case NVPTX::SUST_B_2D_B32_TRAP:
+  case NVPTX::SUST_B_2D_V2B8_TRAP:
+  case NVPTX::SUST_B_2D_V2B16_TRAP:
+  case NVPTX::SUST_B_2D_V2B32_TRAP:
+  case NVPTX::SUST_B_2D_V4B8_TRAP:
+  case NVPTX::SUST_B_2D_V4B16_TRAP:
+  case NVPTX::SUST_B_2D_V4B32_TRAP:
+  case NVPTX::SUST_B_2D_ARRAY_B8_TRAP:
+  case NVPTX::SUST_B_2D_ARRAY_B16_TRAP:
+  case NVPTX::SUST_B_2D_ARRAY_B32_TRAP:
+  case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP:
+  case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP:
+  case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP:
+  case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP:
+  case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP:
+  case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP:
+  case NVPTX::SUST_B_3D_B8_TRAP:
+  case NVPTX::SUST_B_3D_B16_TRAP:
+  case NVPTX::SUST_B_3D_B32_TRAP:
+  case NVPTX::SUST_B_3D_V2B8_TRAP:
+  case NVPTX::SUST_B_3D_V2B16_TRAP:
+  case NVPTX::SUST_B_3D_V2B32_TRAP:
+  case NVPTX::SUST_B_3D_V4B8_TRAP:
+  case NVPTX::SUST_B_3D_V4B16_TRAP:
+  case NVPTX::SUST_B_3D_V4B32_TRAP:
+  case NVPTX::SUST_P_1D_B8_TRAP:
+  case NVPTX::SUST_P_1D_B16_TRAP:
+  case NVPTX::SUST_P_1D_B32_TRAP:
+  case NVPTX::SUST_P_1D_V2B8_TRAP:
+  case NVPTX::SUST_P_1D_V2B16_TRAP:
+  case NVPTX::SUST_P_1D_V2B32_TRAP:
+  case NVPTX::SUST_P_1D_V4B8_TRAP:
+  case NVPTX::SUST_P_1D_V4B16_TRAP:
+  case NVPTX::SUST_P_1D_V4B32_TRAP:
+  case NVPTX::SUST_P_1D_ARRAY_B8_TRAP:
+  case NVPTX::SUST_P_1D_ARRAY_B16_TRAP:
+  case NVPTX::SUST_P_1D_ARRAY_B32_TRAP:
+  case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP:
+  case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP:
+  case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP:
+  case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP:
+  case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP:
+  case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP:
+  case NVPTX::SUST_P_2D_B8_TRAP:
+  case NVPTX::SUST_P_2D_B16_TRAP:
+  case NVPTX::SUST_P_2D_B32_TRAP:
+  case NVPTX::SUST_P_2D_V2B8_TRAP:
+  case NVPTX::SUST_P_2D_V2B16_TRAP:
+  case NVPTX::SUST_P_2D_V2B32_TRAP:
+  case NVPTX::SUST_P_2D_V4B8_TRAP:
+  case NVPTX::SUST_P_2D_V4B16_TRAP:
+  case NVPTX::SUST_P_2D_V4B32_TRAP:
+  case NVPTX::SUST_P_2D_ARRAY_B8_TRAP:
+  case NVPTX::SUST_P_2D_ARRAY_B16_TRAP:
+  case NVPTX::SUST_P_2D_ARRAY_B32_TRAP:
+  case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP:
+  case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP:
+  case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP:
+  case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP:
+  case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP:
+  case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP:
+  case NVPTX::SUST_P_3D_B8_TRAP:
+  case NVPTX::SUST_P_3D_B16_TRAP:
+  case NVPTX::SUST_P_3D_B32_TRAP:
+  case NVPTX::SUST_P_3D_V2B8_TRAP:
+  case NVPTX::SUST_P_3D_V2B16_TRAP:
+  case NVPTX::SUST_P_3D_V2B32_TRAP:
+  case NVPTX::SUST_P_3D_V4B8_TRAP:
+  case NVPTX::SUST_P_3D_V4B16_TRAP:
+  case NVPTX::SUST_P_3D_V4B32_TRAP: {
+    // This is a surface store, so operand 0 is a surfref
+    MachineOperand &SurfHandle = MI.getOperand(0);
+
+    replaceImageHandle(SurfHandle, MF);
+
+    return true;
+  }
+  case NVPTX::TXQ_CHANNEL_ORDER:
+  case NVPTX::TXQ_CHANNEL_DATA_TYPE:
+  case NVPTX::TXQ_WIDTH:
+  case NVPTX::TXQ_HEIGHT:
+  case NVPTX::TXQ_DEPTH:
+  case NVPTX::TXQ_ARRAY_SIZE:
+  case NVPTX::TXQ_NUM_SAMPLES:
+  case NVPTX::TXQ_NUM_MIPMAP_LEVELS:
+  case NVPTX::SUQ_CHANNEL_ORDER:
+  case NVPTX::SUQ_CHANNEL_DATA_TYPE:
+  case NVPTX::SUQ_WIDTH:
+  case NVPTX::SUQ_HEIGHT:
+  case NVPTX::SUQ_DEPTH:
+  case NVPTX::SUQ_ARRAY_SIZE: {
+    // This is a query, so operand 1 is a surfref/texref
+    MachineOperand &Handle = MI.getOperand(1);
+
+    replaceImageHandle(Handle, MF);
+
+    return true;
+  }
+  }
+}
+
+void NVPTXReplaceImageHandles::
+replaceImageHandle(MachineOperand &Op, MachineFunction &MF) {
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  NVPTXMachineFunctionInfo *MFI = MF.getInfo<NVPTXMachineFunctionInfo>();
+  // Which instruction defines the handle?
+  MachineInstr *MI = MRI.getVRegDef(Op.getReg());
+  assert(MI && "No def for image handle vreg?");
+  MachineInstr &TexHandleDef = *MI;
+
+  switch (TexHandleDef.getOpcode()) {
+  case NVPTX::LD_i64_avar: {
+    // The handle is a parameter value being loaded; replace it with the
+    // parameter symbol
+    assert(TexHandleDef.getOperand(6).isSymbol() && "Load is not a symbol!");
+    StringRef Sym = TexHandleDef.getOperand(6).getSymbolName();
+    std::string ParamBaseName = MF.getName();
+    ParamBaseName += "_param_";
+    assert(Sym.startswith(ParamBaseName) && "Invalid symbol reference");
+    unsigned Param = atoi(Sym.data()+ParamBaseName.size());
+    std::string NewSym;
+    raw_string_ostream NewSymStr(NewSym);
+    NewSymStr << MF.getFunction()->getName() << "_param_" << Param;
+    Op.ChangeToImmediate(
+      MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str()));
+    InstrsToRemove.insert(&TexHandleDef);
+    break;
+  }
+  case NVPTX::texsurf_handles: {
+    // The handle is a global variable; replace it with the global variable
+    // name
+    assert(TexHandleDef.getOperand(1).isGlobal() && "Load is not a global!");
+    const GlobalValue *GV = TexHandleDef.getOperand(1).getGlobal();
+    assert(GV->hasName() && "Global sampler must be named!");
+    Op.ChangeToImmediate(MFI->getImageHandleSymbolIndex(GV->getName().data()));
+    InstrsToRemove.insert(&TexHandleDef);
+    break;
+  }
+  default:
+    llvm_unreachable("Unknown instruction operating on handle");
+  }
+}
+
+MachineFunctionPass *llvm::createNVPTXReplaceImageHandlesPass() {
+  return new NVPTXReplaceImageHandles();
+}
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index f99bebd6ce1..efd6e40d722 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -65,6 +65,10 @@ public:
   inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); }
   inline bool hasROT64() const { return SmVersion >= 20; }
 
+  bool hasImageHandles() const {
+    // Currently disabled
+    return false;
+  }
   bool is64Bit() const { return Is64Bit; }
 
   unsigned int getSmVersion() const { return SmVersion; }
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 9aa7dbb4fab..7510da502f2 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -143,6 +143,7 @@ void NVPTXPassConfig::addIRPasses() {
   disablePass(&BranchFolderPassID);
   disablePass(&TailDuplicateID);
 
+  addPass(createNVPTXImageOptimizerPass());
   TargetPassConfig::addIRPasses();
   addPass(createNVPTXAssignValidGlobalNamesPass());
   addPass(createGenericToNVVMPass());
@@ -155,9 +156,16 @@ void NVPTXPassConfig::addIRPasses() {
 }
 
 bool NVPTXPassConfig::addInstSelector() {
+  const NVPTXSubtarget &ST =
+    getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>();
+
   addPass(createLowerAggrCopies());
   addPass(createAllocaHoisting());
   addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
+
+  if (!ST.hasImageHandles())
+    addPass(createNVPTXReplaceImageHandlesPass());
+
   return false;
 }
diff --git a/llvm/test/CodeGen/NVPTX/surf-read.ll b/llvm/test/CodeGen/NVPTX/surf-read.ll
new file mode 100644
index 00000000000..a69d03efe0d
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/surf-read.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target triple = "nvptx-unknown-nvcl"
+
+declare i32 @llvm.nvvm.suld.1d.i32.trap(i64, i32)
+
+; CHECK: .entry foo
+define void @foo(i64 %img, float* %red, i32 %idx) {
+; CHECK: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [foo_param_0, {%r{{[0-9]+}}}]
+  %val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %img, i32 %idx)
+; CHECK: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
+  %ret = sitofp i32 %val to float
+; CHECK: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
+  store float %ret, float* %red
+  ret void
+}
+
+!nvvm.annotations = !{!1, !2}
+!1 = metadata !{void (i64, float*, i32)* @foo, metadata !"kernel", i32 1}
+!2 = metadata !{void (i64, float*, i32)* @foo, metadata !"rdwrimage", i32 0}
diff --git a/llvm/test/CodeGen/NVPTX/surf-write.ll b/llvm/test/CodeGen/NVPTX/surf-write.ll
new file mode 100644
index 00000000000..880231f9659
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/surf-write.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target triple = "nvptx-unknown-nvcl"
+
+declare void @llvm.nvvm.sust.b.1d.i32.trap(i64, i32, i32)
+
+; CHECK: .entry foo
+define void @foo(i64 %img, i32 %val, i32 %idx) {
+; CHECK: sust.b.1d.b32.trap [foo_param_0, {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
+  tail call void @llvm.nvvm.sust.b.1d.i32.trap(i64 %img, i32 %idx, i32 %val)
+  ret void
+}
+
+!nvvm.annotations = !{!1, !2}
+!1 = metadata !{void (i64, i32, i32)* @foo, metadata !"kernel", i32 1}
+!2 = metadata !{void (i64, i32, i32)* @foo, metadata !"wroimage", i32 0}
diff --git a/llvm/test/CodeGen/NVPTX/tex-read.ll b/llvm/test/CodeGen/NVPTX/tex-read.ll
new file mode 100644
index 00000000000..291060b9848
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/tex-read.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target triple = "nvptx-unknown-nvcl"
+
+declare { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.i32(i64, i64, i32)
+
+; CHECK: .entry foo
+define void @foo(i64 %img, i64 %sampler, float* %red, i32 %idx) {
+; CHECK: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [foo_param_0, foo_param_1, {%r{{[0-9]+}}}]
+  %val = tail call { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.i32(i64 %img, i64 %sampler, i32 %idx)
+  %ret = extractvalue { float, float, float, float } %val, 0
+; CHECK: st.f32 [%r{{[0-9]+}}], %f[[RED]]
+  store float %ret, float* %red
+  ret void
+}
+
+!nvvm.annotations = !{!1, !2, !3}
+!1 = metadata !{void (i64, i64, float*, i32)* @foo, metadata !"kernel", i32 1}
+!2 = metadata !{void (i64, i64, float*, i32)* @foo, metadata !"rdoimage", i32 0}
+!3 = metadata !{void (i64, i64, float*, i32)* @foo, metadata !"sampler", i32 1}
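
Editor's note: the following usage sketch is not part of the commit. It only reuses the two 1-D surface intrinsic declarations that appear verbatim in surf-read.ll and surf-write.ll above, combining them into a single read-modify-write kernel; the kernel name @rmw and its annotation layout (nvcl-style "kernel" / "rdwrimage" metadata, old pre-3.6 metadata syntax) follow the conventions of those tests and are illustrative only.

; Minimal read-modify-write sketch built from the intrinsics declared in the
; tests above. On Fermi (hasImageHandles() == false) the i64 handle operand of
; the generated suld/sust instructions is rewritten by NVPTXReplaceImageHandles
; to the kernel parameter symbol (rmw_param_0).
target triple = "nvptx-unknown-nvcl"

declare i32 @llvm.nvvm.suld.1d.i32.trap(i64, i32)
declare void @llvm.nvvm.sust.b.1d.i32.trap(i64, i32, i32)

define void @rmw(i64 %img, i32 %idx) {
  ; Load the element at %idx, double it, and store it back through the same
  ; surface handle.
  %val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %img, i32 %idx)
  %dbl = shl i32 %val, 1
  tail call void @llvm.nvvm.sust.b.1d.i32.trap(i64 %img, i32 %idx, i32 %dbl)
  ret void
}

!nvvm.annotations = !{!1, !2}
!1 = metadata !{void (i64, i32)* @rmw, metadata !"kernel", i32 1}
!2 = metadata !{void (i64, i32)* @rmw, metadata !"rdwrimage", i32 0}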
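
A 2-D variant could look like the sketch below. This is an assumption, not something shown in the patch text here: the SULD_2D_I32_TRAP and SUST_B_2D_B32_TRAP patterns are handled by NVPTXReplaceImageHandles above, but the corresponding IR declarations are not reproduced in this section, so the signatures below (handle, x, y[, value], mirroring the 1-D forms) should be verified against IntrinsicsNVVM.td before use. The kernel name @copy2d and its annotations are hypothetical.

; Hypothetical 2-D surface copy sketch; the two declarations below are assumed
; to extend the 1-D signatures with a second coordinate.
target triple = "nvptx-unknown-nvcl"

declare i32 @llvm.nvvm.suld.2d.i32.trap(i64, i32, i32)
declare void @llvm.nvvm.sust.b.2d.i32.trap(i64, i32, i32, i32)

define void @copy2d(i64 %src, i64 %dst, i32 %x, i32 %y) {
  ; Read one element from the source surface and write it to the same (x, y)
  ; position in the destination surface.
  %val = tail call i32 @llvm.nvvm.suld.2d.i32.trap(i64 %src, i32 %x, i32 %y)
  tail call void @llvm.nvvm.sust.b.2d.i32.trap(i64 %dst, i32 %x, i32 %y, i32 %val)
  ret void
}

!nvvm.annotations = !{!1, !2, !3}
!1 = metadata !{void (i64, i64, i32, i32)* @copy2d, metadata !"kernel", i32 1}
!2 = metadata !{void (i64, i64, i32, i32)* @copy2d, metadata !"rdoimage", i32 0}
!3 = metadata !{void (i64, i64, i32, i32)* @copy2d, metadata !"wroimage", i32 1}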