diff options
| author | Artem Belevich <tra@google.com> | 2017-11-14 19:14:00 +0000 |
|---|---|---|
| committer | Artem Belevich <tra@google.com> | 2017-11-14 19:14:00 +0000 |
| commit | 55dcf5e586a470d13350fcf3b0b85993c73ce024 (patch) | |
| tree | cfe00c61f0f9bfd22cfa430273a422769562b26f /llvm/lib/Target | |
| parent | 35d90aea7a476da62be5dde06330e1032bb46f56 (diff) | |
| download | bcm5719-llvm-55dcf5e586a470d13350fcf3b0b85993c73ce024.tar.gz bcm5719-llvm-55dcf5e586a470d13350fcf3b0b85993c73ce024.zip | |
Mark intrinsics operating on the whole warp as IntrInaccessibleMemOnly
This is needed to model the fact that they access data from other threads in the
warp and therefore can't be CSE'd.
llvm-svn: 318173
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 21 | ||||
| -rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 10 |
2 files changed, 21 insertions, 10 deletions
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index a7e58fa9738..ce6b071859b 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -811,6 +811,10 @@ bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) { switch (IID) { default: return false; + case Intrinsic::nvvm_match_all_sync_i32p: + case Intrinsic::nvvm_match_all_sync_i64p: + SelectMatchAll(N); + return true; case Intrinsic::nvvm_ldg_global_f: case Intrinsic::nvvm_ldg_global_i: case Intrinsic::nvvm_ldg_global_p: @@ -1025,10 +1029,6 @@ bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) { case Intrinsic::nvvm_texsurf_handle_internal: SelectTexSurfHandle(N); return true; - case Intrinsic::nvvm_match_all_sync_i32p: - case Intrinsic::nvvm_match_all_sync_i64p: - SelectMatchAll(N); - return true; case Intrinsic::nvvm_wmma_mma_sync_col_col_f16_f16: case Intrinsic::nvvm_wmma_mma_sync_col_col_f16_f16_satfinite: case Intrinsic::nvvm_wmma_mma_sync_col_col_f16_f32: @@ -1075,12 +1075,13 @@ void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) { void NVPTXDAGToDAGISel::SelectMatchAll(SDNode *N) { SDLoc DL(N); + SDValue Chain = N->getOperand(0); enum { IS_I64 = 4, HAS_CONST_VALUE = 2, HAS_CONST_MASK = 1 }; - unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); unsigned OpcodeIndex = (IID == Intrinsic::nvvm_match_all_sync_i64p) ? 
IS_I64 : 0; - SDValue MaskOp = N->getOperand(1); - SDValue ValueOp = N->getOperand(2); + SDValue MaskOp = N->getOperand(2); + SDValue ValueOp = N->getOperand(3); if (ConstantSDNode *ValueConst = dyn_cast<ConstantSDNode>(ValueOp)) { OpcodeIndex |= HAS_CONST_VALUE; ValueOp = CurDAG->getTargetConstant(ValueConst->getZExtValue(), DL, @@ -1097,9 +1098,9 @@ void NVPTXDAGToDAGISel::SelectMatchAll(SDNode *N) { NVPTX::MATCH_ALLP_SYNC_32ir, NVPTX::MATCH_ALLP_SYNC_32ii, NVPTX::MATCH_ALLP_SYNC_64rr, NVPTX::MATCH_ALLP_SYNC_64ri, NVPTX::MATCH_ALLP_SYNC_64ir, NVPTX::MATCH_ALLP_SYNC_64ii}; - SDNode *NewNode = CurDAG->getMachineNode(Opcodes[OpcodeIndex], DL, - {ValueOp->getValueType(0), MVT::i1}, - {MaskOp, ValueOp}); + SDNode *NewNode = CurDAG->getMachineNode( + Opcodes[OpcodeIndex], DL, {ValueOp->getValueType(0), MVT::i1, MVT::Other}, + {MaskOp, ValueOp}); ReplaceNode(N, NewNode); } diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index ac4f2544fc3..f141122ec48 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -3321,6 +3321,16 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( switch (Intrinsic) { default: return false; + case Intrinsic::nvvm_match_all_sync_i32p: + case Intrinsic::nvvm_match_all_sync_i64p: + Info.opc = ISD::INTRINSIC_W_CHAIN; + // memVT is bogus. These intrinsics have IntrInaccessibleMemOnly attribute + // in order to model data exchange with other threads, but perform no real + // memory accesses. + Info.memVT = MVT::i1; + Info.readMem = true; // Our result depends on other thread's arguments. + Info.writeMem = true; // Other threads depend on our thread's argument. + return true; case Intrinsic::nvvm_wmma_load_a_f16_col: case Intrinsic::nvvm_wmma_load_a_f16_row: case Intrinsic::nvvm_wmma_load_a_f16_col_stride: |

