diff options
| author | Jan Vesely <jan.vesely@rutgers.edu> | 2017-01-20 21:24:26 +0000 |
|---|---|---|
| committer | Jan Vesely <jan.vesely@rutgers.edu> | 2017-01-20 21:24:26 +0000 |
| commit | f170504c419596b94a3cd0178b7caaccc0d20840 (patch) | |
| tree | 6bad0708a3dfbb7f77e44b040c2fb780b173f059 /llvm/lib | |
| parent | 74694b19e04f3d0408830c879133be12c7b4f5df (diff) | |
| download | bcm5719-llvm-f170504c419596b94a3cd0178b7caaccc0d20840.tar.gz bcm5719-llvm-f170504c419596b94a3cd0178b7caaccc0d20840.zip | |
AMDGPU/R600: Serialize vector trunc stores to private AS
Add DUMMY_CHAIN SDNode to denote stores of interest
Bugzilla: https://llvm.org/bugs/show_bug.cgi?id=28915
Bugzilla: https://llvm.org/bugs/show_bug.cgi?id=30411
Differential Revision: https://reviews.llvm.org/D27964
llvm-svn: 292651
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 28 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/R600Instructions.td | 14 |
5 files changed, 44 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 07f627aab8b..54caa2c5dfa 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -3278,6 +3278,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(CONST_DATA_PTR) NODE_NAME_CASE(PC_ADD_REL_OFFSET) NODE_NAME_CASE(KILL) + NODE_NAME_CASE(DUMMY_CHAIN) case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break; NODE_NAME_CASE(SENDMSG) NODE_NAME_CASE(SENDMSGHALT) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 171b0435850..f6adceac6f1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -330,6 +330,7 @@ enum NodeType : unsigned { INTERP_P2, PC_ADD_REL_OFFSET, KILL, + DUMMY_CHAIN, FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, STORE_MSKOR, LOAD_CONSTANT, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index c65db5779d7..cfef1757277 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -54,6 +54,9 @@ def AMDGPUconstdata_ptr : SDNode< // This argument to this node is a dword address. def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>; +// Force dependencies for vector trunc stores +def R600dummy_chain : SDNode<"AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]>; + def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>; def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>; diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index b04e954550d..31c08d0cd2a 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -1120,7 +1120,10 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store, llvm_unreachable("Unsupported private trunc store"); } - SDValue Chain = Store->getChain(); + SDValue OldChain = Store->getChain(); + bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN); + // Skip dummy + SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain; SDValue BasePtr = Store->getBasePtr(); SDValue Offset = Store->getOffset(); EVT MemVT = Store->getMemoryVT(); @@ -1176,7 +1179,15 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store, // Store dword // TODO: Can we be smarter about MachinePointerInfo? - return DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo()); + SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo()); + + // If we are part of expanded vector, make our neighbors depend on this store + if (VectorTrunc) { + // Make all other vector elements depend on this store + Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore); + DAG.ReplaceAllUsesOfValueWith(OldChain, Chain); + } + return NewStore; } SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { @@ -1196,6 +1207,17 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { // Neither LOCAL nor PRIVATE can do vectors at the moment if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) && VT.isVector()) { + if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) { + // Add an extra level of chain to isolate this vector + SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain); + // TODO: can the chain be replaced without creating a new store? + SDValue NewStore = DAG.getTruncStore( + NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(), + MemVT, StoreNode->getAlignment(), + StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo()); + StoreNode = cast<StoreSDNode>(NewStore); + } + return scalarizeVectorStore(StoreNode, DAG); } @@ -1230,7 +1252,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { // Put the mask in correct place SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift); - // Put the mask in correct place + // Put the value bits in correct place SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant); SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift); diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td index 19795bdde64..9210e66b0fe 100644 --- a/llvm/lib/Target/AMDGPU/R600Instructions.td +++ b/llvm/lib/Target/AMDGPU/R600Instructions.td @@ -727,6 +727,20 @@ def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>; def MOV : R600_1OP <0x19, "MOV", []>; + +// This is a hack to get rid of DUMMY_CHAIN nodes. +// Most DUMMY_CHAINs should be eliminated during legalization, but undef +// values can sneak in some to selection. +let isPseudo = 1, isCodeGenOnly = 1 in { +def DUMMY_CHAIN : AMDGPUInst < + (outs), + (ins), + "DUMMY_CHAIN", + [(R600dummy_chain)] +>; +} // end let isPseudo = 1, isCodeGenOnly = 1 + + let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in { class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst < |

