diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2016-04-29 14:34:26 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2016-04-29 14:34:26 +0000 |
commit | 92b24f324be33b982d87814bc8b721f8c0e55ae7 (patch) | |
tree | d6137bf33a1de1e395ca0a1dac77c007c6d86919 /llvm | |
parent | fba875f90287e2c1037bb8cf0acffafba87cf8c7 (diff) | |
download | bcm5719-llvm-92b24f324be33b982d87814bc8b721f8c0e55ae7.tar.gz bcm5719-llvm-92b24f324be33b982d87814bc8b721f8c0e55ae7.zip |
AMDGPU/SI: Add offset field to ds_permute/ds_bpermute instructions
Summary:
The ds_permute and ds_bpermute instructions can add an immediate offset to
the address, like other DS instructions.
Reviewers: arsenm
Subscribers: arsenm, scchan
Differential Revision: http://reviews.llvm.org/D19233
llvm-svn: 268043
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 12 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll | 10 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll | 12 |
5 files changed, 29 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 0e3f7ed7749..015cc8f0b20 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -822,6 +822,7 @@ bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset, bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base, SDValue &Offset) const { + SDLoc DL(Addr); if (CurDAG->isBaseWithConstantOffset(Addr)) { SDValue N0 = Addr.getOperand(0); SDValue N1 = Addr.getOperand(1); @@ -829,7 +830,7 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base, if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) { // (add n0, c0) Base = N0; - Offset = N1; + Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); return true; } } else if (Addr.getOpcode() == ISD::SUB) { @@ -837,7 +838,6 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base, if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) { int64_t ByteOffset = C->getSExtValue(); if (isUInt<16>(ByteOffset)) { - SDLoc DL(Addr); SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); // XXX - This is kind of hacky. Create a dummy sub node so we can check diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index ed98b27396b..2ab4f78a49b 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -224,10 +224,6 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, // will use this for some partially aligned loads. const MachineOperand *Offset0Imm = getNamedOperand(*LdSt, AMDGPU::OpName::offset0); - // DS_PERMUTE does not have Offset0Imm (and Offset1Imm). 
- if (!Offset0Imm) - return false; - const MachineOperand *Offset1Imm = getNamedOperand(*LdSt, AMDGPU::OpName::offset1); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 8f8533daa1e..643d4a528ad 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2494,16 +2494,16 @@ multiclass DS_1A1D_RET <bits<8> op, string opName, RegisterClass rc, multiclass DS_1A1D_PERMUTE <bits<8> op, string opName, RegisterClass rc, SDPatternOperator node = null_frag, dag outs = (outs rc:$vdst), - dag ins = (ins VGPR_32:$addr, rc:$data0), - string asm = opName#" $vdst, $addr, $data0"> { + dag ins = (ins VGPR_32:$addr, rc:$data0, offset:$offset), + string asm = opName#" $vdst, $addr, $data0"#"$offset"> { let mayLoad = 0, mayStore = 0, isConvergent = 1 in { def "" : DS_Pseudo <opName, outs, ins, - [(set (i32 rc:$vdst), - (node (i32 VGPR_32:$addr), (i32 rc:$data0)))]>; + [(set i32:$vdst, + (node (DS1Addr1Offset i32:$addr, i16:$offset), i32:$data0))]>; - let data1 = 0, offset0 = 0, offset1 = 0, gds = 0 in { - def "_vi" : DS_Real_vi <op, opName, outs, ins, asm>; + let data1 = 0, gds = 0 in { + def "_vi" : DS_Off16_Real_vi <op, opName, outs, ins, asm>; } } } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll index fa12ee2fe92..e2c535baaa8 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll @@ -11,4 +11,14 @@ define void @ds_bpermute(i32 addrspace(1)* %out, i32 %index, i32 %src) nounwind ret void } +; CHECK-LABEL: {{^}}ds_bpermute_imm_offset: +; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4 +; CHECK: s_waitcnt lgkmcnt +define void @ds_bpermute_imm_offset(i32 addrspace(1)* %out, i32 %base_index, i32 %src) nounwind { + %index = add i32 %base_index, 4 + %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %index, i32 %src) #0 + store i32 %bpermute, i32 
addrspace(1)* %out, align 4 + ret void +} + attributes #0 = { nounwind readnone convergent } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll index f0ee48ff448..6d9c9419153 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll @@ -2,7 +2,7 @@ declare i32 @llvm.amdgcn.ds.permute(i32, i32) #0 -; FUNC-LABEL: {{^}}ds_permute: +; CHECK-LABEL: {{^}}ds_permute: ; CHECK: ds_permute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; CHECK: s_waitcnt lgkmcnt define void @ds_permute(i32 addrspace(1)* %out, i32 %index, i32 %src) nounwind { @@ -11,4 +11,14 @@ define void @ds_permute(i32 addrspace(1)* %out, i32 %index, i32 %src) nounwind { ret void } +; CHECK-LABEL: {{^}}ds_permute_imm_offset: +; CHECK: ds_permute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4 +; CHECK: s_waitcnt lgkmcnt +define void @ds_permute_imm_offset(i32 addrspace(1)* %out, i32 %base_index, i32 %src) nounwind { + %index = add i32 %base_index, 4 + %bpermute = call i32 @llvm.amdgcn.ds.permute(i32 %index, i32 %src) #0 + store i32 %bpermute, i32 addrspace(1)* %out, align 4 + ret void +} + attributes #0 = { nounwind readnone convergent } |