diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-06-18 12:23:46 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-06-18 12:23:46 +0000 |
| commit | bcb5ea0042847a25b58c407e57e0c9b1c289e72a (patch) | |
| tree | 9f1484f6e257fdd82ae4dccaf70ac8c71dc9c159 /llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | |
| parent | e75e197ad879177bc643d4c5d5596bbe173555e7 (diff) | |
| download | bcm5719-llvm-bcb5ea0042847a25b58c407e57e0c9b1c289e72a.tar.gz bcm5719-llvm-bcb5ea0042847a25b58c407e57e0c9b1c289e72a.zip | |
AMDGPU: Fold readlane from copy of SGPR or imm
These may be inserted to assert uniformity somewhere.
llvm-svn: 363670
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIFoldOperands.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 35 |
1 file changed, 35 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 61a4407cbcf..00698e2dd4e 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -506,6 +506,41 @@ void SIFoldOperands::foldOperand( return; } + unsigned UseOpc = UseMI->getOpcode(); + if (UseOpc == AMDGPU::V_READFIRSTLANE_B32 || + (UseOpc == AMDGPU::V_READLANE_B32 && + (int)UseOpIdx == + AMDGPU::getNamedOperandIdx(UseOpc, AMDGPU::OpName::src0))) { + // %vgpr = V_MOV_B32 imm + // %sgpr = V_READFIRSTLANE_B32 %vgpr + // => + // %sgpr = S_MOV_B32 imm + if (FoldingImm) { + if (!isEXECMaskConstantBetweenDefAndUses( + UseMI->getOperand(UseOpIdx).getReg(), *MRI)) + return; + + UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32)); + UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm()); + UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane) + return; + } + + if (OpToFold.isReg() && TRI->isSGPRReg(*MRI, OpToFold.getReg())) { + if (!isEXECMaskConstantBetweenDefAndUses( + UseMI->getOperand(UseOpIdx).getReg(), *MRI)) + return; + + // %vgpr = COPY %sgpr0 + // %sgpr1 = V_READFIRSTLANE_B32 %vgpr + // => + // %sgpr1 = COPY %sgpr0 + UseMI->setDesc(TII->get(AMDGPU::COPY)); + UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane) + return; + } + } + const MCInstrDesc &UseDesc = UseMI->getDesc(); // Don't fold into target independent nodes. Target independent opcodes |

