summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2019-06-18 12:23:46 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2019-06-18 12:23:46 +0000
commit: bcb5ea0042847a25b58c407e57e0c9b1c289e72a (patch)
tree: 9f1484f6e257fdd82ae4dccaf70ac8c71dc9c159 /llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
parent: e75e197ad879177bc643d4c5d5596bbe173555e7 (diff)
download: bcm5719-llvm-bcb5ea0042847a25b58c407e57e0c9b1c289e72a.tar.gz
download: bcm5719-llvm-bcb5ea0042847a25b58c407e57e0c9b1c289e72a.zip
AMDGPU: Fold readlane from copy of SGPR or imm
These may be inserted to assert uniformity somewhere.

llvm-svn: 363670
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIFoldOperands.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 35
1 files changed, 35 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 61a4407cbcf..00698e2dd4e 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -506,6 +506,41 @@ void SIFoldOperands::foldOperand(
return;
}
+ unsigned UseOpc = UseMI->getOpcode();
+ if (UseOpc == AMDGPU::V_READFIRSTLANE_B32 ||
+ (UseOpc == AMDGPU::V_READLANE_B32 &&
+ (int)UseOpIdx ==
+ AMDGPU::getNamedOperandIdx(UseOpc, AMDGPU::OpName::src0))) {
+ // %vgpr = V_MOV_B32 imm
+ // %sgpr = V_READFIRSTLANE_B32 %vgpr
+ // =>
+ // %sgpr = S_MOV_B32 imm
+ if (FoldingImm) {
+ if (!isEXECMaskConstantBetweenDefAndUses(
+ UseMI->getOperand(UseOpIdx).getReg(), *MRI))
+ return;
+
+ UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32));
+ UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
+ UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
+ return;
+ }
+
+ if (OpToFold.isReg() && TRI->isSGPRReg(*MRI, OpToFold.getReg())) {
+ if (!isEXECMaskConstantBetweenDefAndUses(
+ UseMI->getOperand(UseOpIdx).getReg(), *MRI))
+ return;
+
+ // %vgpr = COPY %sgpr0
+ // %sgpr1 = V_READFIRSTLANE_B32 %vgpr
+ // =>
+ // %sgpr1 = COPY %sgpr0
+ UseMI->setDesc(TII->get(AMDGPU::COPY));
+ UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
+ return;
+ }
+ }
+
const MCInstrDesc &UseDesc = UseMI->getDesc();
// Don't fold into target independent nodes. Target independent opcodes
OpenPOWER on IntegriCloud