summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
author Matt Arsenault <Matthew.Arsenault@amd.com> 2019-06-18 12:23:46 +0000
committer Matt Arsenault <Matthew.Arsenault@amd.com> 2019-06-18 12:23:46 +0000
commit bcb5ea0042847a25b58c407e57e0c9b1c289e72a (patch)
tree 9f1484f6e257fdd82ae4dccaf70ac8c71dc9c159 /llvm/lib
parent e75e197ad879177bc643d4c5d5596bbe173555e7 (diff)
download bcm5719-llvm-bcb5ea0042847a25b58c407e57e0c9b1c289e72a.tar.gz
bcm5719-llvm-bcb5ea0042847a25b58c407e57e0c9b1c289e72a.zip
AMDGPU: Fold readlane from copy of SGPR or imm

These may be inserted to assert uniformity somewhere.

llvm-svn: 363670
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp 35
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstructions.td 7
2 files changed, 42 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 61a4407cbcf..00698e2dd4e 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -506,6 +506,41 @@ void SIFoldOperands::foldOperand(
return;
}
+ unsigned UseOpc = UseMI->getOpcode();
+ if (UseOpc == AMDGPU::V_READFIRSTLANE_B32 ||
+ (UseOpc == AMDGPU::V_READLANE_B32 &&
+ (int)UseOpIdx ==
+ AMDGPU::getNamedOperandIdx(UseOpc, AMDGPU::OpName::src0))) {
+ // %vgpr = V_MOV_B32 imm
+ // %sgpr = V_READFIRSTLANE_B32 %vgpr
+ // =>
+ // %sgpr = S_MOV_B32 imm
+ if (FoldingImm) {
+ if (!isEXECMaskConstantBetweenDefAndUses(
+ UseMI->getOperand(UseOpIdx).getReg(), *MRI))
+ return;
+
+ UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32));
+ UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
+ UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
+ return;
+ }
+
+ if (OpToFold.isReg() && TRI->isSGPRReg(*MRI, OpToFold.getReg())) {
+ if (!isEXECMaskConstantBetweenDefAndUses(
+ UseMI->getOperand(UseOpIdx).getReg(), *MRI))
+ return;
+
+ // %vgpr = COPY %sgpr0
+ // %sgpr1 = V_READFIRSTLANE_B32 %vgpr
+ // =>
+ // %sgpr1 = COPY %sgpr0
+ UseMI->setDesc(TII->get(AMDGPU::COPY));
+ UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
+ return;
+ }
+ }
+
const MCInstrDesc &UseDesc = UseMI->getDesc();
// Don't fold into target independent nodes. Target independent opcodes
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 202955a9626..2c68fdf2db3 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1696,6 +1696,13 @@ def : GCNPat<
(S_SUB_I32 $src0, NegSubInlineConst32:$src1)
>;
+// Avoid pointlessly materializing a constant in VGPR.
+// FIXME: Should also do this for readlane, but tablegen crashes on
+// the ignored src1.
+def : GCNPat<
+ (int_amdgcn_readfirstlane (i32 imm:$src)),
+ (S_MOV_B32 $src)
+>;
multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
def : GCNPat <
OpenPOWER on IntegriCloud