summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2016-08-15 16:18:36 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2016-08-15 16:18:36 +0000
commit3661e90e718107888948a2d50e452f09df218551 (patch)
treed1a3da8a9fe1372260a2f24fa013d7be79dafed1 /llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
parent7cb809983daa7c32b4ca25d268983acbefb0f2bd (diff)
downloadbcm5719-llvm-3661e90e718107888948a2d50e452f09df218551.tar.gz
bcm5719-llvm-3661e90e718107888948a2d50e452f09df218551.zip
AMDGPU: Don't fold subregister extracts into tied operands
llvm-svn: 278676
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIFoldOperands.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIFoldOperands.cpp18
1 files changed, 15 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 4ecc0fcc623..3f64cf84c69 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -197,9 +197,21 @@ static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
// FIXME: Fold operands with subregs.
- if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
- UseOp.isImplicit())) {
- return;
+ if (UseOp.isReg() && OpToFold.isReg()) {
+ if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)
+ return;
+
+ // Don't fold subregister extracts into tied operands, only if it is a full
+ // copy since a subregister use tied to a full register def doesn't really
+ // make sense. e.g. don't fold:
+ //
+ // %vreg1 = COPY %vreg0:sub1
+ // %vreg2<tied3> = V_MAC_F32 %vreg3, %vreg4, %vreg1<tied0>
+ //
+ // into
+ // %vreg2<tied3> = V_MAC_F32 %vreg3, %vreg4, %vreg0:sub1<tied0>
+ if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister)
+ return;
}
bool FoldingImm = OpToFold.isImm();
OpenPOWER on IntegriCloud