diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-08-15 16:18:36 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-08-15 16:18:36 +0000 |
| commit | 3661e90e718107888948a2d50e452f09df218551 (patch) | |
| tree | d1a3da8a9fe1372260a2f24fa013d7be79dafed1 /llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | |
| parent | 7cb809983daa7c32b4ca25d268983acbefb0f2bd (diff) | |
| download | bcm5719-llvm-3661e90e718107888948a2d50e452f09df218551.tar.gz bcm5719-llvm-3661e90e718107888948a2d50e452f09df218551.zip | |
AMDGPU: Don't fold subregister extracts into tied operands
llvm-svn: 278676
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIFoldOperands.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 18 |
1 file changed, 15 insertions, 3 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 4ecc0fcc623..3f64cf84c69 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -197,9 +197,21 @@ static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
   const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
 
   // FIXME: Fold operands with subregs.
-  if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
-      UseOp.isImplicit())) {
-    return;
+  if (UseOp.isReg() && OpToFold.isReg()) {
+    if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)
+      return;
+
+    // Don't fold subregister extracts into tied operands, only if it is a full
+    // copy since a subregister use tied to a full register def doesn't really
+    // make sense. e.g. don't fold:
+    //
+    // %vreg1 = COPY %vreg0:sub1
+    // %vreg2<tied3> = V_MAC_F32 %vreg3, %vreg4, %vreg1<tied0>
+    //
+    // into
+    // %vreg2<tied3> = V_MAC_F32 %vreg3, %vreg4, %vreg0:sub1<tied0>
+    if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister)
+      return;
   }
 
   bool FoldingImm = OpToFold.isImm();
```

