summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp 35
-rw-r--r-- llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll 6
2 files changed, 28 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index b55dee68d51..e1257b1d33e 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -48,24 +48,36 @@ public:
struct FoldCandidate {
MachineInstr *UseMI;
- unsigned UseOpNo;
- MachineOperand *OpToFold;
- uint64_t ImmToFold;
+ union {
+ MachineOperand *OpToFold;
+ uint64_t ImmToFold;
+ int FrameIndexToFold;
+ };
+ unsigned char UseOpNo;
+ MachineOperand::MachineOperandType Kind;
FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
- UseMI(MI), UseOpNo(OpNo) {
-
+ UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()) {
if (FoldOp->isImm()) {
- OpToFold = nullptr;
ImmToFold = FoldOp->getImm();
+ } else if (FoldOp->isFI()) {
+ FrameIndexToFold = FoldOp->getIndex();
} else {
assert(FoldOp->isReg());
OpToFold = FoldOp;
}
}
+ bool isFI() const {
+ return Kind == MachineOperand::MO_FrameIndex;
+ }
+
bool isImm() const {
- return !OpToFold;
+ return Kind == MachineOperand::MO_Immediate;
+ }
+
+ bool isReg() const {
+ return Kind == MachineOperand::MO_Register;
}
};
@@ -107,6 +119,11 @@ static bool updateOperand(FoldCandidate &Fold,
return true;
}
+ if (Fold.isFI()) {
+ Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
+ return true;
+ }
+
MachineOperand *New = Fold.OpToFold;
if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
TargetRegisterInfo::isVirtualRegister(New->getReg())) {
@@ -448,7 +465,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
unsigned OpSize = TII->getOpSize(MI, 1);
MachineOperand &OpToFold = MI.getOperand(1);
- bool FoldingImm = OpToFold.isImm();
+ bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
// FIXME: We could also be folding things like FrameIndexes and
// TargetIndexes.
@@ -500,7 +517,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
for (FoldCandidate &Fold : FoldList) {
if (updateOperand(Fold, TRI)) {
// Clear kill flags.
- if (!Fold.isImm()) {
+ if (Fold.isReg()) {
assert(Fold.OpToFold && Fold.OpToFold->isReg());
// FIXME: Probably shouldn't bother trying to fold if not an
// SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll b/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll
index 6e6f289f5d6..eb554e21730 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll
@@ -7,10 +7,8 @@
;
; CHECK-LABEL: {{^}}main:
; CHECK: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
-; CHECK: v_mov_b32_e32 [[HI_CONST:v[0-9]+]], 0x200
-; CHECK: v_mov_b32_e32 [[LO_CONST:v[0-9]+]], 0
-; CHECK: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, [[BYTES]], [[HI_CONST]]
-; CHECK: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, [[BYTES]], [[LO_CONST]]
+; CHECK: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, 0x200, [[BYTES]]
+; CHECK: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, 0, [[BYTES]]
; CHECK: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
; CHECK: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
define amdgpu_ps float @main(i32 %idx) {
OpenPOWER on IntegriCloud