diff options
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 71 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll | 10 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/ppc-redzone-alignment-bug.ll | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll | 16 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/tls_get_addr_clobbers.ll | 18 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/vsxD-Form-spills.ll | 50 |
6 files changed, 56 insertions, 117 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 7ece7f99349..7902da20a01 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -823,39 +823,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, assert((isPPC64 || !MustSaveCR) && "Prologue CR saving supported only in 64-bit mode"); - // Check if we can move the stack update instruction (stdu) down the prologue - // past the callee saves. Hopefully this will avoid the situation where the - // saves are waiting for the update on the store with update to complete. - MachineBasicBlock::iterator StackUpdateLoc = MBBI; - bool MovingStackUpdateDown = false; - // This optimization has a number of guards. At this point we are being very - // cautious and we do not try to do this when we have a fast call or - // we are using PIC base or we are using a frame pointer or a base pointer. - // It would be possible to turn on this optimization under these conditions - // as well but it would require further modifications to the prologue and - // epilogue. For example, if we want to turn on this optimization for - // functions that use frame pointers we would have to take into consideration - // the fact that spills to the stack may be using r30 instead of r1. - // Aside form that we need to have a non-zero frame and we need to have a - // non-large frame size. Notice that we did not use !isLargeFrame but we used - // isInt<16>(FrameSize) instead. This is important because this guard has to - // be identical to the one in the epilogue and in the epilogue the variable - // is defined as bool isLargeFrame = !isInt<16>(FrameSize); - if (FrameSize && !FI->hasFastCall() && !FI->usesPICBase() && !HasFP && - !HasBP && isInt<16>(FrameSize)) { - const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); - for (int i=0; i<Info.size(); i++) { - int FrIdx = Info[i].getFrameIdx(); - if (FrIdx < 0) { - if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { - MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize); - StackUpdateLoc++; - MovingStackUpdateDown = true; - } - } - } - } - // If we need to spill the CR and the LR but we don't have two separate // registers available, we must spill them one at a time if (MustSaveCR && SingleScratchReg && MustSaveLR) { @@ -919,7 +886,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, } if (MustSaveLR) - BuildMI(MBB, StackUpdateLoc, dl, StoreInst) + BuildMI(MBB, MBBI, dl, StoreInst) .addReg(ScratchReg, getKillRegState(true)) .addImm(LROffset) .addReg(SPReg); @@ -987,7 +954,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, HasSTUX = true; } else if (!isLargeFrame) { - BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) + BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg) .addReg(SPReg) .addImm(NegFrameSize) .addReg(SPReg); @@ -1227,12 +1194,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, } int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx()); - // We have changed the object offset above but we do not want to change - // the actual offsets in the CFI instruction so we have to undo the - // offset change here. - if (MovingStackUpdateDown) - Offset -= NegFrameSize; - unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) @@ -1378,26 +1339,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, unsigned RBReg = SPReg; unsigned SPAdd = 0; - // Check if we can move the stack update instruction up the epilogue - // past the callee saves. This will allow the move to LR instruction - // to be executed before the restores of the callee saves which means - // that the callee saves can hide the latency from the MTLR instrcution. - MachineBasicBlock::iterator StackUpdateLoc = MBBI; - bool MovingStackUpdateUp = false; - if (FrameSize && !FI->hasFastCall() && !FI->usesPICBase() && !HasFP && - !HasBP && !isLargeFrame) { - const std::vector< CalleeSavedInfo > & Info = MFI.getCalleeSavedInfo(); - for (int i=0; i<Info.size(); i++) { - int FrIdx = Info[i].getFrameIdx(); - if (FrIdx < 0) { - if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { - StackUpdateLoc--; - MovingStackUpdateUp = true; - } - } - } - } - if (FrameSize) { // In the prologue, the loaded (or persistent) stack pointer value is // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red @@ -1427,7 +1368,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, } } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) { if (HasRedZone) { - BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg) + BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) .addReg(SPReg) .addImm(FrameSize); } else { @@ -1451,7 +1392,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, .addReg(FPReg); RBReg = FPReg; } - BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg) + BuildMI(MBB, MBBI, dl, LoadInst, RBReg) .addImm(0) .addReg(SPReg); } @@ -1484,7 +1425,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // a base register anyway, because it may happen to be R0. bool LoadedLR = false; if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) { - BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg) + BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) .addImm(LROffset+SPAdd) .addReg(RBReg); LoadedLR = true; @@ -1556,7 +1497,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, .addReg(TempReg, getKillRegState(i == e-1)); if (MustSaveLR) - BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); + BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg); // Callee pop calling convention. Pop parameter/linkage area. Used for tail // call optimization diff --git a/llvm/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll b/llvm/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll index 8f0c78a7dfb..c72523f3593 100644 --- a/llvm/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll +++ b/llvm/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll @@ -15,12 +15,12 @@ define noalias i8* @_ZN2CC3funEv(%class.CC* %this) { ; CHECK-LABEL: _ZN2CC3funEv: ; CHECK: mflr 0 +; CHECK-NEXT: std 0, 16(1) +; CHECK-NEXT: stdu 1, -48(1) ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: .cfi_offset r30, -16 -; CHECK-NEXT: std 30, -16(1) -; CHECK-NEXT: std 0, 16(1) -; CHECK-NEXT: stdu 1, -48(1) +; CHECK-NEXT: std 30, 32(1) ; CHECK-NEXT: mr 30, 3 ; CHECK-NEXT: ld 12, 0(30) ; CHECK-NEXT: std 2, 24(1) @@ -38,11 +38,11 @@ define noalias i8* @_ZN2CC3funEv(%class.CC* %this) { ; CHECK-NEXT: mr 3, 30 ; CHECK-NEXT: bl _ZN2CC3barEPi ; CHECK-NEXT: nop -; CHECK: li 3, 0 +; CHECK: ld 30, 32(1) +; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: addi 1, 1, 48 ; CHECK-NEXT: ld 0, 16(1) ; CHECK-NEXT: mtlr 0 -; CHECK: ld 30, -16(1) ; CHECK-NEXT: blr entry: %foo = getelementptr inbounds %class.CC, %class.CC* %this, i64 0, i32 0, i32 0 diff --git a/llvm/test/CodeGen/PowerPC/ppc-redzone-alignment-bug.ll b/llvm/test/CodeGen/PowerPC/ppc-redzone-alignment-bug.ll index dce8b3ac5b8..87b45beeab7 100644 --- a/llvm/test/CodeGen/PowerPC/ppc-redzone-alignment-bug.ll +++ b/llvm/test/CodeGen/PowerPC/ppc-redzone-alignment-bug.ll @@ -16,12 +16,12 @@ entry: ; stfd 14, 416(1) ; After the fix by patch D34337: -; CHECK-LE:std 15, -280(1) -; CHECK-LE:stfd 14, -144(1) ; CHECK-LE: stdu 1, -528(1) -; CHECK-BE:std 15, -280(1) -; CHECK-BE:stfd 14, -144(1) +; CHECK-LE:std 15, 248(1) +; CHECK-LE:stfd 14, 384(1) ; CHECK-BE: stdu 1, -544(1) +; CHECK-BE:std 15, 264(1) +; CHECK-BE:stfd 14, 400(1) } define signext i32 @foo() { diff --git a/llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll b/llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll index e4898c35888..2bf4b0722f9 100644 --- a/llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll +++ b/llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll @@ -110,7 +110,7 @@ declare i32 @doSomething(i32, i32*) ; ; Epilogue code. ; CHECK: mtlr {{[0-9]+}} -; CHECK: blr +; CHECK-NEXT: blr ; ; ENABLE: .[[ELSE_LABEL]]: # %if.else ; Shift second argument by one and store into returned register. @@ -171,7 +171,7 @@ declare i32 @something(...) ; Next BB ; CHECK: %for.end ; CHECK: mtlr {{[0-9]+}} -; CHECK: blr +; CHECK-NEXT: blr define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) { entry: br label %for.preheader @@ -209,9 +209,9 @@ for.end: ; preds = %for.body ; Make sure we save the link register ; CHECK: mflr {{[0-9]+}} ; -; DISABLE: std -; DISABLE-NEXT: std ; DISABLE: cmplwi 0, 3, 0 +; DISABLE-NEXT: std +; DISABLE-NEXT: std ; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] ; ; Loop preheader @@ -240,7 +240,7 @@ for.end: ; preds = %for.body ; DISABLE: .[[EPILOG_BB]]: # %if.end ; Epilog code ; CHECK: mtlr {{[0-9]+}} -; CHECK: blr +; CHECK-NEXT: blr ; ; ENABLE: .[[ELSE_LABEL]]: # %if.else ; Shift second argument by one and store into returned register. @@ -291,9 +291,9 @@ declare void @somethingElse(...) ; Make sure we save the link register ; CHECK: mflr {{[0-9]+}} ; -; DISABLE: std -; DISABLE-NEXT: std ; DISABLE: cmplwi 0, 3, 0 +; DISABLE-NEXT: std +; DISABLE-NEXT: std ; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] ; ; CHECK: bl somethingElse @@ -322,7 +322,7 @@ declare void @somethingElse(...) ; ; Epilogue code. ; CHECK: mtlr {{[0-9]+}} -; CHECK: blr +; CHECK-NEXT: blr ; ; ENABLE: .[[ELSE_LABEL]]: # %if.else ; Shift second argument by one and store into returned register. diff --git a/llvm/test/CodeGen/PowerPC/tls_get_addr_clobbers.ll b/llvm/test/CodeGen/PowerPC/tls_get_addr_clobbers.ll index 632358ec861..108993fea95 100644 --- a/llvm/test/CodeGen/PowerPC/tls_get_addr_clobbers.ll +++ b/llvm/test/CodeGen/PowerPC/tls_get_addr_clobbers.ll @@ -6,7 +6,7 @@ define void @test_foo(i32* nocapture %x01, i32* nocapture %x02, i32* nocapture % entry: ; CHECK-LABEL: test_foo: -; CHECK-DAG: stdu 1, {{-?[0-9]+}}(1) +; CHECK: stdu 1, {{-?[0-9]+}}(1) ; CHECK-DAG: mr [[BACKUP_3:[0-9]+]], 3 ; CHECK-DAG: mr [[BACKUP_4:[0-9]+]], 4 ; CHECK-DAG: mr [[BACKUP_5:[0-9]+]], 5 @@ -15,14 +15,14 @@ entry: ; CHECK-DAG: mr [[BACKUP_8:[0-9]+]], 8 ; CHECK-DAG: mr [[BACKUP_9:[0-9]+]], 9 ; CHECK-DAG: mr [[BACKUP_10:[0-9]+]], 10 -; CHECK-DAG: std [[BACKUP_3]], {{-?[0-9]+}}(1) -; CHECK-DAG: std [[BACKUP_4]], {{-?[0-9]+}}(1) -; CHECK-DAG: std [[BACKUP_5]], {{-?[0-9]+}}(1) -; CHECK-DAG: std [[BACKUP_6]], {{-?[0-9]+}}(1) -; CHECK-DAG: std [[BACKUP_7]], {{-?[0-9]+}}(1) -; CHECK-DAG: std [[BACKUP_8]], {{-?[0-9]+}}(1) -; CHECK-DAG: std [[BACKUP_9]], {{-?[0-9]+}}(1) -; CHECK-DAG: std [[BACKUP_10]], {{-?[0-9]+}}(1) +; CHECK-DAG: std [[BACKUP_3]], {{[0-9]+}}(1) +; CHECK-DAG: std [[BACKUP_4]], {{[0-9]+}}(1) +; CHECK-DAG: std [[BACKUP_5]], {{[0-9]+}}(1) +; CHECK-DAG: std [[BACKUP_6]], {{[0-9]+}}(1) +; CHECK-DAG: std [[BACKUP_7]], {{[0-9]+}}(1) +; CHECK-DAG: std [[BACKUP_8]], {{[0-9]+}}(1) +; CHECK-DAG: std [[BACKUP_9]], {{[0-9]+}}(1) +; CHECK-DAG: std [[BACKUP_10]], {{[0-9]+}}(1) ; CHECK: bl __tls_get_addr ; CHECK-DAG: stw 3, 0([[BACKUP_3]]) ; CHECK-DAG: stw 3, 0([[BACKUP_4]]) diff --git a/llvm/test/CodeGen/PowerPC/vsxD-Form-spills.ll b/llvm/test/CodeGen/PowerPC/vsxD-Form-spills.ll index b02c8aeba17..92427f5ccde 100644 --- a/llvm/test/CodeGen/PowerPC/vsxD-Form-spills.ll +++ b/llvm/test/CodeGen/PowerPC/vsxD-Form-spills.ll @@ -4,37 +4,35 @@ define <4 x i32> @testSpill(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: testSpill: -; CHECK-DAG: li [[REG64:[0-9]+]], -64 -; CHECK-DAG: li [[REG48:[0-9]+]], -48 -; CHECK-DAG: li [[REG32:[0-9]+]], -32 -; CHECK-DAG: li [[REG16:[0-9]+]], -16 -; CHECK-NOT: li -; CHECK-DAG: stxvd2x 60, 1, [[REG64]] # 16-byte Folded Spill -; CHECK-DAG: stxvd2x 61, 1, [[REG48]] # 16-byte Folded Spill -; CHECK-DAG: stxvd2x 62, 1, [[REG32]] # 16-byte Folded Spill -; CHECK-DAG: stxvd2x 63, 1, [[REG16]] # 16-byte Folded Spill -; CHECK: std 0, 16(1) -; CHECK-DAG: li [[REG16:[0-9]+]], -16 -; CHECK-DAG: li [[REG32:[0-9]+]], -32 -; CHECK-DAG: li [[REG48:[0-9]+]], -48 -; CHECK-DAG: li [[REG64:[0-9]+]], -64 +; CHECK: li 11, 80 +; CHECK: li 12, 96 +; CHECK: li 3, 48 +; CHECK: li 10, 64 +; CHECK: stxvd2x 62, 1, 11 # 16-byte Folded Spill +; CHECK: stxvd2x 63, 1, 12 # 16-byte Folded Spill +; CHECK: stxvd2x 60, 1, 3 # 16-byte Folded Spill +; CHECK: stxvd2x 61, 1, 10 # 16-byte Folded Spill +; CHECK: li 9, 96 +; CHECK: li 10, 80 +; CHECK: li 11, 64 +; CHECK: li 12, 48 +; CHECK: lxvd2x 63, 1, 9 # 16-byte Folded Reload +; CHECK: lxvd2x 62, 1, 10 # 16-byte Folded Reload +; CHECK: lxvd2x 61, 1, 11 # 16-byte Folded Reload +; CHECK: lxvd2x 60, 1, 12 # 16-byte Folded Reload ; CHECK: mtlr 0 -; CHECK-DAG: lxvd2x 63, 1, [[REG16]] # 16-byte Folded Reload -; CHECK-DAG: lxvd2x 62, 1, [[REG32]] # 16-byte Folded Reload -; CHECK-DAG: lxvd2x 61, 1, [[REG48]] # 16-byte Folded Reload -; CHECK-DAG: lxvd2x 60, 1, [[REG64]] # 16-byte Folded Reload ; CHECK-NEXT: blr ; ; CHECK-PWR9-LABEL: testSpill: -; CHECK-PWR9-DAG: stxv 60, -64(1) # 16-byte Folded Spill -; CHECK-PWR9-DAG: stxv 61, -48(1) # 16-byte Folded Spill -; CHECK-PWR9-DAG: stxv 62, -32(1) # 16-byte Folded Spill -; CHECK-PWR9-DAG: stxv 63, -16(1) # 16-byte Folded Spill +; CHECK-PWR9: stxv 62, 80(1) # 16-byte Folded Spill +; CHECK-PWR9: stxv 63, 96(1) # 16-byte Folded Spill +; CHECK-PWR9: stxv 60, 48(1) # 16-byte Folded Spill +; CHECK-PWR9: stxv 61, 64(1) # 16-byte Folded Spill +; CHECK-PWR9: lxv 63, 96(1) # 16-byte Folded Reload +; CHECK-PWR9: lxv 62, 80(1) # 16-byte Folded Reload +; CHECK-PWR9: lxv 61, 64(1) # 16-byte Folded Reload +; CHECK-PWR9: lxv 60, 48(1) # 16-byte Folded Reload ; CHECK-PWR9: mtlr 0 -; CHECK-PWR9-DAG: lxv 63, -16(1) # 16-byte Folded Reload -; CHECK-PWR9-DAG: lxv 62, -32(1) # 16-byte Folded Reload -; CHECK-PWR9-DAG: lxv 61, -48(1) # 16-byte Folded Reload -; CHECK-PWR9-DAG: lxv 60, -64(1) # 16-byte Folded Reload ; CHECK-PWR9-NEXT: blr entry: |