summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/PowerPC/PPC.h1
-rw-r--r--llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp24
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetMachine.cpp1
-rw-r--r--llvm/test/CodeGen/PowerPC/tls_get_addr_fence1.mir66
-rw-r--r--llvm/test/CodeGen/PowerPC/tls_get_addr_fence2.mir65
5 files changed, 153 insertions, 4 deletions
diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
index 476dfc0c865..21a4355b96e 100644
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -52,6 +52,7 @@ namespace llvm {
void initializePPCVSXFMAMutatePass(PassRegistry&);
void initializePPCBoolRetToIntPass(PassRegistry&);
void initializePPCExpandISELPass(PassRegistry &);
+ void initializePPCTLSDynamicCallPass(PassRegistry &);
extern char &PPCVSXFMAMutateID;
namespace PPCII {
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 31c50785c2e..5f8085f4626 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -52,6 +52,7 @@ namespace {
protected:
bool processBlock(MachineBasicBlock &MBB) {
bool Changed = false;
+ bool NeedFence = true;
bool Is64Bit = MBB.getParent()->getSubtarget<PPCSubtarget>().isPPC64();
for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
@@ -62,6 +63,16 @@ protected:
MI.getOpcode() != PPC::ADDItlsldLADDR &&
MI.getOpcode() != PPC::ADDItlsgdLADDR32 &&
MI.getOpcode() != PPC::ADDItlsldLADDR32) {
+
+ // Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP
+ // as scheduling fences, we skip creating fences if we already
+ // have existing ADJCALLSTACKDOWN/UP to avoid nesting,
+ // which causes verification error with -verify-machineinstrs.
+ if (MI.getOpcode() == PPC::ADJCALLSTACKDOWN)
+ NeedFence = false;
+ else if (MI.getOpcode() == PPC::ADJCALLSTACKUP)
+ NeedFence = true;
+
++I;
continue;
}
@@ -96,11 +107,15 @@ protected:
break;
}
- // Don't really need to save data to the stack - the clobbered
+ // We create ADJCALLSTACKUP and ADJCALLSTACKDOWN around _tls_get_addr
+ // as schduling fence to avoid it is scheduled before
+ // mflr in the prologue and the address in LR is clobbered (PR25839).
+ // We don't really need to save data to the stack - the clobbered
// registers are already saved when the SDNode (e.g. PPCaddiTlsgdLAddr)
// gets translated to the pseudo instruction (e.g. ADDItlsgdLADDR).
- BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0)
- .addImm(0);
+ if (NeedFence)
+ BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0)
+ .addImm(0);
// Expand into two ops built prior to the existing instruction.
MachineInstr *Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3)
@@ -116,7 +131,8 @@ protected:
.addReg(GPR3));
Call->addOperand(MI.getOperand(3));
- BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0);
+ if (NeedFence)
+ BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0);
BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), OutReg)
.addReg(GPR3);
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index f6bef33fd13..fb38734ec1e 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -93,6 +93,7 @@ extern "C" void LLVMInitializePowerPCTarget() {
PassRegistry &PR = *PassRegistry::getPassRegistry();
initializePPCBoolRetToIntPass(PR);
initializePPCExpandISELPass(PR);
+ initializePPCTLSDynamicCallPass(PR);
}
/// Return the datalayout string of a subtarget.
diff --git a/llvm/test/CodeGen/PowerPC/tls_get_addr_fence1.mir b/llvm/test/CodeGen/PowerPC/tls_get_addr_fence1.mir
new file mode 100644
index 00000000000..fa8e73e321d
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/tls_get_addr_fence1.mir
@@ -0,0 +1,66 @@
+# ADJCALLSTACKDOWN and ADJCALLSTACKUP must be generated around TLS pseudo code as scheduling fence (PR25839).
+# RUN: llc -mtriple=powerpc64le-linux-gnu -run-pass=ppc-tls-dynamic-call -verify-machineinstrs -o - %s | FileCheck %s
+
+--- |
+ target datalayout = "e-m:e-i64:64-n32:64"
+ target triple = "powerpc64le-unknown-linux-gnu"
+
+ @tls_var = external thread_local local_unnamed_addr global i32
+
+ define i32 @tls_func() local_unnamed_addr {
+ entry:
+ %0 = load i32, i32* @tls_var
+ ret i32 %0
+ }
+
+...
+---
+name: tls_func
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+ - { id: 1, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+ - { id: 2, class: g8rc, preferred-register: '' }
+liveins:
+ - { reg: '%x2' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ maxCallFrameSize: 4294967295
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ savePoint: ''
+ restorePoint: ''
+fixedStack:
+stack:
+constants:
+body: |
+ bb.0.entry:
+ liveins: %x2
+ %0 = ADDIStlsgdHA %x2, @tls_var
+ %1 = ADDItlsgdLADDR killed %0, @tls_var, @tls_var, implicit-def dead %x0, implicit-def dead %x3, implicit-def dead %x4, implicit-def dead %x5, implicit-def dead %x6, implicit-def dead %x7, implicit-def dead %x8, implicit-def dead %x9, implicit-def dead %x10, implicit-def dead %x11, implicit-def dead %x12, implicit-def dead %lr8, implicit-def dead %ctr8, implicit-def dead %cr0, implicit-def dead %cr1, implicit-def dead %cr5, implicit-def dead %cr6, implicit-def dead %cr7
+ %2 = LWZ8 0, killed %1 :: (dereferenceable load 4 from @tls_var)
+ %x3 = COPY %2
+ BLR8 implicit %lr8, implicit %rm, implicit %x3
+ ; CHECK-LABEL: bb.0.entry
+ ; CHECK: %[[reg1:[0-9]+]] = ADDIStlsgdHA %x2, @tls_var
+ ; CHECK: ADJCALLSTACKDOWN 0, 0
+ ; CHECK: %x3 = ADDItlsgdL %[[reg1]], @tls_var
+ ; CHECK: %x3 = GETtlsADDR %x3, @tls_var
+ ; CHECK: ADJCALLSTACKUP 0, 0
+ ; CHECK: BLR8
+...
diff --git a/llvm/test/CodeGen/PowerPC/tls_get_addr_fence2.mir b/llvm/test/CodeGen/PowerPC/tls_get_addr_fence2.mir
new file mode 100644
index 00000000000..2bb88147fcf
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/tls_get_addr_fence2.mir
@@ -0,0 +1,65 @@
+# ADJCALLSTACKDOWN and ADJCALLSTACKUP should not be generated around TLS pseudo code if it is located within existing ADJCALLSTACKDOWN/ADJCALLSTACKUP pair.
+# RUN: llc -mtriple=powerpc64le-linux-gnu -run-pass=ppc-tls-dynamic-call -verify-machineinstrs -o - %s | FileCheck %s
+
+--- |
+ target datalayout = "e-m:e-i64:64-n32:64"
+ target triple = "powerpc64le-unknown-linux-gnu"
+
+ @tls_var = external thread_local local_unnamed_addr global i32
+
+ define i32 @tls_func() local_unnamed_addr {
+ entry:
+ %0 = load i32, i32* @tls_var
+ ret i32 %0
+ }
+
+...
+---
+name: tls_func
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+ - { id: 1, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+ - { id: 2, class: g8rc, preferred-register: '' }
+liveins:
+ - { reg: '%x2' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ maxCallFrameSize: 4294967295
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ savePoint: ''
+ restorePoint: ''
+fixedStack:
+stack:
+constants:
+body: |
+ bb.0.entry:
+ liveins: %x2
+ ADJCALLSTACKDOWN 32, 0, implicit-def %r1, implicit %r1
+ %0 = ADDIStlsgdHA %x2, @tls_var
+ %1 = ADDItlsgdLADDR killed %0, @tls_var, @tls_var, implicit-def dead %x0, implicit-def dead %x3, implicit-def dead %x4, implicit-def dead %x5, implicit-def dead %x6, implicit-def dead %x7, implicit-def dead %x8, implicit-def dead %x9, implicit-def dead %x10, implicit-def dead %x11, implicit-def dead %x12, implicit-def dead %lr8, implicit-def dead %ctr8, implicit-def dead %cr0, implicit-def dead %cr1, implicit-def dead %cr5, implicit-def dead %cr6, implicit-def dead %cr7
+ %2 = LWZ8 0, killed %1 :: (dereferenceable load 4 from @tls_var)
+ %x3 = COPY %2
+ ADJCALLSTACKUP 32, 0, implicit-def %r1, implicit %r1
+ BLR8 implicit %lr8, implicit %rm, implicit %x3
+ ; CHECK-LABEL: bb.0.entry
+ ; CHECK-NOT: ADJCALLSTACKDOWN 0, 0
+ ; CHECK-NOT: ADJCALLSTACKUP 0, 0
+ ; CHECK: BLR8
+...
OpenPOWER on IntegriCloud