diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/include/llvm/Target/TargetInstrInfo.h | 7 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/MachineBasicBlock.cpp | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.h | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/spill-cfg-position.ll | 78 | 
5 files changed, 100 insertions, 2 deletions
diff --git a/llvm/include/llvm/Target/TargetInstrInfo.h b/llvm/include/llvm/Target/TargetInstrInfo.h index 83515bc9184..7d50201c3b7 100644 --- a/llvm/include/llvm/Target/TargetInstrInfo.h +++ b/llvm/include/llvm/Target/TargetInstrInfo.h @@ -1510,6 +1510,13 @@ public:      return false;    } +  /// True if the instruction is bound to the top of its basic block and no +  /// other instructions shall be inserted before it. This can be implemented +  /// to prevent register allocator to insert spills before such instructions. +  virtual bool isBasicBlockPrologue(const MachineInstr &MI) const { +    return false; +  } +  private:    unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;    unsigned CatchRetOpcode; diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 3869f976854..a7d87799f67 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -148,8 +148,11 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {  MachineBasicBlock::iterator  MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) { +  const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo(); +    iterator E = end(); -  while (I != E && (I->isPHI() || I->isPosition())) +  while (I != E && (I->isPHI() || I->isPosition() || +                    TII->isBasicBlockPrologue(*I)))      ++I;    // FIXME: This needs to change if we wish to bundle labels    // inside the bundle. @@ -160,8 +163,11 @@ MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {  MachineBasicBlock::iterator  MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I) { +  const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo(); +    iterator E = end(); -  while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue())) +  while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue() || +                    TII->isBasicBlockPrologue(*I)))      ++I;    // FIXME: This needs to change if we wish to bundle labels / dbg_values    // inside the bundle. diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index e9dc6f57ab3..a647e29c82c 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3635,3 +3635,8 @@ ScheduleHazardRecognizer *  SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const {    return new GCNHazardRecognizer(MF);  } + +bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const { +  return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY && +         MI.modifiesRegister(AMDGPU::EXEC, &RI); +} diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index e68f6f92ba9..18d7ff6f29e 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -731,6 +731,8 @@ public:    ScheduleHazardRecognizer *    CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override; + +  bool isBasicBlockPrologue(const MachineInstr &MI) const override;  };  namespace AMDGPU { diff --git a/llvm/test/CodeGen/AMDGPU/spill-cfg-position.ll b/llvm/test/CodeGen/AMDGPU/spill-cfg-position.ll new file mode 100644 index 00000000000..686c83116fd --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/spill-cfg-position.ll @@ -0,0 +1,78 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs -stress-regalloc=6 < %s | FileCheck %s + +; Inline spiller can decide to move a spill as early as possible in the basic block. +; It will skip phis and label, but we also need to make sure it skips instructions +; in the basic block prologue which restore exec mask. +; Make sure instruction to restore exec mask immediately follows label + +; CHECK-LABEL: {{^}}spill_cfg_position: +; CHECK: s_cbranch_execz [[LABEL1:BB[0-9_]+]] +; CHECK: {{^}}[[LABEL1]]: +; CHECK: s_cbranch_execz [[LABEL2:BB[0-9_]+]] +; CHECK: {{^}}[[LABEL2]]: +; CHECK-NEXT: s_or_b64 exec +; CHECK: buffer_ + +define void @spill_cfg_position(i32 addrspace(1)* nocapture %arg) { +bb: +  %tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #0 +  %tmp14 = load i32, i32 addrspace(1)* %arg, align 4 +  %tmp15 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1 +  %tmp16 = load i32, i32 addrspace(1)* %tmp15, align 4 +  %tmp17 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2 +  %tmp18 = load i32, i32 addrspace(1)* %tmp17, align 4 +  %tmp19 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 3 +  %tmp20 = load i32, i32 addrspace(1)* %tmp19, align 4 +  %tmp21 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 4 +  %tmp22 = load i32, i32 addrspace(1)* %tmp21, align 4 +  %tmp23 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 5 +  %tmp24 = load i32, i32 addrspace(1)* %tmp23, align 4 +  %tmp25 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 6 +  %tmp26 = load i32, i32 addrspace(1)* %tmp25, align 4 +  %tmp27 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 7 +  %tmp28 = load i32, i32 addrspace(1)* %tmp27, align 4 +  %tmp29 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 8 +  %tmp30 = load i32, i32 addrspace(1)* %tmp29, align 4 +  %tmp33 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp1 +  %tmp34 = load i32, i32 addrspace(1)* %tmp33, align 4 +  %tmp35 = icmp eq i32 %tmp34, 0 +  br i1 %tmp35, label %bb44, label %bb36 + +bb36:                                             ; preds = %bb +  %tmp37 = mul nsw i32 %tmp20, %tmp18 +  %tmp38 = add nsw i32 %tmp37, %tmp16 +  %tmp39 = mul nsw i32 %tmp24, %tmp22 +  %tmp40 = add nsw i32 %tmp38, %tmp39 +  %tmp41 = mul nsw i32 %tmp28, %tmp26 +  %tmp42 = add nsw i32 %tmp40, %tmp41 +  %tmp43 = add nsw i32 %tmp42, %tmp30 +  br label %bb52 + +bb44:                                             ; preds = %bb +  %tmp45 = mul nsw i32 %tmp18, %tmp16 +  %tmp46 = mul nsw i32 %tmp22, %tmp20 +  %tmp47 = add nsw i32 %tmp46, %tmp45 +  %tmp48 = mul nsw i32 %tmp26, %tmp24 +  %tmp49 = add nsw i32 %tmp47, %tmp48 +  %tmp50 = mul nsw i32 %tmp30, %tmp28 +  %tmp51 = add nsw i32 %tmp49, %tmp50 +  br label %bb52 + +bb52:                                             ; preds = %bb44, %bb36 +  %tmp53 = phi i32 [ %tmp43, %bb36 ], [ %tmp51, %bb44 ] +  %tmp54 = mul nsw i32 %tmp16, %tmp14 +  %tmp55 = mul nsw i32 %tmp22, %tmp18 +  %tmp56 = mul nsw i32 %tmp24, %tmp20 +  %tmp57 = mul nsw i32 %tmp30, %tmp26 +  %tmp58 = add i32 %tmp55, %tmp54 +  %tmp59 = add i32 %tmp58, %tmp56 +  %tmp60 = add i32 %tmp59, %tmp28 +  %tmp61 = add i32 %tmp60, %tmp57 +  %tmp62 = add i32 %tmp61, %tmp53 +  store i32 %tmp62, i32 addrspace(1)* %tmp33, align 4 +  ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #0 + +attributes #0 = { nounwind readnone }  | 

