summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp3
-rw-r--r--llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll95
3 files changed, 97 insertions, 3 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a0be1c9f11f..03944321f79 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -469,7 +469,7 @@ TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx,
}
NewOps.push_back(User->getOperand(i));
}
- TLO.DAG.UpdateNodeOperands(User, NewOps);
+ User = TLO.DAG.UpdateNodeOperands(User, NewOps);
// Op has less users now, so we may be able to perform additional combines
// with it.
DCI.AddToWorklist(Op.getNode());
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 82d1bc270a4..64a4d06e95c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6507,8 +6507,7 @@ SDNode *SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,
Node->getOperand(i)), 0));
}
- DAG.UpdateNodeOperands(Node, Ops);
- return Node;
+ return DAG.UpdateNodeOperands(Node, Ops);
}
/// \brief Fold the instructions after selecting them.
diff --git a/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll b/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll
new file mode 100644
index 00000000000..f8077cd8e3a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll
@@ -0,0 +1,95 @@
+; RUN: llc -march=amdgcn < %s | FileCheck %s
+
+; Check we can compile this bugpoint-reduced test without an
+; infinite loop in TLI.SimplifyDemandedBits() due to failure
+; to use return value of TLO.DAG.UpdateNodeOperands()
+
+; Check that code was generated; we know there will be
+; a s_endpgm, so check for it.
+
+@0 = external unnamed_addr addrspace(3) global [462 x float], align 4
+
+; Function Attrs: nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workitem.id.y() #0
+
+; Function Attrs: nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+; Function Attrs: nounwind readnone speculatable
+declare float @llvm.fmuladd.f32(float, float, float) #0
+
+; CHECK: s_endpgm
+define amdgpu_kernel void @foo(float addrspace(1)* noalias nocapture readonly %arg, float addrspace(1)* noalias nocapture readonly %arg1, float addrspace(1)* noalias nocapture %arg2, float %arg3) local_unnamed_addr !reqd_work_group_size !0 {
+bb:
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %tmp4 = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %tmp5 = and i32 %tmp, 15
+ %tmp6 = mul nuw nsw i32 %tmp5, 21
+ %tmp7 = sub i32 %tmp6, 0
+ %tmp8 = add i32 %tmp7, 0
+ %tmp9 = add i32 %tmp8, 0
+ %tmp10 = getelementptr inbounds [462 x float], [462 x float] addrspace(3)* @0, i32 0, i32 0
+ br label %bb12
+
+bb11: ; preds = %bb30
+ br i1 undef, label %bb37, label %bb38
+
+bb12: ; preds = %bb30, %bb
+ br i1 false, label %.preheader, label %.loopexit145
+
+.loopexit145: ; preds = %.preheader, %bb12
+ br label %bb13
+
+bb13: ; preds = %.loopexit, %.loopexit145
+ %tmp14 = phi i32 [ %tmp5, %.loopexit145 ], [ %tmp20, %.loopexit ]
+ %tmp15 = add nsw i32 %tmp14, -3
+ %tmp16 = mul i32 %tmp14, 21
+ br i1 undef, label %bb17, label %.loopexit
+
+bb17: ; preds = %bb13
+ %tmp18 = mul i32 %tmp15, 224
+ %tmp19 = add i32 undef, %tmp18
+ br label %bb21
+
+.loopexit: ; preds = %bb21, %bb13
+ %tmp20 = add nuw nsw i32 %tmp14, 16
+ br i1 undef, label %bb13, label %bb26
+
+bb21: ; preds = %bb21, %bb17
+ %tmp22 = phi i32 [ %tmp4, %bb17 ], [ %tmp25, %bb21 ]
+ %tmp23 = add i32 %tmp22, %tmp16
+ %tmp24 = getelementptr inbounds float, float addrspace(3)* %tmp10, i32 %tmp23
+ store float undef, float addrspace(3)* %tmp24, align 4
+ %tmp25 = add nuw i32 %tmp22, 8
+ br i1 undef, label %bb21, label %.loopexit
+
+bb26: ; preds = %.loopexit
+ br label %bb31
+
+.preheader: ; preds = %.preheader, %bb12
+ %tmp27 = phi i32 [ %tmp28, %.preheader ], [ undef, %bb12 ]
+ %tmp28 = add nuw i32 %tmp27, 128
+ %tmp29 = icmp ult i32 %tmp28, 1568
+ br i1 %tmp29, label %.preheader, label %.loopexit145
+
+bb30: ; preds = %bb31
+ br i1 undef, label %bb11, label %bb12
+
+bb31: ; preds = %bb31, %bb26
+ %tmp32 = phi i32 [ %tmp9, %bb26 ], [ undef, %bb31 ]
+ %tmp33 = getelementptr inbounds [462 x float], [462 x float] addrspace(3)* @0, i32 0, i32 %tmp32
+ %tmp34 = load float, float addrspace(3)* %tmp33, align 4
+ %tmp35 = tail call float @llvm.fmuladd.f32(float %tmp34, float undef, float undef)
+ %tmp36 = tail call float @llvm.fmuladd.f32(float undef, float undef, float %tmp35)
+ br i1 undef, label %bb30, label %bb31
+
+bb37: ; preds = %bb11
+ br label %bb38
+
+bb38: ; preds = %bb37, %bb11
+ ret void
+}
+
+attributes #0 = { nounwind readnone speculatable }
+
+!0 = !{i32 8, i32 16, i32 1}
OpenPOWER on IntegriCloud