summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2016-02-08 16:28:19 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2016-02-08 16:28:19 +0000
commit2bba779272a23094b3a1ff7b3c56259d51f457df (patch)
treee4798edd0bbc1b350a115d3abc3af47298ca0ef3
parent3d1d076f794279812b59ba71a0f53bd86ab709dd (diff)
downloadbcm5719-llvm-2bba779272a23094b3a1ff7b3c56259d51f457df.tar.gz
bcm5719-llvm-2bba779272a23094b3a1ff7b3c56259d51f457df.zip
SelectionDAG: Lower some range metadata to AssertZext
If a range has a lower bound of 0, add an AssertZext from the nearest floor power of two. This allows operations with some workitem intrinsics with known maximum ranges to use fast 24-bit multiplies. llvm-svn: 260109
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp43
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h5
-rw-r--r--llvm/test/CodeGen/AArch64/lower-range-metadata-func-call.ll44
-rw-r--r--llvm/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll46
4 files changed, 135 insertions, 3 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 640a2b2951a..c50e79fec29 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3721,7 +3721,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy);
Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
- }
+ } else
+ Result = lowerRangeToAssertZExt(DAG, I, Result);
setValue(&I, Result);
}
@@ -5419,8 +5420,11 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
.setTailCall(isTailCall);
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
- if (Result.first.getNode())
- setValue(CS.getInstruction(), Result.first);
+ if (Result.first.getNode()) {
+ const Instruction *Inst = CS.getInstruction();
+ Result.first = lowerRangeToAssertZExt(DAG, *Inst, Result.first);
+ setValue(Inst, Result.first);
+ }
}
/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
@@ -6716,6 +6720,39 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
DAG.getSrcValue(I.getArgOperand(1))));
}
+SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
+ const Instruction &I,
+ SDValue Op) {
+ const MDNode *Range = I.getMetadata(LLVMContext::MD_range);
+ if (!Range)
+ return Op;
+
+ Constant *Lo = cast<ConstantAsMetadata>(Range->getOperand(0))->getValue();
+ if (!Lo->isNullValue())
+ return Op;
+
+ Constant *Hi = cast<ConstantAsMetadata>(Range->getOperand(1))->getValue();
+ unsigned Bits = cast<ConstantInt>(Hi)->getValue().logBase2();
+
+ EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits);
+
+ SDLoc SL = getCurSDLoc();
+
+ SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(),
+ Op, DAG.getValueType(SmallVT));
+ unsigned NumVals = Op.getNode()->getNumValues();
+ if (NumVals == 1)
+ return ZExt;
+
+ SmallVector<SDValue, 4> Ops;
+
+ Ops.push_back(ZExt);
+ for (unsigned I = 1; I != NumVals; ++I)
+ Ops.push_back(Op.getValue(I));
+
+ return DAG.getMergeValues(Ops, SL);
+}
+
/// \brief Lower an argument list according to the target calling convention.
///
/// \return A tuple of <return-value, token-chain>
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 8fb85ff6ecc..0e8230d39cd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -708,6 +708,11 @@ public:
void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
const BasicBlock *EHPadBB = nullptr);
+ // Lower range metadata from 0 to N to assert zext to an integer of nearest
+ // floor power of two.
+ SDValue lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I,
+ SDValue Op);
+
std::pair<SDValue, SDValue> lowerCallOperands(
ImmutableCallSite CS,
unsigned ArgIdx,
diff --git a/llvm/test/CodeGen/AArch64/lower-range-metadata-func-call.ll b/llvm/test/CodeGen/AArch64/lower-range-metadata-func-call.ll
new file mode 100644
index 00000000000..fd4b2f5ba30
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/lower-range-metadata-func-call.ll
@@ -0,0 +1,44 @@
+; RUN: llc -march=aarch64 -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; and can be eliminated
+; CHECK-LABEL: {{^}}test_call_known_max_range:
+; CHECK: bl foo
+; CHECK-NOT: and
+; CHECK: ret
+define i32 @test_call_known_max_range() #0 {
+entry:
+ %id = tail call i32 @foo(), !range !0
+ %and = and i32 %id, 1023
+ ret i32 %and
+}
+
+; CHECK-LABEL: {{^}}test_call_known_trunc_1_bit_range:
+; CHECK: bl foo
+; CHECK: and w{{[0-9]+}}, w0, #0x1ff
+; CHECK: ret
+define i32 @test_call_known_trunc_1_bit_range() #0 {
+entry:
+ %id = tail call i32 @foo(), !range !0
+ %and = and i32 %id, 511
+ ret i32 %and
+}
+
+; CHECK-LABEL: {{^}}test_call_known_max_range_m1:
+; CHECK: bl foo
+; CHECK: and w{{[0-9]+}}, w0, #0xff
+; CHECK: ret
+define i32 @test_call_known_max_range_m1() #0 {
+entry:
+ %id = tail call i32 @foo(), !range !1
+ %and = and i32 %id, 255
+ ret i32 %and
+}
+
+
+declare i32 @foo()
+
+attributes #0 = { norecurse nounwind }
+attributes #1 = { nounwind readnone }
+
+!0 = !{i32 0, i32 1024}
+!1 = !{i32 0, i32 1023}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll b/llvm/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll
new file mode 100644
index 00000000000..b6a9179a212
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll
@@ -0,0 +1,46 @@
+; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-unknown < %s | FileCheck %s
+
+; and can be eliminated
+; CHECK-LABEL: {{^}}test_workitem_id_x_known_max_range:
+; CHECK-NOT: v0
+; CHECK: {{flat|buffer}}_store_dword v0
+define void @test_workitem_id_x_known_max_range(i32 addrspace(1)* nocapture %out) #0 {
+entry:
+ %id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
+ %and = and i32 %id, 1023
+ store i32 %and, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; CHECK-LABEL: {{^}}test_workitem_id_x_known_trunc_1_bit_range:
+; CHECK: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x1ff, v0
+; CHECK: {{flat|buffer}}_store_dword [[MASKED]]
+define void @test_workitem_id_x_known_trunc_1_bit_range(i32 addrspace(1)* nocapture %out) #0 {
+entry:
+ %id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
+ %and = and i32 %id, 511
+ store i32 %and, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; CHECK-LABEL: {{^}}test_workitem_id_x_known_max_range_m1:
+; CHECK-NOT: v0
+; CHECK: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xff, v0
+; CHECK: {{flat|buffer}}_store_dword [[MASKED]]
+define void @test_workitem_id_x_known_max_range_m1(i32 addrspace(1)* nocapture %out) #0 {
+entry:
+ %id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !1
+ %and = and i32 %id, 255
+ store i32 %and, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+attributes #0 = { norecurse nounwind }
+attributes #1 = { nounwind readnone }
+
+!0 = !{i32 0, i32 1024}
+!1 = !{i32 0, i32 1023}
OpenPOWER on IntegriCloud