diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-02-08 16:28:19 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-02-08 16:28:19 +0000 |
| commit | 2bba779272a23094b3a1ff7b3c56259d51f457df (patch) | |
| tree | e4798edd0bbc1b350a115d3abc3af47298ca0ef3 /llvm/test/CodeGen | |
| parent | 3d1d076f794279812b59ba71a0f53bd86ab709dd (diff) | |
| download | bcm5719-llvm-2bba779272a23094b3a1ff7b3c56259d51f457df.tar.gz bcm5719-llvm-2bba779272a23094b3a1ff7b3c56259d51f457df.zip | |
SelectionDAG: Lower some range metadata to AssertZext
If range metadata has a lower bound of 0, add an AssertZext from the
smallest integer width that can hold the range's upper bound.
This allows operations on the results of some workitem intrinsics that
have known maximum ranges to use fast 24-bit multiplies.
llvm-svn: 260109
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/AArch64/lower-range-metadata-func-call.ll | 44 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll | 46 |
2 files changed, 90 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AArch64/lower-range-metadata-func-call.ll b/llvm/test/CodeGen/AArch64/lower-range-metadata-func-call.ll new file mode 100644 index 00000000000..fd4b2f5ba30 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/lower-range-metadata-func-call.ll @@ -0,0 +1,44 @@ +; RUN: llc -march=aarch64 -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s + +; and can be eliminated +; CHECK-LABEL: {{^}}test_call_known_max_range: +; CHECK: bl foo +; CHECK-NOT: and +; CHECK: ret +define i32 @test_call_known_max_range() #0 { +entry: + %id = tail call i32 @foo(), !range !0 + %and = and i32 %id, 1023 + ret i32 %and +} + +; CHECK-LABEL: {{^}}test_call_known_trunc_1_bit_range: +; CHECK: bl foo +; CHECK: and w{{[0-9]+}}, w0, #0x1ff +; CHECK: ret +define i32 @test_call_known_trunc_1_bit_range() #0 { +entry: + %id = tail call i32 @foo(), !range !0 + %and = and i32 %id, 511 + ret i32 %and +} + +; CHECK-LABEL: {{^}}test_call_known_max_range_m1: +; CHECK: bl foo +; CHECK: and w{{[0-9]+}}, w0, #0xff +; CHECK: ret +define i32 @test_call_known_max_range_m1() #0 { +entry: + %id = tail call i32 @foo(), !range !1 + %and = and i32 %id, 255 + ret i32 %and +} + + +declare i32 @foo() + +attributes #0 = { norecurse nounwind } +attributes #1 = { nounwind readnone } + +!0 = !{i32 0, i32 1024} +!1 = !{i32 0, i32 1023} diff --git a/llvm/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll b/llvm/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll new file mode 100644 index 00000000000..b6a9179a212 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll @@ -0,0 +1,46 @@ +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-unknown < %s | FileCheck %s + +; and can be eliminated +; CHECK-LABEL: {{^}}test_workitem_id_x_known_max_range: +; CHECK-NOT: v0 +; CHECK: {{flat|buffer}}_store_dword v0 +define void @test_workitem_id_x_known_max_range(i32 addrspace(1)* nocapture %out) #0 { 
+entry: + %id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0 + %and = and i32 %id, 1023 + store i32 %and, i32 addrspace(1)* %out, align 4 + ret void +} + +; CHECK-LABEL: {{^}}test_workitem_id_x_known_trunc_1_bit_range: +; CHECK: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x1ff, v0 +; CHECK: {{flat|buffer}}_store_dword [[MASKED]] +define void @test_workitem_id_x_known_trunc_1_bit_range(i32 addrspace(1)* nocapture %out) #0 { +entry: + %id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0 + %and = and i32 %id, 511 + store i32 %and, i32 addrspace(1)* %out, align 4 + ret void +} + +; CHECK-LABEL: {{^}}test_workitem_id_x_known_max_range_m1: +; CHECK-NOT: v0 +; CHECK: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xff, v0 +; CHECK: {{flat|buffer}}_store_dword [[MASKED]] +define void @test_workitem_id_x_known_max_range_m1(i32 addrspace(1)* nocapture %out) #0 { +entry: + %id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !1 + %and = and i32 %id, 255 + store i32 %and, i32 addrspace(1)* %out, align 4 + ret void +} + + +declare i32 @llvm.amdgcn.workitem.id.x() #1 + +attributes #0 = { norecurse nounwind } +attributes #1 = { nounwind readnone } + +!0 = !{i32 0, i32 1024} +!1 = !{i32 0, i32 1023} |

