summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/NVPTX/ctlz.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/NVPTX/ctlz.ll')
-rw-r--r--llvm/test/CodeGen/NVPTX/ctlz.ll124
1 files changed, 106 insertions, 18 deletions
diff --git a/llvm/test/CodeGen/NVPTX/ctlz.ll b/llvm/test/CodeGen/NVPTX/ctlz.ll
index bed15a9f6a5..100c0837775 100644
--- a/llvm/test/CodeGen/NVPTX/ctlz.ll
+++ b/llvm/test/CodeGen/NVPTX/ctlz.ll
@@ -6,39 +6,127 @@ declare i16 @llvm.ctlz.i16(i16, i1) readnone
declare i32 @llvm.ctlz.i32(i32, i1) readnone
declare i64 @llvm.ctlz.i64(i64, i1) readnone
+; There should be no difference between llvm.ctlz.i32(%a, true) and
+; llvm.ctlz.i32(%a, false), as ptx's clz(0) is well defined (it returns 32).
+
+; CHECK-LABEL: myctpop(
define i32 @myctpop(i32 %a) {
-; CHECK: clz.b32
+; CHECK: ld.param.
+; CHECK-NEXT: clz.b32
+; CHECK-NEXT: st.param.
+; CHECK-NEXT: ret;
%val = call i32 @llvm.ctlz.i32(i32 %a, i1 false) readnone
ret i32 %val
}
-
-define i16 @myctpop16(i16 %a) {
-; CHECK: clz.b32
- %val = call i16 @llvm.ctlz.i16(i16 %a, i1 false) readnone
- ret i16 %val
+; CHECK-LABEL: myctpop_2(
+define i32 @myctpop_2(i32 %a) {
+; CHECK: ld.param.
+; CHECK-NEXT: clz.b32
+; CHECK-NEXT: st.param.
+; CHECK-NEXT: ret;
+ %val = call i32 @llvm.ctlz.i32(i32 %a, i1 true) readnone
+ ret i32 %val
}
+; PTX's clz.b64 returns a 32-bit value, but LLVM's intrinsic returns a 64-bit
+; value, so here we have to zero-extend it.
+; CHECK-LABEL: myctpop64(
define i64 @myctpop64(i64 %a) {
-; CHECK: clz.b64
+; CHECK: ld.param.
+; CHECK-NEXT: clz.b64
+; CHECK-NEXT: cvt.u64.u32
+; CHECK-NEXT: st.param.
+; CHECK-NEXT: ret;
%val = call i64 @llvm.ctlz.i64(i64 %a, i1 false) readnone
ret i64 %val
}
+; CHECK-LABEL: myctpop64_2(
+define i64 @myctpop64_2(i64 %a) {
+; CHECK: ld.param.
+; CHECK-NEXT: clz.b64
+; CHECK-NEXT: cvt.u64.u32
+; CHECK-NEXT: st.param.
+; CHECK-NEXT: ret;
+ %val = call i64 @llvm.ctlz.i64(i64 %a, i1 true) readnone
+ ret i64 %val
+}
-
-define i32 @myctpop_2(i32 %a) {
-; CHECK: clz.b32
- %val = call i32 @llvm.ctlz.i32(i32 %a, i1 true) readnone
- ret i32 %val
+; Here we truncate the 64-bit value of LLVM's ctlz intrinsic to 32 bits, the
+; natural return width of ptx's clz.b64 instruction. No conversions should be
+; necessary in the PTX.
+; CHECK-LABEL: myctpop64_as_32(
+define i32 @myctpop64_as_32(i64 %a) {
+; CHECK: ld.param.
+; CHECK-NEXT: clz.b64
+; CHECK-NEXT: st.param.
+; CHECK-NEXT: ret;
+ %val = call i64 @llvm.ctlz.i64(i64 %a, i1 false) readnone
+ %trunc = trunc i64 %val to i32
+ ret i32 %trunc
+}
+; CHECK-LABEL: myctpop64_as_32_2(
+define i32 @myctpop64_as_32_2(i64 %a) {
+; CHECK: ld.param.
+; CHECK-NEXT: clz.b64
+; CHECK-NEXT: st.param.
+; CHECK-NEXT: ret;
+ %val = call i64 @llvm.ctlz.i64(i64 %a, i1 true) readnone
+ %trunc = trunc i64 %val to i32
+ ret i32 %trunc
}
-define i16 @myctpop16_2(i16 %a) {
-; CHECK: clz.b32
+; ctlz.i16 is implemented by extending the input to i32, computing the result,
+; and then truncating the result back down to i16. But the NVPTX ABI
+; zero-extends i16 return values to i32, so the final truncation doesn't appear
+; in this function.
+; CHECK-LABEL: myctpop_ret16(
+define i16 @myctpop_ret16(i16 %a) {
+; CHECK: ld.param.
+; CHECK-NEXT: cvt.u32.u16
+; CHECK-NEXT: clz.b32
+; CHECK-NEXT: sub.
+; CHECK-NEXT: st.param.
+; CHECK-NEXT: ret;
+ %val = call i16 @llvm.ctlz.i16(i16 %a, i1 false) readnone
+ ret i16 %val
+}
+; CHECK-LABEL: myctpop_ret16_2(
+define i16 @myctpop_ret16_2(i16 %a) {
+; CHECK: ld.param.
+; CHECK-NEXT: cvt.u32.u16
+; CHECK-NEXT: clz.b32
+; CHECK-NEXT: sub.
+; CHECK-NEXT: st.param.
+; CHECK-NEXT: ret;
%val = call i16 @llvm.ctlz.i16(i16 %a, i1 true) readnone
ret i16 %val
}
-define i64 @myctpop64_2(i64 %a) {
-; CHECK: clz.b64
- %val = call i64 @llvm.ctlz.i64(i64 %a, i1 true) readnone
- ret i64 %val
+; Here we store the result of ctlz.i16 into an i16 pointer, so the trunc should
+; remain.
+; CHECK-LABEL: myctpop_store16(
+define void @myctpop_store16(i16 %a, i16* %b) {
+; CHECK: ld.param.
+; CHECK-NEXT: cvt.u32.u16
+; CHECK-NEXT: clz.b32
+; CHECK-DAG: cvt.u16.u32
+; CHECK-DAG: sub.
+; CHECK: st.{{[a-z]}}16
+; CHECK: ret;
+ %val = call i16 @llvm.ctlz.i16(i16 %a, i1 false) readnone
+ store i16 %val, i16* %b
+ ret void
+}
+; CHECK-LABEL: myctpop_store16_2(
+define void @myctpop_store16_2(i16 %a, i16* %b) {
+; CHECK: ld.param.
+; CHECK-NEXT: cvt.u32.u16
+; CHECK-NEXT: clz.b32
+; CHECK-DAG: cvt.u16.u32
+; CHECK-DAG: sub.
+; CHECK: st.{{[a-z]}}16
+; CHECK: ret;
+ %val = call i16 @llvm.ctlz.i16(i16 %a, i1 true) readnone
+ store i16 %val, i16* %b
+ ret void
}
OpenPOWER on IntegriCloud