diff options
Diffstat (limited to 'llvm/test/CodeGen/NVPTX/ctlz.ll')
| -rw-r--r-- | llvm/test/CodeGen/NVPTX/ctlz.ll | 124 |
1 files changed, 106 insertions, 18 deletions
diff --git a/llvm/test/CodeGen/NVPTX/ctlz.ll b/llvm/test/CodeGen/NVPTX/ctlz.ll
index bed15a9f6a5..100c0837775 100644
--- a/llvm/test/CodeGen/NVPTX/ctlz.ll
+++ b/llvm/test/CodeGen/NVPTX/ctlz.ll
@@ -6,39 +6,127 @@
 declare i16 @llvm.ctlz.i16(i16, i1) readnone
 declare i32 @llvm.ctlz.i32(i32, i1) readnone
 declare i64 @llvm.ctlz.i64(i64, i1) readnone
 
+; There should be no difference between llvm.ctlz.i32(%a, true) and
+; llvm.ctlz.i32(%a, false), as ptx's clz(0) is well-defined (it returns 32).
+
+; CHECK-LABEL: myctpop(
 define i32 @myctpop(i32 %a) {
-; CHECK: clz.b32
+; CHECK: ld.param.
+; CHECK-NEXT: clz.b32
+; CHECK-NEXT: st.param.
+; CHECK-NEXT: ret;
   %val = call i32 @llvm.ctlz.i32(i32 %a, i1 false) readnone
   ret i32 %val
 }
-
-define i16 @myctpop16(i16 %a) {
-; CHECK: clz.b32
-  %val = call i16 @llvm.ctlz.i16(i16 %a, i1 false) readnone
-  ret i16 %val
+; CHECK-LABEL: myctpop_2(
+define i32 @myctpop_2(i32 %a) {
+; CHECK: ld.param.
+; CHECK-NEXT: clz.b32
+; CHECK-NEXT: st.param.
+; CHECK-NEXT: ret;
+  %val = call i32 @llvm.ctlz.i32(i32 %a, i1 true) readnone
+  ret i32 %val
 }
 
+; PTX's clz.b64 returns a 32-bit value, but LLVM's intrinsic returns a 64-bit
+; value, so here we have to zero-extend it.
+; CHECK-LABEL: myctpop64(
 define i64 @myctpop64(i64 %a) {
-; CHECK: clz.b64
+; CHECK: ld.param.
+; CHECK-NEXT: clz.b64
+; CHECK-NEXT: cvt.u64.u32
+; CHECK-NEXT: st.param.
+; CHECK-NEXT: ret;
   %val = call i64 @llvm.ctlz.i64(i64 %a, i1 false) readnone
   ret i64 %val
 }
+; CHECK-LABEL: myctpop64_2(
+define i64 @myctpop64_2(i64 %a) {
+; CHECK: ld.param.
+; CHECK-NEXT: clz.b64
+; CHECK-NEXT: cvt.u64.u32
+; CHECK-NEXT: st.param.
+; CHECK-NEXT: ret;
+  %val = call i64 @llvm.ctlz.i64(i64 %a, i1 true) readnone
+  ret i64 %val
+}
-
-define i32 @myctpop_2(i32 %a) {
-; CHECK: clz.b32
-  %val = call i32 @llvm.ctlz.i32(i32 %a, i1 true) readnone
-  ret i32 %val
+; Here we truncate the 64-bit value of LLVM's ctlz intrinsic to 32 bits, the
+; natural return width of ptx's clz.b64 instruction.  No conversions should be
+; necessary in the PTX.
+; CHECK-LABEL: myctpop64_as_32(
+define i32 @myctpop64_as_32(i64 %a) {
+; CHECK: ld.param.
+; CHECK-NEXT: clz.b64
+; CHECK-NEXT: st.param.
+; CHECK-NEXT: ret;
+  %val = call i64 @llvm.ctlz.i64(i64 %a, i1 false) readnone
+  %trunc = trunc i64 %val to i32
+  ret i32 %trunc
+}
+; CHECK-LABEL: myctpop64_as_32_2(
+define i32 @myctpop64_as_32_2(i64 %a) {
+; CHECK: ld.param.
+; CHECK-NEXT: clz.b64
+; CHECK-NEXT: st.param.
+; CHECK-NEXT: ret;
+  %val = call i64 @llvm.ctlz.i64(i64 %a, i1 true) readnone
+  %trunc = trunc i64 %val to i32
+  ret i32 %trunc
 }
 
-define i16 @myctpop16_2(i16 %a) {
-; CHECK: clz.b32
+; ctlz.i16 is implemented by extending the input to i32, computing the result,
+; and then truncating the result back down to i16.  But the NVPTX ABI
+; zero-extends i16 return values to i32, so the final truncation doesn't appear
+; in this function.
+; CHECK-LABEL: myctpop_ret16(
+define i16 @myctpop_ret16(i16 %a) {
+; CHECK: ld.param.
+; CHECK-NEXT: cvt.u32.u16
+; CHECK-NEXT: clz.b32
+; CHECK-NEXT: sub.
+; CHECK-NEXT: st.param.
+; CHECK-NEXT: ret;
+  %val = call i16 @llvm.ctlz.i16(i16 %a, i1 false) readnone
+  ret i16 %val
+}
+; CHECK-LABEL: myctpop_ret16_2(
+define i16 @myctpop_ret16_2(i16 %a) {
+; CHECK: ld.param.
+; CHECK-NEXT: cvt.u32.u16
+; CHECK-NEXT: clz.b32
+; CHECK-NEXT: sub.
+; CHECK-NEXT: st.param.
+; CHECK-NEXT: ret;
   %val = call i16 @llvm.ctlz.i16(i16 %a, i1 true) readnone
   ret i16 %val
 }
 
-define i64 @myctpop64_2(i64 %a) {
-; CHECK: clz.b64
-  %val = call i64 @llvm.ctlz.i64(i64 %a, i1 true) readnone
-  ret i64 %val
+; Here we store the result of ctlz.16 into an i16 pointer, so the trunc should
+; remain.
+; CHECK-LABEL: myctpop_store16(
+define void @myctpop_store16(i16 %a, i16* %b) {
+; CHECK: ld.param.
+; CHECK-NEXT: cvt.u32.u16
+; CHECK: clz.b32
+; CHECK-DAG: cvt.u16.u32
+; CHECK-DAG: sub.
+; CHECK: st.{{[a-z]}}16
+; CHECK: ret;
+  %val = call i16 @llvm.ctlz.i16(i16 %a, i1 false) readnone
+  store i16 %val, i16* %b
+  ret void
+}
+; CHECK-LABEL: myctpop_store16_2(
+define void @myctpop_store16_2(i16 %a, i16* %b) {
+; CHECK: ld.param.
+; CHECK-NEXT: cvt.u32.u16
+; CHECK: clz.b32
+; CHECK-DAG: cvt.u16.u32
+; CHECK-DAG: sub.
+; CHECK: st.{{[a-z]}}16
+; CHECK: ret;
+  %val = call i16 @llvm.ctlz.i16(i16 %a, i1 true) readnone
+  store i16 %val, i16* %b
+  ret void
 }

