diff options
| author | Justin Holewinski <jholewinski@nvidia.com> | 2013-06-28 17:58:07 +0000 |
|---|---|---|
| committer | Justin Holewinski <jholewinski@nvidia.com> | 2013-06-28 17:58:07 +0000 |
| commit | dc372df63b23884b9267c46480f31118cb2117c5 (patch) | |
| tree | 78a7852c74cbcf5f8d6415cb28933bb97e3bf1d2 /llvm/lib | |
| parent | dc5e3b68f5b3dce841645c465ec8ce789825c4e9 (diff) | |
| download | bcm5719-llvm-dc372df63b23884b9267c46480f31118cb2117c5.tar.gz bcm5719-llvm-dc372df63b23884b9267c46480f31118cb2117c5.zip | |
[NVPTX] Add support for cttz/ctlz/ctpop
llvm-svn: 185176
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 16 | ||||
| -rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 58 |
2 files changed, 74 insertions, 0 deletions
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 338fe7c155f..8877d131eae 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -216,6 +216,22 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
   // Custom handling for i8 intrinsics
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
 
+  setOperationAction(ISD::CTLZ, MVT::i16, Legal);
+  setOperationAction(ISD::CTLZ, MVT::i32, Legal);
+  setOperationAction(ISD::CTLZ, MVT::i64, Legal);
+  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Legal);
+  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Legal);
+  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal);
+  setOperationAction(ISD::CTTZ, MVT::i16, Expand);
+  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+  setOperationAction(ISD::CTTZ, MVT::i64, Expand);
+  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
+  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+  setOperationAction(ISD::CTPOP, MVT::i16, Legal);
+  setOperationAction(ISD::CTPOP, MVT::i32, Legal);
+  setOperationAction(ISD::CTPOP, MVT::i64, Legal);
+
   // Now deduce the information based on the above mentioned
   // actions
   computeRegisterProperties();
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 32193641f21..553a6ba703d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2406,6 +2406,64 @@ def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
                            "mov.b64\t{{$d1, $d2}}, $s;",
                            []>;
 
+// Count leading zeros
+def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
+                       "clz.b32\t$d, $a;",
+                       []>;
+def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+                       "clz.b64\t$d, $a;",
+                       []>;
+
+// 32-bit maps directly onto the PTX clz.b32 instruction
+def : Pat<(ctlz Int32Regs:$a),
+          (CLZr32 Int32Regs:$a)>;
+def : Pat<(ctlz_zero_undef Int32Regs:$a),
+          (CLZr32 Int32Regs:$a)>;
+
+// For 64-bit, clz.b64 produces a 32-bit result, so we zero-extend it
+// to 64-bit to match the LLVM semantics
+def : Pat<(ctlz Int64Regs:$a),
+          (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
+def : Pat<(ctlz_zero_undef Int64Regs:$a),
+          (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
+
+// For 16-bit, we zero-extend to 32-bit, then truncate the result back
+// to 16 bits (ctlz of a 16-bit value is guaranteed to require less
+// than 16 bits to store). We also need to subtract 16 because the 16
+// high-order zeros added by the zero-extension were counted.
+def : Pat<(ctlz Int16Regs:$a),
+          (SUBi16ri (CVT_u16_u32 (CLZr32
+            (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
+           CvtNONE), 16)>;
+def : Pat<(ctlz_zero_undef Int16Regs:$a),
+          (SUBi16ri (CVT_u16_u32 (CLZr32
+            (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
+           CvtNONE), 16)>;
+
+// Population count
+def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
+                        "popc.b32\t$d, $a;",
+                        []>;
+def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+                        "popc.b64\t$d, $a;",
+                        []>;
+
+// 32-bit maps directly onto the PTX popc.b32 instruction
+def : Pat<(ctpop Int32Regs:$a),
+          (POPCr32 Int32Regs:$a)>;
+
+// For 64-bit, popc.b64 produces a 32-bit result, so we zero-extend it
+// to 64-bit to match the LLVM semantics
+def : Pat<(ctpop Int64Regs:$a),
+          (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>;
+
+// For 16-bit, we zero-extend to 32-bit, then truncate the result back
+// to 16 bits (ctpop of a 16-bit value is guaranteed to require less
+// than 16 bits to store)
+def : Pat<(ctpop Int16Regs:$a),
+          (CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
+           CvtNONE)>;
+
 // fround f64 -> f32
 def : Pat<(f32 (fround Float64Regs:$a)),
           (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>; |

