diff options
| author | vhscampos <Victor.Campos@arm.com> | 2019-10-17 14:10:30 +0100 |
|---|---|---|
| committer | vhscampos <Victor.Campos@arm.com> | 2019-10-28 11:06:58 +0000 |
| commit | f6e11a36c49c065cd71e9c54e4fba917da5bbf2e (patch) | |
| tree | 111c1c361e799280cad8866d7715d2169feeb901 /llvm/lib/Target/ARM | |
| parent | 3cb5764f900284666dbb0342c487edb1fde4d7fc (diff) | |
| download | bcm5719-llvm-f6e11a36c49c065cd71e9c54e4fba917da5bbf2e.tar.gz bcm5719-llvm-f6e11a36c49c065cd71e9c54e4fba917da5bbf2e.zip | |
[ARM][AArch64] Implement __cls, __clsl and __clsll intrinsics from ACLE
Summary:
Writing support for three ACLE functions:
unsigned int __cls(uint32_t x)
unsigned int __clsl(unsigned long x)
unsigned int __clsll(uint64_t x)
CLS stands for "Count number of leading sign bits".
In AArch64, these intrinsics can be translated into the 'cls'
instruction directly. In AArch32, on the other hand, this functionality
is implemented in terms of 'clz' (count number of leading zeros).
Reviewers: compnerd
Reviewed By: compnerd
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D69250
Diffstat (limited to 'llvm/lib/Target/ARM')
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 43 |
1 files changed, 43 insertions, 0 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 615a09e1601..bb6bca51efd 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -3629,6 +3629,49 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, EVT PtrVT = getPointerTy(DAG.getDataLayout()); return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); } + case Intrinsic::arm_cls: { + const SDValue &Operand = Op.getOperand(1); + const EVT VTy = Op.getValueType(); + SDValue SRA = + DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy)); + SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand); + SDValue SHL = + DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy)); + SDValue OR = + DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy)); + SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR); + return Result; + } + case Intrinsic::arm_cls64: { + // cls(x) = if cls(hi(x)) != 31 then cls(hi(x)) + // else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x))) + const SDValue &Operand = Op.getOperand(1); + const EVT VTy = Op.getValueType(); + + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand, + DAG.getConstant(1, dl, VTy)); + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand, + DAG.getConstant(0, dl, VTy)); + SDValue Constant0 = DAG.getConstant(0, dl, VTy); + SDValue Constant1 = DAG.getConstant(1, dl, VTy); + SDValue Constant31 = DAG.getConstant(31, dl, VTy); + SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31); + SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi); + SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1); + SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1); + SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi); + SDValue CheckLo = + DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ); + SDValue HiIsZero = + DAG.getSetCC(dl, MVT::i1, Hi, Constant0, 
ISD::CondCode::SETEQ); + SDValue AdjustedLo = + DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy)); + SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo); + SDValue Result = + DAG.getSelect(dl, VTy, CheckLo, + DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi); + return Result; + } case Intrinsic::eh_sjlj_lsda: { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |

