-rw-r--r--  clang/include/clang/Basic/BuiltinsAArch64.def |  2
-rw-r--r--  clang/include/clang/Basic/BuiltinsARM.def     |  2
-rw-r--r--  clang/lib/CodeGen/CGBuiltin.cpp               | 21
-rw-r--r--  clang/lib/Headers/arm_acle.h                  | 20
-rw-r--r--  clang/test/CodeGen/arm_acle.c                 | 18
-rw-r--r--  clang/test/CodeGen/builtins-arm.c             | 15
-rw-r--r--  clang/test/CodeGen/builtins-arm64.c           | 17
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsAArch64.td     |  3
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsARM.td         |  3
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  2
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp       | 43
-rw-r--r--  llvm/test/CodeGen/AArch64/cls.ll              | 20
-rw-r--r--  llvm/test/CodeGen/ARM/cls.ll                  | 27
13 files changed, 193 insertions, 0 deletions
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index 4df8d5a1676..f07c567053d 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -33,6 +33,8 @@ BUILTIN(__builtin_arm_clrex, "v", "")
// Bit manipulation
BUILTIN(__builtin_arm_rbit, "UiUi", "nc")
BUILTIN(__builtin_arm_rbit64, "WUiWUi", "nc")
+BUILTIN(__builtin_arm_cls, "UiZUi", "nc")
+BUILTIN(__builtin_arm_cls64, "UiWUi", "nc")
// HINT
BUILTIN(__builtin_arm_nop, "v", "")
diff --git a/clang/include/clang/Basic/BuiltinsARM.def b/clang/include/clang/Basic/BuiltinsARM.def
index 5f8308d2931..81991a57e2b 100644
--- a/clang/include/clang/Basic/BuiltinsARM.def
+++ b/clang/include/clang/Basic/BuiltinsARM.def
@@ -115,6 +115,8 @@ BUILTIN(__builtin_arm_smusdx, "iii", "nc")
// Bit manipulation
BUILTIN(__builtin_arm_rbit, "UiUi", "nc")
+BUILTIN(__builtin_arm_cls, "UiZUi", "nc")
+BUILTIN(__builtin_arm_cls64, "UiWUi", "nc")
// Store and load exclusive
BUILTIN(__builtin_arm_ldrexd, "LLUiv*", "")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 9a56116173e..648837a6d15 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6055,6 +6055,16 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
}
+ if (BuiltinID == ARM::BI__builtin_arm_cls) {
+ llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
+ return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
+ }
+ if (BuiltinID == ARM::BI__builtin_arm_cls64) {
+ llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
+ return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
+ "cls");
+ }
+
if (BuiltinID == ARM::BI__clear_cache) {
assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
const FunctionDecl *FD = E->getDirectCallee();
@@ -7108,6 +7118,17 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
}
+ if (BuiltinID == AArch64::BI__builtin_arm_cls) {
+ llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
+ return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
+ "cls");
+ }
+ if (BuiltinID == AArch64::BI__builtin_arm_cls64) {
+ llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
+ return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
+ "cls");
+ }
+
if (BuiltinID == AArch64::BI__builtin_arm_jcvt) {
assert((getContext().getTypeSize(E->getType()) == 32) &&
"__jcvt of unusual size!");
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 942172f9f8e..137092fb9d7 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -139,6 +139,26 @@ __clzll(uint64_t __t) {
return __builtin_clzll(__t);
}
+/* CLS */
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__cls(uint32_t __t) {
+ return __builtin_arm_cls(__t);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__clsl(unsigned long __t) {
+#if __SIZEOF_LONG__ == 4
+ return __builtin_arm_cls(__t);
+#else
+ return __builtin_arm_cls64(__t);
+#endif
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__clsll(uint64_t __t) {
+ return __builtin_arm_cls64(__t);
+}
+
/* REV */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rev(uint32_t __t) {
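A minimal usage sketch of the <arm_acle.h> wrappers added above, assuming an ARM or AArch64 target; the helper names and the example values in the comments are illustrative, not part of the patch:

#include <arm_acle.h>
#include <stdint.h>

/* __cls returns the number of leading sign bits, i.e. how many bits
 * directly below the sign bit are equal to it, for example:
 *   __cls(0x00000000u) == 31
 *   __cls(0xFFFFFFFFu) == 31
 *   __cls(0x0FFFFFFFu) == 3
 */
uint32_t leading_sign_bits32(uint32_t x) { return __cls(x); }
uint32_t leading_sign_bits64(uint64_t x) { return __clsll(x); }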
diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index 7463d0d8e1d..9f0ad22bda4 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -175,6 +175,24 @@ uint64_t test_clzll(uint64_t t) {
return __clzll(t);
}
+// ARM-LABEL: test_cls
+// ARM: call i32 @llvm.arm.cls(i32 %t)
+unsigned test_cls(uint32_t t) {
+ return __cls(t);
+}
+
+// ARM-LABEL: test_clsl
+// AArch32: call i32 @llvm.arm.cls(i32 %t)
+// AArch64: call i32 @llvm.arm.cls64(i64 %t)
+unsigned test_clsl(unsigned long t) {
+ return __clsl(t);
+}
+// ARM-LABEL: test_clsll
+// ARM: call i32 @llvm.arm.cls64(i64 %t)
+unsigned test_clsll(uint64_t t) {
+ return __clsll(t);
+}
+
// ARM-LABEL: test_rev
// ARM: call i32 @llvm.bswap.i32(i32 %t)
uint32_t test_rev(uint32_t t) {
diff --git a/clang/test/CodeGen/builtins-arm.c b/clang/test/CodeGen/builtins-arm.c
index 4941411bfbb..f3c4ecaeee9 100644
--- a/clang/test/CodeGen/builtins-arm.c
+++ b/clang/test/CodeGen/builtins-arm.c
@@ -256,6 +256,21 @@ void wsrp(void *v) {
__builtin_arm_wsrp("sysreg", v);
}
+unsigned int cls(uint32_t v) {
+ // CHECK: call i32 @llvm.arm.cls(i32 %v)
+ return __builtin_arm_cls(v);
+}
+
+unsigned int clsl(unsigned long v) {
+ // CHECK: call i32 @llvm.arm.cls(i32 %v)
+ return __builtin_arm_cls(v);
+}
+
+unsigned int clsll(uint64_t v) {
+ // CHECK: call i32 @llvm.arm.cls64(i64 %v)
+ return __builtin_arm_cls64(v);
+}
+
// CHECK: ![[M0]] = !{!"cp1:2:c3:c4:5"}
// CHECK: ![[M1]] = !{!"cp1:2:c3"}
// CHECK: ![[M2]] = !{!"sysreg"}
diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c
index 7095396e6bb..f16cd4505f6 100644
--- a/clang/test/CodeGen/builtins-arm64.c
+++ b/clang/test/CodeGen/builtins-arm64.c
@@ -106,4 +106,21 @@ void wsrp(void *v) {
__builtin_arm_wsrp("1:2:3:4:5", v);
}
+unsigned int cls(uint32_t v) {
+ // CHECK: call i32 @llvm.aarch64.cls(i32 %v)
+ return __builtin_arm_cls(v);
+}
+
+unsigned int clsl(unsigned long v) {
+ // CHECK-WIN: [[V64:%[^ ]+]] = zext i32 %v to i64
+ // CHECK-WIN: call i32 @llvm.aarch64.cls64(i64 [[V64]]
+ // CHECK-LINUX: call i32 @llvm.aarch64.cls64(i64 %v)
+ return __builtin_arm_cls64(v);
+}
+
+unsigned int clsll(uint64_t v) {
+ // CHECK: call i32 @llvm.aarch64.cls64(i64 %v)
+ return __builtin_arm_cls64(v);
+}
+
// CHECK: ![[M0]] = !{!"1:2:3:4:5"}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index db01700f409..d2a3bec2530 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -33,6 +33,9 @@ def int_aarch64_udiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
def int_aarch64_fjcvtzs : Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+def int_aarch64_cls: Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_aarch64_cls64: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
+
//===----------------------------------------------------------------------===//
// HINT
diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td
index db5111e6910..a188652ba1d 100644
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -843,4 +843,7 @@ def int_arm_mve_vst2q: Intrinsic<[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMat
def int_arm_mve_vst4q: Intrinsic<[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, LLVMMatchType<1>, llvm_i32_ty], [IntrWriteMem]
>;
+def int_arm_cls: Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_arm_cls64: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
+
} // end TargetPrefix
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 89de1212443..4e1be2696d5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1527,6 +1527,8 @@ def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
(i64 1))),
(CLSXr GPR64:$Rn)>;
+def : Pat<(int_aarch64_cls GPR32:$Rn), (CLSWr GPR32:$Rn)>;
+def : Pat<(int_aarch64_cls64 GPR64:$Rm), (EXTRACT_SUBREG (CLSXr GPR64:$Rm), sub_32)>;
// Unlike the other one operand instructions, the instructions with the "rev"
// mnemonic do *not* just different in the size bit, but actually use different
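For context, the ctlz patterns above (which the new int_aarch64_cls selection sits next to) encode the usual CLS-as-CLZ identity; a hedged C model of the 32-bit identity, using the __builtin_clz builtin that Clang and GCC provide:

#include <stdint.h>

/* Model of the expansion matched above:
 *   cls(x) == clz(((x ^ (x >> 31)) << 1) | 1), with >> an arithmetic shift.
 * x ^ (x >> 31) zeroes the sign bit and every leading bit equal to it;
 * the "<< 1 | 1" step removes the sign bit from the count and keeps the
 * clz operand nonzero, so the result is always in [0, 31]. */
static unsigned cls32_model(int32_t x) {
    uint32_t t = (uint32_t)(x ^ (x >> 31)); /* arithmetic shift replicates the sign bit */
    t = (t << 1) | 1u;
    return (unsigned)__builtin_clz(t);      /* well defined: t is never zero */
}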
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 615a09e1601..bb6bca51efd 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -3629,6 +3629,49 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
}
+ case Intrinsic::arm_cls: {
+ const SDValue &Operand = Op.getOperand(1);
+ const EVT VTy = Op.getValueType();
+ SDValue SRA =
+ DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy));
+ SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand);
+ SDValue SHL =
+ DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy));
+ SDValue OR =
+ DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy));
+ SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR);
+ return Result;
+ }
+ case Intrinsic::arm_cls64: {
+ // cls(x) = if cls(hi(x)) != 31 then cls(hi(x))
+ // else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x)))
+ const SDValue &Operand = Op.getOperand(1);
+ const EVT VTy = Op.getValueType();
+
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand,
+ DAG.getConstant(1, dl, VTy));
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand,
+ DAG.getConstant(0, dl, VTy));
+ SDValue Constant0 = DAG.getConstant(0, dl, VTy);
+ SDValue Constant1 = DAG.getConstant(1, dl, VTy);
+ SDValue Constant31 = DAG.getConstant(31, dl, VTy);
+ SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31);
+ SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi);
+ SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1);
+ SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1);
+ SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi);
+ SDValue CheckLo =
+ DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ);
+ SDValue HiIsZero =
+ DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ);
+ SDValue AdjustedLo =
+ DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy));
+ SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo);
+ SDValue Result =
+ DAG.getSelect(dl, VTy, CheckLo,
+ DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi);
+ return Result;
+ }
case Intrinsic::eh_sjlj_lsda: {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
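A hedged C model of the i64 lowering built above, splitting the operand into 32-bit halves the same way the EXTRACT_ELEMENT nodes do; the helper names are illustrative, and clz here follows the ARM CLZ convention of returning 32 for a zero input:

#include <stdint.h>

/* clz that returns 32 for zero, matching the ARM CLZ instruction. */
static unsigned clz32(uint32_t w) {
    return w ? (unsigned)__builtin_clz(w) : 32u;
}

/* 32-bit identity used for the high word: cls(w) = clz(((w ^ (w >> 31)) << 1) | 1). */
static unsigned cls32(uint32_t w) {
    uint32_t t = ((w ^ (uint32_t)((int32_t)w >> 31)) << 1) | 1u;
    return (unsigned)__builtin_clz(t);
}

/* cls64(x) = cls(hi(x))                              if cls(hi(x)) != 31
 *          = 31 + clz(hi(x) == 0 ? lo(x) : ~lo(x))   otherwise            */
static unsigned cls64_model(uint64_t x) {
    uint32_t hi = (uint32_t)(x >> 32);
    uint32_t lo = (uint32_t)x;
    unsigned cls_hi = cls32(hi);
    if (cls_hi != 31)
        return cls_hi;                    /* the sign run ends inside the high word */
    uint32_t adjusted_lo = (hi == 0) ? lo : ~lo;
    return 31u + clz32(adjusted_lo);      /* the sign run continues into the low word */
}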
diff --git a/llvm/test/CodeGen/AArch64/cls.ll b/llvm/test/CodeGen/AArch64/cls.ll
new file mode 100644
index 00000000000..f17ccf7d6f6
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cls.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s
+
+; @llvm.aarch64.cls must be directly translated into the 'cls' instruction
+
+; CHECK-LABEL: cls
+; CHECK: cls [[REG:w[0-9]+]], [[REG]]
+define i32 @cls(i32 %t) {
+ %cls.i = call i32 @llvm.aarch64.cls(i32 %t)
+ ret i32 %cls.i
+}
+
+; CHECK-LABEL: cls64
+; CHECK: cls [[REG:x[0-9]+]], [[REG]]
+define i32 @cls64(i64 %t) {
+ %cls.i = call i32 @llvm.aarch64.cls64(i64 %t)
+ ret i32 %cls.i
+}
+
+declare i32 @llvm.aarch64.cls(i32) nounwind
+declare i32 @llvm.aarch64.cls64(i64) nounwind
diff --git a/llvm/test/CodeGen/ARM/cls.ll b/llvm/test/CodeGen/ARM/cls.ll
new file mode 100644
index 00000000000..cccb38d0766
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/cls.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple=armv5 %s -o - | FileCheck %s
+
+; CHECK: eor [[T:r[0-9]+]], [[T]], [[T]], asr #31
+; CHECK-NEXT: mov [[C1:r[0-9]+]], #1
+; CHECK-NEXT: orr [[T]], [[C1]], [[T]], lsl #1
+; CHECK-NEXT: clz [[T]], [[T]]
+define i32 @cls(i32 %t) {
+ %cls.i = call i32 @llvm.arm.cls(i32 %t)
+ ret i32 %cls.i
+}
+
+; CHECK: cmp r1, #0
+; CHECK: mvnne [[ADJUSTEDLO:r[0-9]+]], r0
+; CHECK: clz [[CLZLO:r[0-9]+]], [[ADJUSTEDLO]]
+; CHECK: eor [[A:r[0-9]+]], r1, r1, asr #31
+; CHECK: mov r1, #1
+; CHECK: orr [[A]], r1, [[A]], lsl #1
+; CHECK: clz [[CLSHI:r[0-9]+]], [[A]]
+; CHECK: cmp [[CLSHI]], #31
+; CHECK: addeq r0, [[CLZLO]], #31
+define i32 @cls64(i64 %t) {
+ %cls.i = call i32 @llvm.arm.cls64(i64 %t)
+ ret i32 %cls.i
+}
+
+declare i32 @llvm.arm.cls(i32) nounwind
+declare i32 @llvm.arm.cls64(i64) nounwind