diff options
| author | Kevin Qin <Kevin.Qin@arm.com> | 2013-12-04 07:53:09 +0000 |
|---|---|---|
| committer | Kevin Qin <Kevin.Qin@arm.com> | 2013-12-04 07:53:09 +0000 |
| commit | 8903f8df4b56608095f1890588896926d66d64b0 (patch) | |
| tree | 8108f4cfefd6082e225b4a68aee80845b6166920 | |
| parent | 06655f357093f25dfdbe83cbc45ef10dd83f7047 (diff) | |
| download | bcm5719-llvm-8903f8df4b56608095f1890588896926d66d64b0.tar.gz bcm5719-llvm-8903f8df4b56608095f1890588896926d66d64b0.zip | |
[AArch64 NEON] Add missing compare intrinsics.
llvm-svn: 196359
| -rw-r--r-- | clang/include/clang/Basic/arm_neon.td | 35 | ||||
| -rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 34 | ||||
| -rw-r--r-- | clang/test/CodeGen/aarch64-neon-intrinsics.c | 116 | ||||
| -rw-r--r-- | clang/test/CodeGen/aarch64-neon-misc.c | 48 |
4 files changed, 213 insertions, 20 deletions
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td index e7f95a21c1a..9097edc4e63 100644 --- a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -732,28 +732,31 @@ def FSQRT : SInst<"vsqrt", "dd", "fdQfQd">; //////////////////////////////////////////////////////////////////////////////// // Comparison // With additional Qd, Ql, QPl type. -def VVCEQ : IOpInst<"vceq", "udd", "csifUcUsUiPcQcQsQiQfQUcQUsQUiQPcPlQPl", - OP_EQ>; -def FCAGE : IInst<"vcage", "udd", "fQfQd">; -def FCAGT : IInst<"vcagt", "udd", "fQfQd">; -def FCALE : IInst<"vcale", "udd", "fQfQd">; -def FCALT : IInst<"vcalt", "udd", "fQfQd">; +def FCAGE : IInst<"vcage", "udd", "fdQfQd">; +def FCAGT : IInst<"vcagt", "udd", "fdQfQd">; +def FCALE : IInst<"vcale", "udd", "fdQfQd">; +def FCALT : IInst<"vcalt", "udd", "fdQfQd">; // With additional Ql, QUl, Qd types. def CMTST : WInst<"vtst", "udd", "csiUcUsUiPcPsQcQsQiQUcQUsQUiQPcQPslUlQlQUlPlQPl">; +// With additional l, Ul, d, Qd, Ql, QUl types. 
def CFMEQ : SOpInst<"vceq", "udd", - "csifUcUsUiPcQcQsQiQlQfQUcQUsQUiQUlQPcQd", OP_EQ>; -def CFMGE : SOpInst<"vcge", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_GE>; -def CFMLE : SOpInst<"vcle", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_LE>; -def CFMGT : SOpInst<"vcgt", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_GT>; -def CFMLT : SOpInst<"vclt", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_LT>; + "csilfUcUsUiUlPcQcdQdQsQiQfQUcQUsQUiQUlQlQPcPlQPl", OP_EQ>; +def CFMGE : SOpInst<"vcge", "udd", + "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUldQd", OP_GE>; +def CFMLE : SOpInst<"vcle", "udd", + "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUldQd", OP_LE>; +def CFMGT : SOpInst<"vcgt", "udd", + "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUldQd", OP_GT>; +def CFMLT : SOpInst<"vclt", "udd", + "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUldQd", OP_LT>; def CMEQ : SInst<"vceqz", "ud", - "csifUcUsUiPcPsQcQsQiQlQfQUcQUsQUiQUlQPcQPsQd">; -def CMGE : SInst<"vcgez", "ud", "csifdQcQsQiQlQfQd">; -def CMLE : SInst<"vclez", "ud", "csifdQcQsQiQlQfQd">; -def CMGT : SInst<"vcgtz", "ud", "csifdQcQsQiQlQfQd">; -def CMLT : SInst<"vcltz", "ud", "csifdQcQsQiQlQfQd">; + "csilfUcUsUiUlPcPsPlQcQsQiQlQfQUcQUsQUiQUlQPcQPsdQdQPl">; +def CMGE : SInst<"vcgez", "ud", "csilfdQcQsQiQlQfQd">; +def CMLE : SInst<"vclez", "ud", "csilfdQcQsQiQlQfQd">; +def CMGT : SInst<"vcgtz", "ud", "csilfdQcQsQiQlQfQd">; +def CMLT : SInst<"vcltz", "ud", "csilfdQcQsQiQlQfQd">; //////////////////////////////////////////////////////////////////////////////// // Max/Min Integer diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 97252cbfdbe..7ca68f13de0 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3007,10 +3007,24 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecps_v, E); case AArch64::BI__builtin_neon_vrecpsq_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecpsq_v, E); + case 
AArch64::BI__builtin_neon_vcale_v: + if (VTy->getVectorNumElements() == 1) { + std::swap(Ops[0], Ops[1]); + } else { + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcale_v, E); + } case AArch64::BI__builtin_neon_vcage_v: + if (VTy->getVectorNumElements() == 1) { + // Determine the types of this overloaded AArch64 intrinsic + SmallVector<llvm::Type *, 3> Tys; + Tys.push_back(VTy); + VTy = llvm::VectorType::get(DoubleTy, 1); + Tys.push_back(VTy); + Tys.push_back(VTy); + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vcage, Tys); + return EmitNeonCall(F, Ops, "vcage"); + } return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcage_v, E); - case AArch64::BI__builtin_neon_vcale_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcale_v, E); case AArch64::BI__builtin_neon_vcaleq_v: std::swap(Ops[0], Ops[1]); case AArch64::BI__builtin_neon_vcageq_v: { @@ -3022,8 +3036,22 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return EmitNeonCall(F, Ops, "vcage"); } case AArch64::BI__builtin_neon_vcalt_v: - return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcalt_v, E); + if (VTy->getVectorNumElements() == 1) { + std::swap(Ops[0], Ops[1]); + } else { + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcalt_v, E); + } case AArch64::BI__builtin_neon_vcagt_v: + if (VTy->getVectorNumElements() == 1) { + // Determine the types of this overloaded AArch64 intrinsic + SmallVector<llvm::Type *, 3> Tys; + Tys.push_back(VTy); + VTy = llvm::VectorType::get(DoubleTy, 1); + Tys.push_back(VTy); + Tys.push_back(VTy); + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vcagt, Tys); + return EmitNeonCall(F, Ops, "vcagt"); + } return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcagt_v, E); case AArch64::BI__builtin_neon_vcaltq_v: std::swap(Ops[0], Ops[1]); diff --git a/clang/test/CodeGen/aarch64-neon-intrinsics.c b/clang/test/CodeGen/aarch64-neon-intrinsics.c index 8c4ef4cf66f..6e9b7f10b36 100644 --- a/clang/test/CodeGen/aarch64-neon-intrinsics.c +++ 
b/clang/test/CodeGen/aarch64-neon-intrinsics.c @@ -915,6 +915,12 @@ uint32x2_t test_vcage_f32(float32x2_t v1, float32x2_t v2) { // CHECK: facge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s } +uint64x1_t test_vcage_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vcage_f64 + return vcage_f64(a, b); + // CHECK: facge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint32x4_t test_vcageq_f32(float32x4_t v1, float32x4_t v2) { // CHECK: test_vcageq_f32 return vcageq_f32(v1, v2); @@ -933,6 +939,12 @@ uint32x2_t test_vcagt_f32(float32x2_t v1, float32x2_t v2) { // CHECK: facgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s } +uint64x1_t test_vcagt_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vcagt_f64 + return vcagt_f64(a, b); + // CHECK: facgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) { // CHECK: test_vcagtq_f32 return vcagtq_f32(v1, v2); @@ -948,10 +960,16 @@ uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) { uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) { // CHECK: test_vcale_f32 return vcale_f32(v1, v2); - // Using registers other than v0, v1 are possible, but would be odd. + // Using registers other than v0, v1 are possible, but would be odd. 
// CHECK: facge {{v[0-9]+}}.2s, v1.2s, v0.2s } +uint64x1_t test_vcale_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vcale_f64 + return vcale_f64(a, b); + // CHECK: facge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) { // CHECK: test_vcaleq_f32 return vcaleq_f32(v1, v2); @@ -973,6 +991,12 @@ uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) { // CHECK: facgt {{v[0-9]+}}.2s, v1.2s, v0.2s } +uint64x1_t test_vcalt_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vcalt_f64 + return vcalt_f64(a, b); + // CHECK: facgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) { // CHECK: test_vcaltq_f32 return vcaltq_f32(v1, v2); @@ -1114,12 +1138,30 @@ uint32x2_t test_vceq_s32(int32x2_t v1, int32x2_t v2) { // CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s } +uint64x1_t test_vceq_s64(int64x1_t a, int64x1_t b) { + // CHECK: test_vceq_s64 + return vceq_s64(a, b); + // CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint64x1_t test_vceq_u64(uint64x1_t a, uint64x1_t b) { + // CHECK: test_vceq_u64 + return vceq_u64(a, b); + // CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) { // CHECK: test_vceq_f32 return vceq_f32(v1, v2); // CHECK: fcmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s } +uint64x1_t test_vceq_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vceq_f64 + return vceq_f64(a, b); + // CHECK: fcmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint8x8_t test_vceq_u8(uint8x8_t v1, uint8x8_t v2) { // CHECK: test_vceq_u8 return vceq_u8(v1, v2); @@ -1228,12 +1270,30 @@ uint32x2_t test_vcge_s32(int32x2_t v1, int32x2_t v2) { // CHECK: cmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s } +uint64x1_t test_vcge_s64(int64x1_t a, int64x1_t b) { + // CHECK: test_vcge_s64 + return vcge_s64(a, b); + // CHECK: cmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint64x1_t 
test_vcge_u64(uint64x1_t a, uint64x1_t b) { + // CHECK: test_vcge_u64 + return vcge_u64(a, b); + // CHECK: cmhs {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) { // CHECK: test_vcge_f32 return vcge_f32(v1, v2); // CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s } +uint64x1_t test_vcge_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vcge_f64 + return vcge_f64(a, b); + // CHECK: fcmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint8x8_t test_vcge_u8(uint8x8_t v1, uint8x8_t v2) { // CHECK: test_vcge_u8 return vcge_u8(v1, v2); @@ -1333,12 +1393,30 @@ uint32x2_t test_vcle_s32(int32x2_t v1, int32x2_t v2) { // CHECK: cmge {{v[0-9]+}}.2s, v1.2s, v0.2s } +uint64x1_t test_vcle_s64(int64x1_t a, int64x1_t b) { + // CHECK: test_vcle_s64 + return vcle_s64(a, b); + // CHECK: cmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint64x1_t test_vcle_u64(uint64x1_t a, uint64x1_t b) { + // CHECK: test_vcle_u64 + return vcle_u64(a, b); + // CHECK: cmhs {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) { // CHECK: test_vcle_f32 return vcle_f32(v1, v2); // CHECK: fcmge {{v[0-9]+}}.2s, v1.2s, v0.2s } +uint64x1_t test_vcle_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vcle_f64 + return vcle_f64(a, b); + // CHECK: fcmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint8x8_t test_vcle_u8(uint8x8_t v1, uint8x8_t v2) { // CHECK: test_vcle_u8 return vcle_u8(v1, v2); @@ -1436,12 +1514,30 @@ uint32x2_t test_vcgt_s32(int32x2_t v1, int32x2_t v2) { // CHECK: cmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s } +uint64x1_t test_vcgt_s64(int64x1_t a, int64x1_t b) { + // CHECK: test_vcgt_s64 + return vcgt_s64(a, b); + // CHECK: cmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint64x1_t test_vcgt_u64(uint64x1_t a, uint64x1_t b) { + // CHECK: test_vcgt_u64 + return vcgt_u64(a, b); + // CHECK: cmhi {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint32x2_t test_vcgt_f32(float32x2_t v1, 
float32x2_t v2) { // CHECK: test_vcgt_f32 return vcgt_f32(v1, v2); // CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s } +uint64x1_t test_vcgt_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vcgt_f64 + return vcgt_f64(a, b); + // CHECK: fcmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint8x8_t test_vcgt_u8(uint8x8_t v1, uint8x8_t v2) { // CHECK: test_vcgt_u8 return vcgt_u8(v1, v2); @@ -1543,12 +1639,30 @@ uint32x2_t test_vclt_s32(int32x2_t v1, int32x2_t v2) { // CHECK: cmgt {{v[0-9]+}}.2s, v1.2s, v0.2s } +uint64x1_t test_vclt_s64(int64x1_t a, int64x1_t b) { + // CHECK: test_vclt_s64 + return vclt_s64(a, b); + // CHECK: cmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint64x1_t test_vclt_u64(uint64x1_t a, uint64x1_t b) { + // CHECK: test_vclt_u64 + return vclt_u64(a, b); + // CHECK: cmhi {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) { // CHECK: test_vclt_f32 return vclt_f32(v1, v2); // CHECK: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s } +uint64x1_t test_vclt_f64(float64x1_t a, float64x1_t b) { + // CHECK: test_vclt_f64 + return vclt_f64(a, b); + // CHECK: fcmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + uint8x8_t test_vclt_u8(uint8x8_t v1, uint8x8_t v2) { // CHECK: test_vclt_u8 return vclt_u8(v1, v2); diff --git a/clang/test/CodeGen/aarch64-neon-misc.c b/clang/test/CodeGen/aarch64-neon-misc.c index 8c2476b693d..f56bf760aa2 100644 --- a/clang/test/CodeGen/aarch64-neon-misc.c +++ b/clang/test/CodeGen/aarch64-neon-misc.c @@ -24,6 +24,24 @@ uint32x2_t test_vceqz_s32(int32x2_t a) { return vceqz_s32(a); } +// CHECK: test_vceqz_s64 +// CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, #0x0 +uint64x1_t test_vceqz_s64(int64x1_t a) { + return vceqz_s64(a); +} + +// CHECK: test_vceqz_u64 +// CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, #0x0 +uint64x1_t test_vceqz_u64(uint64x1_t a) { + return vceqz_u64(a); +} + +// CHECK: test_vceqz_p64 +// CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, #0x0 +uint64x1_t test_vceqz_p64(poly64x1_t a) { + return 
vceqz_p64(a); +} + // CHECK: test_vceqzq_s8 // CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0 uint8x16_t test_vceqzq_s8(int8x16_t a) { @@ -120,6 +138,12 @@ uint64x2_t test_vceqzq_f64(float64x2_t a) { return vceqzq_f64(a); } +// CHECK: test_vceqzq_p64 +// CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0 +uint64x2_t test_vceqzq_p64(poly64x2_t a) { + return vceqzq_p64(a); +} + // CHECK: test_vcgez_s8 // CHECK: cmge {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x0 uint8x8_t test_vcgez_s8(int8x8_t a) { @@ -138,6 +162,12 @@ uint32x2_t test_vcgez_s32(int32x2_t a) { return vcgez_s32(a); } +// CHECK: test_vcgez_s64 +// CHECK: cmge {{d[0-9]+}}, {{d[0-9]+}}, #0x0 +uint64x1_t test_vcgez_s64(int64x1_t a) { + return vcgez_s64(a); +} + // CHECK: test_vcgezq_s8 // CHECK: cmge {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0 uint8x16_t test_vcgezq_s8(int8x16_t a) { @@ -198,6 +228,12 @@ uint32x2_t test_vclez_s32(int32x2_t a) { return vclez_s32(a); } +// CHECK: test_vclez_s64 +// CHECK: cmle {{d[0-9]+}}, {{d[0-9]+}}, #0x0 +uint64x1_t test_vclez_s64(int64x1_t a) { + return vclez_s64(a); +} + // CHECK: test_vclezq_s8 // CHECK: cmle {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0 uint8x16_t test_vclezq_s8(int8x16_t a) { @@ -258,6 +294,12 @@ uint32x2_t test_vcgtz_s32(int32x2_t a) { return vcgtz_s32(a); } +// CHECK: test_vcgtz_s64 +// CHECK: cmgt {{d[0-9]+}}, {{d[0-9]+}}, #0x0 +uint64x1_t test_vcgtz_s64(int64x1_t a) { + return vcgtz_s64(a); +} + // CHECK: test_vcgtzq_s8 // CHECK: cmgt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x0 uint8x16_t test_vcgtzq_s8(int8x16_t a) { @@ -318,6 +360,12 @@ uint32x2_t test_vcltz_s32(int32x2_t a) { return vcltz_s32(a); } +// CHECK: test_vcltz_s64 +// CHECK: cmlt {{d[0-9]+}}, {{d[0-9]+}}, #0 +uint64x1_t test_vcltz_s64(int64x1_t a) { + return vcltz_s64(a); +} + // CHECK: test_vcltzq_s8 // CHECK: cmlt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0 uint8x16_t test_vcltzq_s8(int8x16_t a) { |

