-rw-r--r--  clang/include/clang/Basic/arm_neon.td          40
-rw-r--r--  clang/lib/CodeGen/CGBuiltin.cpp                16
-rw-r--r--  clang/test/CodeGen/aarch64-neon-intrinsics.c  968
-rw-r--r--  clang/utils/TableGen/NeonEmitter.cpp          190
4 files changed, 1199 insertions(+), 15 deletions(-)
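
The patch adds the AArch64 "_high" variants of the NEON widening, narrowing and saturating-doubling arithmetic intrinsics. As the TableGen definitions and NeonEmitter changes below show, most of the new intrinsics simply apply the existing base intrinsic to the high 64-bit halves of their 128-bit sources, while the *hn_high narrowing forms pack the narrowed result into the high half of the destination. A minimal usage sketch follows; the function name and scenario are mine, assuming an arm_neon.h generated with this patch:

#include <arm_neon.h>

// Multiply the upper four 16-bit lanes of a and b, widening to 32 bits.
// With this patch vmull_high_s16 expands to roughly
// vmull_s16(vget_high_s16(a), vget_high_s16(b)), and the CodeGen test
// added below expects it to select the smull2 instruction.
int32x4_t widen_upper_product(int16x8_t a, int16x8_t b) {
  return vmull_high_s16(a, b);
}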
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index 1ac5f9b7168..ad10abb1c2b 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -18,15 +18,22 @@ def OP_NONE : Op;
def OP_UNAVAILABLE : Op;
def OP_ADD : Op;
def OP_ADDL : Op;
+def OP_ADDLHi : Op;
def OP_ADDW : Op;
+def OP_ADDWHi : Op;
def OP_SUB : Op;
def OP_SUBL : Op;
+def OP_SUBLHi : Op;
def OP_SUBW : Op;
+def OP_SUBWHi : Op;
def OP_MUL : Op;
def OP_MLA : Op;
def OP_MLAL : Op;
+def OP_MULLHi : Op;
+def OP_MLALHi : Op;
def OP_MLS : Op;
def OP_MLSL : Op;
+def OP_MLSLHi : Op;
def OP_MUL_N : Op;
def OP_MLA_N : Op;
def OP_MLS_N : Op;
@@ -66,9 +73,18 @@ def OP_REV64 : Op;
def OP_REV32 : Op;
def OP_REV16 : Op;
def OP_REINT : Op;
+def OP_ADDHNHi : Op;
+def OP_RADDHNHi : Op;
+def OP_SUBHNHi : Op;
+def OP_RSUBHNHi : Op;
def OP_ABDL : Op;
+def OP_ABDLHi : Op;
def OP_ABA : Op;
def OP_ABAL : Op;
+def OP_ABALHi : Op;
+def OP_QDMULLHi : Op;
+def OP_QDMLALHi : Op;
+def OP_QDMLSLHi : Op;
def OP_DIV : Op;
def OP_LONG_HI : Op;
def OP_NARROW_HI : Op;
@@ -133,6 +149,7 @@ class NoTestOpInst<string n, string p, string t, Op o> : Inst<n, p, t, o> {}
// w: double width elements, same num elts
// n: double width elements, half num elts
// h: half width elements, double num elts
+// q: half width elements, quad num elts
// e: half width elements, double num elts, unsigned
// m: half width elements, same num elts
// i: constant int
@@ -590,6 +607,29 @@ def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "udi", "Qd">;
}
////////////////////////////////////////////////////////////////////////////////
+// 3VDiff class using the high 64-bit halves of the operands
+def VADDL_HIGH : SOpInst<"vaddl_high", "wkk", "csiUcUsUi", OP_ADDLHi>;
+def VADDW_HIGH : SOpInst<"vaddw_high", "wwk", "csiUcUsUi", OP_ADDWHi>;
+def VSUBL_HIGH : SOpInst<"vsubl_high", "wkk", "csiUcUsUi", OP_SUBLHi>;
+def VSUBW_HIGH : SOpInst<"vsubw_high", "wwk", "csiUcUsUi", OP_SUBWHi>;
+
+def VABDL_HIGH : SOpInst<"vabdl_high", "wkk", "csiUcUsUi", OP_ABDLHi>;
+def VABAL_HIGH : SOpInst<"vabal_high", "wwkk", "csiUcUsUi", OP_ABALHi>;
+
+def VMULL_HIGH : SOpInst<"vmull_high", "wkk", "csiUcUsUiPc", OP_MULLHi>;
+def VMLAL_HIGH : SOpInst<"vmlal_high", "wwkk", "csiUcUsUi", OP_MLALHi>;
+def VMLSL_HIGH : SOpInst<"vmlsl_high", "wwkk", "csiUcUsUi", OP_MLSLHi>;
+
+def VADDHN_HIGH : SOpInst<"vaddhn_high", "qhkk", "silUsUiUl", OP_ADDHNHi>;
+def VRADDHN_HIGH : SOpInst<"vraddhn_high", "qhkk", "silUsUiUl", OP_RADDHNHi>;
+def VSUBHN_HIGH : SOpInst<"vsubhn_high", "qhkk", "silUsUiUl", OP_SUBHNHi>;
+def VRSUBHN_HIGH : SOpInst<"vrsubhn_high", "qhkk", "silUsUiUl", OP_RSUBHNHi>;
+
+def VQDMULL_HIGH : SOpInst<"vqdmull_high", "wkk", "si", OP_QDMULLHi>;
+def VQDMLAL_HIGH : SOpInst<"vqdmlal_high", "wwkk", "si", OP_QDMLALHi>;
+def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "wwkk", "si", OP_QDMLSLHi>;
+
+////////////////////////////////////////////////////////////////////////////////
// Scalar Arithmetic
// Scalar Addition
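
The narrowing "_high" definitions above use the new 'q' return-type modifier (half-width elements, quadruple the element count) together with the OP_*HNHi operations, which combine the narrowed result into the high half of the destination. A hand-written equivalent of the expected expansion, shown as a sketch rather than the literal header text the emitter produces:

#include <arm_neon.h>

// Equivalent of the new vaddhn_high_s32: narrow each 32-bit lane of (a + b)
// to its high 16 bits and place the result in the upper 64 bits, keeping r
// in the lower 64 bits (this mirrors OP_ADDHNHi in NeonEmitter.cpp below).
static inline int16x8_t addhn_high_s32_equiv(int16x4_t r, int32x4_t a,
                                             int32x4_t b) {
  return vcombine_s16(r, vaddhn_s32(a, b));
}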
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 6bf5d6f54af..5b5b39f5e34 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -1840,6 +1840,22 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshl_v, E);
case AArch64::BI__builtin_neon_vqrshlq_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshlq_v, E);
+ case AArch64::BI__builtin_neon_vaddhn_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vaddhn_v, E);
+ case AArch64::BI__builtin_neon_vraddhn_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vraddhn_v, E);
+ case AArch64::BI__builtin_neon_vsubhn_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vsubhn_v, E);
+ case AArch64::BI__builtin_neon_vrsubhn_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsubhn_v, E);
+ case AArch64::BI__builtin_neon_vmull_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmull_v, E);
+ case AArch64::BI__builtin_neon_vqdmull_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmull_v, E);
+ case AArch64::BI__builtin_neon_vqdmlal_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmlal_v, E);
+ case AArch64::BI__builtin_neon_vqdmlsl_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmlsl_v, E);
case AArch64::BI__builtin_neon_vmax_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmax_v, E);
case AArch64::BI__builtin_neon_vmaxq_v:
diff --git a/clang/test/CodeGen/aarch64-neon-intrinsics.c b/clang/test/CodeGen/aarch64-neon-intrinsics.c
index 9dce70d50a9..53ec130184c 100644
--- a/clang/test/CodeGen/aarch64-neon-intrinsics.c
+++ b/clang/test/CodeGen/aarch64-neon-intrinsics.c
@@ -4274,3 +4274,971 @@ uint64x2_t test_vcvtq_n_u64_f64(float64x2_t a) {
return vcvtq_n_u64_f64(a, 50);
// CHECK: fcvtzu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
}
+
+int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
+ // CHECK: test_vaddl_s8
+ return vaddl_s8(a, b);
+ // CHECK: saddl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+
+int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
+ // CHECK: test_vaddl_s16
+ return vaddl_s16(a, b);
+ // CHECK: saddl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+
+int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
+ // CHECK: test_vaddl_s32
+ return vaddl_s32(a, b);
+ // CHECK: saddl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+
+uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
+ // CHECK: test_vaddl_u8
+ return vaddl_u8(a, b);
+ // CHECK: uaddl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+
+uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
+ // CHECK: test_vaddl_u16
+ return vaddl_u16(a, b);
+ // CHECK: uaddl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+
+uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
+ // CHECK: test_vaddl_u32
+ return vaddl_u32(a, b);
+ // CHECK: uaddl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+
+int16x8_t test_vaddl_high_s8(int8x16_t a, int8x16_t b) {
+ // CHECK: test_vaddl_high_s8
+ return vaddl_high_s8(a, b);
+ // CHECK: saddl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+
+int32x4_t test_vaddl_high_s16(int16x8_t a, int16x8_t b) {
+ // CHECK: test_vaddl_high_s16
+ return vaddl_high_s16(a, b);
+ // CHECK: saddl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+int64x2_t test_vaddl_high_s32(int32x4_t a, int32x4_t b) {
+ // CHECK: test_vaddl_high_s32
+ return vaddl_high_s32(a, b);
+ // CHECK: saddl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+uint16x8_t test_vaddl_high_u8(uint8x16_t a, uint8x16_t b) {
+ // CHECK: test_vaddl_high_u8
+ return vaddl_high_u8(a, b);
+ // CHECK: uaddl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+
+uint32x4_t test_vaddl_high_u16(uint16x8_t a, uint16x8_t b) {
+ // CHECK: test_vaddl_high_u16
+ return vaddl_high_u16(a, b);
+ // CHECK: uaddl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+uint64x2_t test_vaddl_high_u32(uint32x4_t a, uint32x4_t b) {
+ // CHECK: test_vaddl_high_u32
+ return vaddl_high_u32(a, b);
+ // CHECK: uaddl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
+ // CHECK: test_vaddw_s8
+ return vaddw_s8(a, b);
+ // CHECK: saddw {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8b
+}
+
+int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
+ // CHECK: test_vaddw_s16
+ return vaddw_s16(a, b);
+ // CHECK: saddw {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4h
+}
+
+int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
+ // CHECK: test_vaddw_s32
+ return vaddw_s32(a, b);
+ // CHECK: saddw {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2s
+}
+
+uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
+ // CHECK: test_vaddw_u8
+ return vaddw_u8(a, b);
+ // CHECK: uaddw {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8b
+}
+
+uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
+ // CHECK: test_vaddw_u16
+ return vaddw_u16(a, b);
+ // CHECK: uaddw {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4h
+}
+
+uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
+ // CHECK: test_vaddw_u32
+ return vaddw_u32(a, b);
+ // CHECK: uaddw {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2s
+}
+
+int16x8_t test_vaddw_high_s8(int16x8_t a, int8x16_t b) {
+ // CHECK: test_vaddw_high_s8
+ return vaddw_high_s8(a, b);
+ // CHECK: saddw2 {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.16b
+}
+
+int32x4_t test_vaddw_high_s16(int32x4_t a, int16x8_t b) {
+ // CHECK: test_vaddw_high_s16
+ return vaddw_high_s16(a, b);
+ // CHECK: saddw2 {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.8h
+}
+
+int64x2_t test_vaddw_high_s32(int64x2_t a, int32x4_t b) {
+ // CHECK: test_vaddw_high_s32
+ return vaddw_high_s32(a, b);
+ // CHECK: saddw2 {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.4s
+}
+
+uint16x8_t test_vaddw_high_u8(uint16x8_t a, uint8x16_t b) {
+ // CHECK: test_vaddw_high_u8
+ return vaddw_high_u8(a, b);
+ // CHECK: uaddw2 {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.16b
+}
+
+uint32x4_t test_vaddw_high_u16(uint32x4_t a, uint16x8_t b) {
+ // CHECK: test_vaddw_high_u16
+ return vaddw_high_u16(a, b);
+ // CHECK: uaddw2 {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.8h
+}
+
+uint64x2_t test_vaddw_high_u32(uint64x2_t a, uint32x4_t b) {
+ // CHECK: test_vaddw_high_u32
+ return vaddw_high_u32(a, b);
+ // CHECK: uaddw2 {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.4s
+}
+
+int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) {
+ // CHECK: test_vsubl_s8
+ return vsubl_s8(a, b);
+ // CHECK: ssubl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+
+int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) {
+ // CHECK: test_vsubl_s16
+ return vsubl_s16(a, b);
+ // CHECK: ssubl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+
+int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
+ // CHECK: test_vsubl_s32
+ return vsubl_s32(a, b);
+ // CHECK: ssubl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+
+uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
+ // CHECK: test_vsubl_u8
+ return vsubl_u8(a, b);
+ // CHECK: usubl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+
+uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
+ // CHECK: test_vsubl_u16
+ return vsubl_u16(a, b);
+ // CHECK: usubl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+
+uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
+ // CHECK: test_vsubl_u32
+ return vsubl_u32(a, b);
+ // CHECK: usubl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+
+int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) {
+ // CHECK: test_vsubl_high_s8
+ return vsubl_high_s8(a, b);
+ // CHECK: ssubl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+
+int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) {
+ // CHECK: test_vsubl_high_s16
+ return vsubl_high_s16(a, b);
+ // CHECK: ssubl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) {
+ // CHECK: test_vsubl_high_s32
+ return vsubl_high_s32(a, b);
+ // CHECK: ssubl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) {
+ // CHECK: test_vsubl_high_u8
+ return vsubl_high_u8(a, b);
+ // CHECK: usubl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+
+uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) {
+ // CHECK: test_vsubl_high_u16
+ return vsubl_high_u16(a, b);
+ // CHECK: usubl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) {
+ // CHECK: test_vsubl_high_u32
+ return vsubl_high_u32(a, b);
+ // CHECK: usubl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
+ // CHECK: test_vsubw_s8
+ return vsubw_s8(a, b);
+ // CHECK: ssubw {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8b
+}
+
+int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
+ // CHECK: test_vsubw_s16
+ return vsubw_s16(a, b);
+ // CHECK: ssubw {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4h
+}
+
+int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
+ // CHECK: test_vsubw_s32
+ return vsubw_s32(a, b);
+ // CHECK: ssubw {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2s
+}
+
+uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
+ // CHECK: test_vsubw_u8
+ return vsubw_u8(a, b);
+ // CHECK: usubw {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8b
+}
+
+uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
+ // CHECK: test_vsubw_u16
+ return vsubw_u16(a, b);
+ // CHECK: usubw {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4h
+}
+
+uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
+ // CHECK: test_vsubw_u32
+ return vsubw_u32(a, b);
+ // CHECK: usubw {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2s
+}
+
+int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) {
+ // CHECK: test_vsubw_high_s8
+ return vsubw_high_s8(a, b);
+ // CHECK: ssubw2 {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.16b
+}
+
+int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) {
+ // CHECK: test_vsubw_high_s16
+ return vsubw_high_s16(a, b);
+ // CHECK: ssubw2 {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.8h
+}
+
+int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) {
+ // CHECK: test_vsubw_high_s32
+ return vsubw_high_s32(a, b);
+ // CHECK: ssubw2 {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.4s
+}
+
+uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) {
+ // CHECK: test_vsubw_high_u8
+ return vsubw_high_u8(a, b);
+ // CHECK: usubw2 {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.16b
+}
+
+uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) {
+ // CHECK: test_vsubw_high_u16
+ return vsubw_high_u16(a, b);
+ // CHECK: usubw2 {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.8h
+}
+
+uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) {
+ // CHECK: test_vsubw_high_u32
+ return vsubw_high_u32(a, b);
+ // CHECK: usubw2 {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.4s
+}
+
+int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
+ // CHECK: test_vaddhn_s16
+ return vaddhn_s16(a, b);
+ // CHECK: addhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
+ // CHECK: test_vaddhn_s32
+ return vaddhn_s32(a, b);
+ // CHECK: addhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
+ // CHECK: test_vaddhn_s64
+ return vaddhn_s64(a, b);
+ // CHECK: addhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
+ // CHECK: test_vaddhn_u16
+ return vaddhn_u16(a, b);
+ // CHECK: addhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
+ // CHECK: test_vaddhn_u32
+ return vaddhn_u32(a, b);
+ // CHECK: addhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
+ // CHECK: test_vaddhn_u64
+ return vaddhn_u64(a, b);
+ // CHECK: addhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+int8x16_t test_vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
+ // CHECK: test_vaddhn_high_s16
+ return vaddhn_high_s16(r, a, b);
+ // CHECK: addhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+int16x8_t test_vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
+ // CHECK: test_vaddhn_high_s32
+ return vaddhn_high_s32(r, a, b);
+ // CHECK: addhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int32x4_t test_vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
+ // CHECK: test_vaddhn_high_s64
+ return vaddhn_high_s64(r, a, b);
+ // CHECK: addhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+uint8x16_t test_vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
+ // CHECK: test_vaddhn_high_u16
+ return vaddhn_high_u16(r, a, b);
+ // CHECK: addhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+uint16x8_t test_vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
+ // CHECK: test_vaddhn_high_u32
+ return vaddhn_high_u32(r, a, b);
+ // CHECK: addhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+uint32x4_t test_vaddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
+ // CHECK: test_vaddhn_high_u64
+ return vaddhn_high_u64(r, a, b);
+ // CHECK: addhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
+ // CHECK: test_vraddhn_s16
+ return vraddhn_s16(a, b);
+ // CHECK: raddhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
+ // CHECK: test_vraddhn_s32
+ return vraddhn_s32(a, b);
+ // CHECK: raddhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
+ // CHECK: test_vraddhn_s64
+ return vraddhn_s64(a, b);
+ // CHECK: raddhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
+ // CHECK: test_vraddhn_u16
+ return vraddhn_u16(a, b);
+ // CHECK: raddhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
+ // CHECK: test_vraddhn_u32
+ return vraddhn_u32(a, b);
+ // CHECK: raddhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
+ // CHECK: test_vraddhn_u64
+ return vraddhn_u64(a, b);
+ // CHECK: raddhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+int8x16_t test_vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
+ // CHECK: test_vraddhn_high_s16
+ return vraddhn_high_s16(r, a, b);
+ // CHECK: raddhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+int16x8_t test_vraddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
+ // CHECK: test_vraddhn_high_s32
+ return vraddhn_high_s32(r, a, b);
+ // CHECK: raddhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int32x4_t test_vraddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
+ // CHECK: test_vraddhn_high_s64
+ return vraddhn_high_s64(r, a, b);
+ // CHECK: raddhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+uint8x16_t test_vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
+ // CHECK: test_vraddhn_high_u16
+ return vraddhn_high_u16(r, a, b);
+ // CHECK: raddhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+uint16x8_t test_vraddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
+ // CHECK: test_vraddhn_high_u32
+ return vraddhn_high_u32(r, a, b);
+ // CHECK: raddhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+uint32x4_t test_vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
+ // CHECK: test_vraddhn_high_u64
+ return vraddhn_high_u64(r, a, b);
+ // CHECK: raddhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) {
+ // CHECK: test_vsubhn_s16
+ return vsubhn_s16(a, b);
+ // CHECK: subhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) {
+ // CHECK: test_vsubhn_s32
+ return vsubhn_s32(a, b);
+ // CHECK: subhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) {
+ // CHECK: test_vsubhn_s64
+ return vsubhn_s64(a, b);
+ // CHECK: subhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) {
+ // CHECK: test_vsubhn_u16
+ return vsubhn_u16(a, b);
+ // CHECK: subhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) {
+ // CHECK: test_vsubhn_u32
+ return vsubhn_u32(a, b);
+ // CHECK: subhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) {
+ // CHECK: test_vsubhn_u64
+ return vsubhn_u64(a, b);
+ // CHECK: subhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+int8x16_t test_vsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
+ // CHECK: test_vsubhn_high_s16
+ return vsubhn_high_s16(r, a, b);
+ // CHECK: subhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+int16x8_t test_vsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
+ // CHECK: test_vsubhn_high_s32
+ return vsubhn_high_s32(r, a, b);
+ // CHECK: subhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int32x4_t test_vsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
+ // CHECK: test_vsubhn_high_s64
+ return vsubhn_high_s64(r, a, b);
+ // CHECK: subhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+uint8x16_t test_vsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
+ // CHECK: test_vsubhn_high_u16
+ return vsubhn_high_u16(r, a, b);
+ // CHECK: subhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+uint16x8_t test_vsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
+ // CHECK: test_vsubhn_high_u32
+ return vsubhn_high_u32(r, a, b);
+ // CHECK: subhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+uint32x4_t test_vsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
+ // CHECK: test_vsubhn_high_u64
+ return vsubhn_high_u64(r, a, b);
+ // CHECK: subhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
+ // CHECK: test_vrsubhn_s16
+ return vrsubhn_s16(a, b);
+ // CHECK: rsubhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
+ // CHECK: test_vrsubhn_s32
+ return vrsubhn_s32(a, b);
+ // CHECK: rsubhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
+ // CHECK: test_vrsubhn_s64
+ return vrsubhn_s64(a, b);
+ // CHECK: rsubhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
+ // CHECK: test_vrsubhn_u16
+ return vrsubhn_u16(a, b);
+ // CHECK: rsubhn {{v[0-31]+}}.8b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
+ // CHECK: test_vrsubhn_u32
+ return vrsubhn_u32(a, b);
+ // CHECK: rsubhn {{v[0-31]+}}.4h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
+ // CHECK: test_vrsubhn_u64
+ return vrsubhn_u64(a, b);
+ // CHECK: rsubhn {{v[0-31]+}}.2s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+int8x16_t test_vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
+ // CHECK: test_vrsubhn_high_s16
+ return vrsubhn_high_s16(r, a, b);
+ // CHECK: rsubhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+int16x8_t test_vrsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
+ // CHECK: test_vrsubhn_high_s32
+ return vrsubhn_high_s32(r, a, b);
+ // CHECK: rsubhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int32x4_t test_vrsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
+ // CHECK: test_vrsubhn_high_s64
+ return vrsubhn_high_s64(r, a, b);
+ // CHECK: rsubhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
+ // CHECK: test_vrsubhn_high_u16
+ return vrsubhn_high_u16(r, a, b);
+ // CHECK: rsubhn2 {{v[0-31]+}}.16b, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+uint16x8_t test_vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
+ // CHECK: test_vrsubhn_high_u32
+ return vrsubhn_high_u32(r, a, b);
+ // CHECK: rsubhn2 {{v[0-31]+}}.8h, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
+ // CHECK: test_vrsubhn_high_u64
+ return vrsubhn_high_u64(r, a, b);
+ // CHECK: rsubhn2 {{v[0-31]+}}.4s, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+}
+
+int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
+ // CHECK: test_vabdl_s8
+ return vabdl_s8(a, b);
+ // CHECK: sabdl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
+ // CHECK: test_vabdl_s16
+ return vabdl_s16(a, b);
+ // CHECK: sabdl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
+ // CHECK: test_vabdl_s32
+ return vabdl_s32(a, b);
+ // CHECK: sabdl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
+ // CHECK: test_vabdl_u8
+ return vabdl_u8(a, b);
+ // CHECK: uabdl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
+ // CHECK: test_vabdl_u16
+ return vabdl_u16(a, b);
+ // CHECK: uabdl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
+ // CHECK: test_vabdl_u32
+ return vabdl_u32(a, b);
+ // CHECK: uabdl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+
+int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
+ // CHECK: test_vabal_s8
+ return vabal_s8(a, b, c);
+ // CHECK: sabal {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
+ // CHECK: test_vabal_s16
+ return vabal_s16(a, b, c);
+ // CHECK: sabal {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
+ // CHECK: test_vabal_s32
+ return vabal_s32(a, b, c);
+ // CHECK: sabal {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
+ // CHECK: test_vabal_u8
+ return vabal_u8(a, b, c);
+ // CHECK: uabal {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
+ // CHECK: test_vabal_u16
+ return vabal_u16(a, b, c);
+ // CHECK: uabal {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
+ // CHECK: test_vabal_u32
+ return vabal_u32(a, b, c);
+ // CHECK: uabal {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+
+int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) {
+ // CHECK: test_vabdl_high_s8
+ return vabdl_high_s8(a, b);
+ // CHECK: sabdl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+int32x4_t test_vabdl_high_s16(int16x8_t a, int16x8_t b) {
+ // CHECK: test_vabdl_high_s16
+ return vabdl_high_s16(a, b);
+ // CHECK: sabdl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) {
+ // CHECK: test_vabdl_high_s32
+ return vabdl_high_s32(a, b);
+ // CHECK: sabdl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) {
+ // CHECK: test_vabdl_high_u8
+ return vabdl_high_u8(a, b);
+ // CHECK: uabdl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+uint32x4_t test_vabdl_high_u16(uint16x8_t a, uint16x8_t b) {
+ // CHECK: test_vabdl_high_u16
+ return vabdl_high_u16(a, b);
+ // CHECK: uabdl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) {
+ // CHECK: test_vabdl_high_u32
+ return vabdl_high_u32(a, b);
+ // CHECK: uabdl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int16x8_t test_vabal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
+ // CHECK: test_vabal_high_s8
+ return vabal_high_s8(a, b, c);
+ // CHECK: sabal2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+int32x4_t test_vabal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
+ // CHECK: test_vabal_high_s16
+ return vabal_high_s16(a, b, c);
+ // CHECK: sabal2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+int64x2_t test_vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
+ // CHECK: test_vabal_high_s32
+ return vabal_high_s32(a, b, c);
+ // CHECK: sabal2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+uint16x8_t test_vabal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
+ // CHECK: test_vabal_high_u8
+ return vabal_high_u8(a, b, c);
+ // CHECK: uabal2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+uint32x4_t test_vabal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
+ // CHECK: test_vabal_high_u16
+ return vabal_high_u16(a, b, c);
+ // CHECK: uabal2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+uint64x2_t test_vabal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
+ // CHECK: test_vabal_high_u32
+ return vabal_high_u32(a, b, c);
+ // CHECK: uabal2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
+ // CHECK: test_vmull_s8
+ return vmull_s8(a, b);
+ // CHECK: smull {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
+ // CHECK: test_vmull_s16
+ return vmull_s16(a, b);
+ // CHECK: smull {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
+ // CHECK: test_vmull_s32
+ return vmull_s32(a, b);
+ // CHECK: smull {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
+ // CHECK: test_vmull_u8
+ return vmull_u8(a, b);
+ // CHECK: umull {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
+ // CHECK: test_vmull_u16
+ return vmull_u16(a, b);
+ // CHECK: umull {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
+ // CHECK: test_vmull_u32
+ return vmull_u32(a, b);
+ // CHECK: umull {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+
+int16x8_t test_vmull_high_s8(int8x16_t a, int8x16_t b) {
+ // CHECK: test_vmull_high_s8
+ return vmull_high_s8(a, b);
+ // CHECK: smull2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+int32x4_t test_vmull_high_s16(int16x8_t a, int16x8_t b) {
+ // CHECK: test_vmull_high_s16
+ return vmull_high_s16(a, b);
+ // CHECK: smull2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) {
+ // CHECK: test_vmull_high_s32
+ return vmull_high_s32(a, b);
+ // CHECK: smull2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+uint16x8_t test_vmull_high_u8(uint8x16_t a, uint8x16_t b) {
+ // CHECK: test_vmull_high_u8
+ return vmull_high_u8(a, b);
+ // CHECK: umull2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+uint32x4_t test_vmull_high_u16(uint16x8_t a, uint16x8_t b) {
+ // CHECK: test_vmull_high_u16
+ return vmull_high_u16(a, b);
+ // CHECK: umull2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+uint64x2_t test_vmull_high_u32(uint32x4_t a, uint32x4_t b) {
+ // CHECK: test_vmull_high_u32
+ return vmull_high_u32(a, b);
+ // CHECK: umull2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
+ // CHECK: test_vmlal_s8
+ return vmlal_s8(a, b, c);
+ // CHECK: smlal {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
+ // CHECK: test_vmlal_s16
+ return vmlal_s16(a, b, c);
+ // CHECK: smlal {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
+ // CHECK: test_vmlal_s32
+ return vmlal_s32(a, b, c);
+ // CHECK: smlal {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
+ // CHECK: test_vmlal_u8
+ return vmlal_u8(a, b, c);
+ // CHECK: umlal {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
+ // CHECK: test_vmlal_u16
+ return vmlal_u16(a, b, c);
+ // CHECK: umlal {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
+ // CHECK: test_vmlal_u32
+ return vmlal_u32(a, b, c);
+ // CHECK: umlal {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+
+int16x8_t test_vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
+ // CHECK: test_vmlal_high_s8
+ return vmlal_high_s8(a, b, c);
+ // CHECK: smlal2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+int32x4_t test_vmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
+ // CHECK: test_vmlal_high_s16
+ return vmlal_high_s16(a, b, c);
+ // CHECK: smlal2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+int64x2_t test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
+ // CHECK: test_vmlal_high_s32
+ return vmlal_high_s32(a, b, c);
+ // CHECK: smlal2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+uint16x8_t test_vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
+ // CHECK: test_vmlal_high_u8
+ return vmlal_high_u8(a, b, c);
+ // CHECK: umlal2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+uint32x4_t test_vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
+ // CHECK: test_vmlal_high_u16
+ return vmlal_high_u16(a, b, c);
+ // CHECK: umlal2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+uint64x2_t test_vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
+ // CHECK: test_vmlal_high_u32
+ return vmlal_high_u32(a, b, c);
+ // CHECK: umlal2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
+ // CHECK: test_vmlsl_s8
+ return vmlsl_s8(a, b, c);
+ // CHECK: smlsl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
+ // CHECK: test_vmlsl_s16
+ return vmlsl_s16(a, b, c);
+ // CHECK: smlsl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
+ // CHECK: test_vmlsl_s32
+ return vmlsl_s32(a, b, c);
+ // CHECK: smlsl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
+ // CHECK: test_vmlsl_u8
+ return vmlsl_u8(a, b, c);
+ // CHECK: umlsl {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
+ // CHECK: test_vmlsl_u16
+ return vmlsl_u16(a, b, c);
+ // CHECK: umlsl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
+ // CHECK: test_vmlsl_u32
+ return vmlsl_u32(a, b, c);
+ // CHECK: umlsl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+
+int16x8_t test_vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
+ // CHECK: test_vmlsl_high_s8
+ return vmlsl_high_s8(a, b, c);
+ // CHECK: smlsl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+int32x4_t test_vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
+ // CHECK: test_vmlsl_high_s16
+ return vmlsl_high_s16(a, b, c);
+ // CHECK: smlsl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
+ // CHECK: test_vmlsl_high_s32
+ return vmlsl_high_s32(a, b, c);
+ // CHECK: smlsl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+uint16x8_t test_vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
+ // CHECK: test_vmlsl_high_u8
+ return vmlsl_high_u8(a, b, c);
+ // CHECK: umlsl2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
+uint32x4_t test_vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
+ // CHECK: test_vmlsl_high_u16
+ return vmlsl_high_u16(a, b, c);
+ // CHECK: umlsl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+uint64x2_t test_vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
+ // CHECK: test_vmlsl_high_u32
+ return vmlsl_high_u32(a, b, c);
+ // CHECK: umlsl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
+ // CHECK: test_vqdmull_s16
+ return vqdmull_s16(a, b);
+ // CHECK: sqdmull {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
+ // CHECK: test_vqdmull_s32
+ return vqdmull_s32(a, b);
+ // CHECK: sqdmull {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+
+int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
+ // CHECK: test_vqdmlal_s16
+ return vqdmlal_s16(a, b, c);
+ // CHECK: sqdmlal {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+
+int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
+ // CHECK: test_vqdmlal_s32
+ return vqdmlal_s32(a, b, c);
+ // CHECK: sqdmlal {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+
+int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
+ // CHECK: test_vqdmlsl_s16
+ return vqdmlsl_s16(a, b, c);
+ // CHECK: sqdmlsl {{v[0-31]+}}.4s, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+}
+
+int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
+ // CHECK: test_vqdmlsl_s32
+ return vqdmlsl_s32(a, b, c);
+ // CHECK: sqdmlsl {{v[0-31]+}}.2d, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+}
+
+int32x4_t test_vqdmull_high_s16(int16x8_t a, int16x8_t b) {
+ // CHECK: test_vqdmull_high_s16
+ return vqdmull_high_s16(a, b);
+ // CHECK: sqdmull2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+int64x2_t test_vqdmull_high_s32(int32x4_t a, int32x4_t b) {
+ // CHECK: test_vqdmull_high_s32
+ return vqdmull_high_s32(a, b);
+ // CHECK: sqdmull2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int32x4_t test_vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
+ // CHECK: test_vqdmlal_high_s16
+ return vqdmlal_high_s16(a, b, c);
+ // CHECK: sqdmlal2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+int64x2_t test_vqdmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
+ // CHECK: test_vqdmlal_high_s32
+ return vqdmlal_high_s32(a, b, c);
+ // CHECK: sqdmlal2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+int32x4_t test_vqdmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
+ // CHECK: test_vqdmlsl_high_s16
+ return vqdmlsl_high_s16(a, b, c);
+ // CHECK: sqdmlsl2 {{v[0-31]+}}.4s, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+}
+
+int64x2_t test_vqdmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
+ // CHECK: test_vqdmlsl_high_s32
+ return vqdmlsl_high_s32(a, b, c);
+ // CHECK: sqdmlsl2 {{v[0-31]+}}.2d, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+}
+
+poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
+ // CHECK: test_vmull_p8
+ return vmull_p8(a, b);
+ // CHECK: pmull {{v[0-31]+}}.8h, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+}
+
+poly16x8_t test_vmull_high_p8(poly8x16_t a, poly8x16_t b) {
+ // CHECK: test_vmull_high_p8
+ return vmull_high_p8(a, b);
+ // CHECK: pmull2 {{v[0-31]+}}.8h, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+}
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index d8f203d3df4..f700c6753a6 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -40,15 +40,22 @@ enum OpKind {
OpUnavailable,
OpAdd,
OpAddl,
+ OpAddlHi,
OpAddw,
+ OpAddwHi,
OpSub,
OpSubl,
+ OpSublHi,
OpSubw,
+ OpSubwHi,
OpMul,
OpMla,
OpMlal,
+ OpMullHi,
+ OpMlalHi,
OpMls,
OpMlsl,
+ OpMlslHi,
OpMulN,
OpMlaN,
OpMlsN,
@@ -88,9 +95,18 @@ enum OpKind {
OpRev32,
OpRev64,
OpReinterpret,
+ OpAddhnHi,
+ OpRAddhnHi,
+ OpSubhnHi,
+ OpRSubhnHi,
OpAbdl,
+ OpAbdlHi,
OpAba,
OpAbal,
+ OpAbalHi,
+ OpQDMullHi,
+ OpQDMlalHi,
+ OpQDMlslHi,
OpDiv,
OpLongHi,
OpNarrowHi,
@@ -159,15 +175,22 @@ public:
OpMap["OP_UNAVAILABLE"] = OpUnavailable;
OpMap["OP_ADD"] = OpAdd;
OpMap["OP_ADDL"] = OpAddl;
+ OpMap["OP_ADDLHi"] = OpAddlHi;
OpMap["OP_ADDW"] = OpAddw;
+ OpMap["OP_ADDWHi"] = OpAddwHi;
OpMap["OP_SUB"] = OpSub;
OpMap["OP_SUBL"] = OpSubl;
+ OpMap["OP_SUBLHi"] = OpSublHi;
OpMap["OP_SUBW"] = OpSubw;
+ OpMap["OP_SUBWHi"] = OpSubwHi;
OpMap["OP_MUL"] = OpMul;
OpMap["OP_MLA"] = OpMla;
OpMap["OP_MLAL"] = OpMlal;
+ OpMap["OP_MULLHi"] = OpMullHi;
+ OpMap["OP_MLALHi"] = OpMlalHi;
OpMap["OP_MLS"] = OpMls;
OpMap["OP_MLSL"] = OpMlsl;
+ OpMap["OP_MLSLHi"] = OpMlslHi;
OpMap["OP_MUL_N"] = OpMulN;
OpMap["OP_MLA_N"] = OpMlaN;
OpMap["OP_MLS_N"] = OpMlsN;
@@ -207,9 +230,18 @@ public:
OpMap["OP_REV32"] = OpRev32;
OpMap["OP_REV64"] = OpRev64;
OpMap["OP_REINT"] = OpReinterpret;
+ OpMap["OP_ADDHNHi"] = OpAddhnHi;
+ OpMap["OP_RADDHNHi"] = OpRAddhnHi;
+ OpMap["OP_SUBHNHi"] = OpSubhnHi;
+ OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
OpMap["OP_ABDL"] = OpAbdl;
+ OpMap["OP_ABDLHi"] = OpAbdlHi;
OpMap["OP_ABA"] = OpAba;
OpMap["OP_ABAL"] = OpAbal;
+ OpMap["OP_ABALHi"] = OpAbalHi;
+ OpMap["OP_QDMULLHi"] = OpQDMullHi;
+ OpMap["OP_QDMLALHi"] = OpQDMlalHi;
+ OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
OpMap["OP_DIV"] = OpDiv;
OpMap["OP_LONG_HI"] = OpLongHi;
OpMap["OP_NARROW_HI"] = OpNarrowHi;
@@ -326,6 +358,29 @@ static char Narrow(const char t) {
}
}
+static std::string GetNarrowTypestr(StringRef ty)
+{
+ std::string s;
+ for (size_t i = 0, end = ty.size(); i < end; i++) {
+ switch (ty[i]) {
+ case 's':
+ s += 'c';
+ break;
+ case 'i':
+ s += 's';
+ break;
+ case 'l':
+ s += 'i';
+ break;
+ default:
+ s += ty[i];
+ break;
+ }
+ }
+
+ return s;
+}
+
/// For a particular StringRef, return the base type code, and whether it has
/// the quad-vector, polynomial, or unsigned modifiers set.
static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
@@ -426,6 +481,10 @@ static char ModType(const char mod, char type, bool &quad, bool &poly,
if (type == 'h')
quad = false;
break;
+ case 'q':
+ type = Narrow(type);
+ quad = true;
+ break;
case 'e':
type = Narrow(type);
usgn = true;
@@ -1286,13 +1345,60 @@ static std::string GenMacroLocals(const std::string &proto, StringRef typestr) {
}
// Use the vmovl builtin to sign-extend or zero-extend a vector.
-static std::string Extend(StringRef typestr, const std::string &a) {
+static std::string Extend(StringRef typestr, const std::string &a, bool h=0) {
+ std::string s, high;
+ high = h ? "_high" : "";
+ s = MangleName("vmovl" + high, typestr, ClassS);
+ s += "(" + a + ")";
+ return s;
+}
+
+// Get the high 64-bit part of a vector
+static std::string GetHigh(const std::string &a, StringRef typestr) {
std::string s;
- s = MangleName("vmovl", typestr, ClassS);
+ s = MangleName("vget_high", typestr, ClassS);
s += "(" + a + ")";
return s;
}
+// Generate an operation with two operands, taking the high 64-bit half of both.
+static std::string Gen2OpWith2High(StringRef typestr,
+ const std::string &op,
+ const std::string &a,
+ const std::string &b) {
+ std::string s;
+ std::string Op1 = GetHigh(a, typestr);
+ std::string Op2 = GetHigh(b, typestr);
+ s = MangleName(op, typestr, ClassS);
+ s += "(" + Op1 + ", " + Op2 + ");";
+ return s;
+}
+
+// Generate an operation with three operands, taking the high 64-bit half of
+// the latter two operands.
+static std::string Gen3OpWith2High(StringRef typestr,
+ const std::string &op,
+ const std::string &a,
+ const std::string &b,
+ const std::string &c) {
+ std::string s;
+ std::string Op1 = GetHigh(b, typestr);
+ std::string Op2 = GetHigh(c, typestr);
+ s = MangleName(op, typestr, ClassS);
+ s += "(" + a + ", " + Op1 + ", " + Op2 + ");";
+ return s;
+}
+
+// Generate a combine operation, putting a in the low 64 bits and b in the high 64 bits.
+static std::string GenCombine(std::string typestr,
+ const std::string &a,
+ const std::string &b) {
+ std::string s;
+ s = MangleName("vcombine", typestr, ClassS);
+ s += "(" + a + ", " + b + ")";
+ return s;
+}
+
static std::string Duplicate(unsigned nElts, StringRef typestr,
const std::string &a) {
std::string s;
@@ -1368,18 +1474,30 @@ static std::string GenOpString(const std::string &name, OpKind op,
case OpAddl:
s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
break;
+ case OpAddlHi:
+ s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
+ break;
case OpAddw:
s += "__a + " + Extend(typestr, "__b") + ";";
break;
+ case OpAddwHi:
+ s += "__a + " + Extend(typestr, "__b", 1) + ";";
+ break;
case OpSub:
s += "__a - __b;";
break;
case OpSubl:
s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
break;
+ case OpSublHi:
+ s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
+ break;
case OpSubw:
s += "__a - " + Extend(typestr, "__b") + ";";
break;
+ case OpSubwHi:
+ s += "__a - " + Extend(typestr, "__b", 1) + ";";
+ break;
case OpMulN:
s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
break;
@@ -1413,6 +1531,12 @@ static std::string GenOpString(const std::string &name, OpKind op,
case OpMlal:
s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
break;
+ case OpMullHi:
+ s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
+ break;
+ case OpMlalHi:
+ s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
+ break;
case OpMlsN:
s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
break;
@@ -1433,6 +1557,9 @@ static std::string GenOpString(const std::string &name, OpKind op,
case OpMlsl:
s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
break;
+ case OpMlslHi:
+ s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
+ break;
case OpQDMullLane:
s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
SplatLane(nElts, "__b", "__c") + ");";
@@ -1560,23 +1687,51 @@ static std::string GenOpString(const std::string &name, OpKind op,
}
break;
}
+ case OpAbdlHi:
+ s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
+ break;
+ case OpAddhnHi: {
+ std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
+ s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
+ s += ";";
+ break;
+ }
+ case OpRAddhnHi: {
+ std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
+ s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
+ s += ";";
+ break;
+ }
+ case OpSubhnHi: {
+ std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
+ s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
+ s += ";";
+ break;
+ }
+ case OpRSubhnHi: {
+ std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
+ s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
+ s += ";";
+ break;
+ }
case OpAba:
s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
break;
- case OpAbal: {
- s += "__a + ";
- std::string abd = MangleName("vabd", typestr, ClassS) + "(__b, __c)";
- if (typestr[0] != 'U') {
- // vabd results are always unsigned and must be zero-extended.
- std::string utype = "U" + typestr.str();
- s += "(" + TypeString(proto[0], typestr) + ")";
- abd = "(" + TypeString('d', utype) + ")" + abd;
- s += Extend(utype, abd) + ";";
- } else {
- s += Extend(typestr, abd) + ";";
- }
+ case OpAbal:
+ s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
+ break;
+ case OpAbalHi:
+ s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
+ break;
+ case OpQDMullHi:
+ s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
+ break;
+ case OpQDMlalHi:
+ s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
+ break;
+ case OpQDMlslHi:
+ s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
break;
- }
case OpDiv:
s += "__a / __b;";
break;
@@ -1993,6 +2148,7 @@ void NeonEmitter::run(raw_ostream &OS) {
emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
+ emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap);
// ARM intrinsics must be emitted before AArch64 intrinsics to ensure
// common intrinsics appear only once in the output stream.
@@ -2014,6 +2170,10 @@ void NeonEmitter::run(raw_ostream &OS) {
// Emit AArch64-specific intrinsics.
OS << "#ifdef __aarch64__\n";
+ emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap);
+ emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap);
+ emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap);
+
for (unsigned i = 0, e = RV.size(); i != e; ++i) {
Record *R = RV[i];
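
With the Gen2OpWith2High and Gen3OpWith2High helpers above, each "_high" form is emitted as the corresponding base intrinsic applied to vget_high of its 128-bit narrow operands. A sketch of the expected expansion for one of the three-operand forms (hand-written equivalent; the generated header presumably also carries the usual always_inline attributes):

#include <arm_neon.h>

// Equivalent of the emitted vabal_high_s8: accumulate the absolute
// differences of the upper eight lanes of b and c into a, as produced by
// Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c").
static inline int16x8_t abal_high_s8_equiv(int16x8_t a, int8x16_t b,
                                           int8x16_t c) {
  return vabal_s8(a, vget_high_s8(b), vget_high_s8(c));
}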