diff options
| author | Bob Wilson <bob.wilson@apple.com> | 2009-08-11 01:15:26 +0000 |
|---|---|---|
| committer | Bob Wilson <bob.wilson@apple.com> | 2009-08-11 01:15:26 +0000 |
| commit | 741a9c7bf6c758e9a1be4b06420003ec53e90a18 (patch) | |
| tree | 899bb9e81ffde2208f8751e6331d3d06d39e03e7 | |
| parent | 2cd5da8300fe47f801ffc97638f723646b892d8a (diff) | |
| download | bcm5719-llvm-741a9c7bf6c758e9a1be4b06420003ec53e90a18.tar.gz bcm5719-llvm-741a9c7bf6c758e9a1be4b06420003ec53e90a18.zip | |
Use new EVT::vAny type to combine Neon intrinsics for VPADD.
llvm-svn: 78632

| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | llvm/include/llvm/IntrinsicsARM.td | 6 |
| -rw-r--r-- | llvm/lib/Target/ARM/ARMInstrNEON.td | 8 |
| -rw-r--r-- | llvm/test/CodeGen/ARM/vpadd.ll | 16 |

3 files changed, 16 insertions, 14 deletions
```diff
diff --git a/llvm/include/llvm/IntrinsicsARM.td b/llvm/include/llvm/IntrinsicsARM.td
index 2a31c504e12..d86dd087107 100644
--- a/llvm/include/llvm/IntrinsicsARM.td
+++ b/llvm/include/llvm/IntrinsicsARM.td
@@ -42,6 +42,9 @@ let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.".
   class Neon_2Arg_Float_Intrinsic
     : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
                 [IntrNoMem]>;
+  class Neon_2Arg_Vector_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
+                [IntrNoMem]>;
   class Neon_2Arg_Narrow_Intrinsic
     : Intrinsic<[llvm_anyint_ty],
                 [LLVMExtendedElementVectorType<0>,
@@ -194,8 +197,7 @@ def int_arm_neon_vabals : Neon_3Arg_Long_Intrinsic;
 def int_arm_neon_vabalu : Neon_3Arg_Long_Intrinsic;
 
 // Vector Pairwise Add.
-def int_arm_neon_vpaddi : Neon_2Arg_Intrinsic;
-def int_arm_neon_vpaddf : Neon_2Arg_Float_Intrinsic;
+def int_arm_neon_vpadd : Neon_2Arg_Vector_Intrinsic;
 
 // Vector Pairwise Add Long.
 // Note: This is different than the other "long" NEON intrinsics because
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index 938ddcf606c..1ed3a619a0f 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -1261,13 +1261,13 @@ def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, "vmin.f32", v4f32, v4f32,
 
 // VPADD : Vector Pairwise Add
 def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, "vpadd.i8", v8i8, v8i8,
-                      int_arm_neon_vpaddi, 0>;
+                      int_arm_neon_vpadd, 0>;
 def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, "vpadd.i16", v4i16, v4i16,
-                       int_arm_neon_vpaddi, 0>;
+                       int_arm_neon_vpadd, 0>;
 def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, "vpadd.i32", v2i32, v2i32,
-                       int_arm_neon_vpaddi, 0>;
+                       int_arm_neon_vpadd, 0>;
 def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, "vpadd.f32", v2f32, v2f32,
-                     int_arm_neon_vpaddf, 0>;
+                     int_arm_neon_vpadd, 0>;
 
 // VPADDL : Vector Pairwise Add Long
 defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl.s",
diff --git a/llvm/test/CodeGen/ARM/vpadd.ll b/llvm/test/CodeGen/ARM/vpadd.ll
index baff49227e6..b551fc0f6ab 100644
--- a/llvm/test/CodeGen/ARM/vpadd.ll
+++ b/llvm/test/CodeGen/ARM/vpadd.ll
@@ -7,33 +7,33 @@
 define <8 x i8> @vpaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
-	%tmp3 = call <8 x i8> @llvm.arm.neon.vpaddi.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	%tmp3 = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
 
 define <4 x i16> @vpaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
-	%tmp3 = call <4 x i16> @llvm.arm.neon.vpaddi.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	%tmp3 = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
 
 define <2 x i32> @vpaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
-	%tmp3 = call <2 x i32> @llvm.arm.neon.vpaddi.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	%tmp3 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
 
 define <2 x float> @vpaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = call <2 x float> @llvm.arm.neon.vpaddf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+	%tmp3 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
 
-declare <8 x i8> @llvm.arm.neon.vpaddi.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vpaddi.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vpaddi.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
 
-declare <2 x float> @llvm.arm.neon.vpaddf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
```

