diff options
author | rearnsha <rearnsha@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-11-29 17:51:40 +0000 |
---|---|---|
committer | rearnsha <rearnsha@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-11-29 17:51:40 +0000 |
commit | 189fec2b7255e21e5588bfb84c053bdfb488f2bd (patch) | |
tree | 0453edaac611d759afad05d1d702d72031be7ee3 /gcc | |
parent | 394dd2c1f601babb5975fc5333fcd0e273ec7a79 (diff) | |
download | ppe42-gcc-189fec2b7255e21e5588bfb84c053bdfb488f2bd.tar.gz ppe42-gcc-189fec2b7255e21e5588bfb84c053bdfb488f2bd.zip |
PR target/55073
* arm/neon.md (neon_vtrn<mode>_internal): Split into expand
and insn patterns. Re-order insn arguments to tie inputs to
outputs.
(neon_vzip<mode>_internal): Likewise.
(neon_vuzp<mode>_internal): Likewise.
* gcc.target/arm/pr55073.C: New test.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@193943 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 9 | ||||
-rw-r--r-- | gcc/config/arm/neon.md | 69 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/pr55073.C | 74 |
4 files changed, 142 insertions, 15 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8ca3588b3ee..7560e28eb87 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2012-11-29 Richard Earnshaw <rearnsha@arm.com> + + PR target/55073 + * arm/neon.md (neon_vtrn<mode>_internal): Split into expand + and insn patterns. Re-order insn arguments to tie inputs to + outputs. + (neon_vzip<mode>_internal): Likewise. + (neon_vuzp<mode>_internal): Likewise. + 2012-11-29 Marc Glisse <marc.glisse@inria.fr> PR c++/53094 diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 8f84795334b..0822049a3e9 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -4411,16 +4411,29 @@ [(set_attr "neon_type" "neon_bp_3cycle")] ) -(define_insn "neon_vtrn<mode>_internal" +(define_expand "neon_vtrn<mode>_internal" + [(parallel + [(set (match_operand:VDQW 0 "s_register_operand" "") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "") + (match_operand:VDQW 2 "s_register_operand" "")] + UNSPEC_VTRN1)) + (set (match_operand:VDQW 3 "s_register_operand" "") + (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])] + "TARGET_NEON" + "" +) + +;; Note: Different operand numbering to handle tied registers correctly. +(define_insn "*neon_vtrn<mode>_insn" [(set (match_operand:VDQW 0 "s_register_operand" "=w") (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") - (match_operand:VDQW 2 "s_register_operand" "w")] + (match_operand:VDQW 3 "s_register_operand" "2")] UNSPEC_VTRN1)) - (set (match_operand:VDQW 3 "s_register_operand" "=2") - (unspec:VDQW [(match_dup 1) (match_dup 2)] + (set (match_operand:VDQW 2 "s_register_operand" "=w") + (unspec:VDQW [(match_dup 1) (match_dup 3)] UNSPEC_VTRN2))] "TARGET_NEON" - "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" + "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" [(set (attr "neon_type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_bp_simple") @@ -4438,16 +4451,29 @@ DONE; }) -(define_insn "neon_vzip<mode>_internal" +(define_expand "neon_vzip<mode>_internal" + [(parallel + [(set (match_operand:VDQW 0 "s_register_operand" "") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "") + (match_operand:VDQW 2 "s_register_operand" "")] + UNSPEC_VZIP1)) + (set (match_operand:VDQW 3 "s_register_operand" "") + (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])] + "TARGET_NEON" + "" +) + +;; Note: Different operand numbering to handle tied registers correctly. +(define_insn "*neon_vzip<mode>_insn" [(set (match_operand:VDQW 0 "s_register_operand" "=w") (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") - (match_operand:VDQW 2 "s_register_operand" "w")] + (match_operand:VDQW 3 "s_register_operand" "2")] UNSPEC_VZIP1)) - (set (match_operand:VDQW 3 "s_register_operand" "=2") - (unspec:VDQW [(match_dup 1) (match_dup 2)] + (set (match_operand:VDQW 2 "s_register_operand" "=w") + (unspec:VDQW [(match_dup 1) (match_dup 3)] UNSPEC_VZIP2))] "TARGET_NEON" - "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" + "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" [(set (attr "neon_type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_bp_simple") @@ -4465,16 +4491,29 @@ DONE; }) -(define_insn "neon_vuzp<mode>_internal" +(define_expand "neon_vuzp<mode>_internal" + [(parallel + [(set (match_operand:VDQW 0 "s_register_operand" "") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "") + (match_operand:VDQW 2 "s_register_operand" "")] + UNSPEC_VUZP1)) + (set (match_operand:VDQW 3 "s_register_operand" "") + (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])] + "TARGET_NEON" + "" +) + +;; Note: Different operand numbering to handle tied registers correctly. +(define_insn "*neon_vuzp<mode>_insn" [(set (match_operand:VDQW 0 "s_register_operand" "=w") (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") - (match_operand:VDQW 2 "s_register_operand" "w")] + (match_operand:VDQW 3 "s_register_operand" "2")] UNSPEC_VUZP1)) - (set (match_operand:VDQW 3 "s_register_operand" "=2") - (unspec:VDQW [(match_dup 1) (match_dup 2)] + (set (match_operand:VDQW 2 "s_register_operand" "=w") + (unspec:VDQW [(match_dup 1) (match_dup 3)] UNSPEC_VUZP2))] "TARGET_NEON" - "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" + "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" [(set (attr "neon_type") (if_then_else (match_test "<Is_d_reg>") (const_string "neon_bp_simple") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 4e623fb7df2..996347b764f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2012-11-29 Richard Earnshaw <rearnsha@arm.com> + + PR target/55073 + * gcc.target/arm/pr55073.C: New test. + 2012-11-29 Marc Glisse <marc.glisse@inria.fr> PR c++/53094 diff --git a/gcc/testsuite/gcc.target/arm/pr55073.C b/gcc/testsuite/gcc.target/arm/pr55073.C new file mode 100644 index 00000000000..5575cf77911 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/pr55073.C @@ -0,0 +1,74 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include <arm_neon.h> +#include <stdlib.h> + +struct __attribute__((aligned(16))) _v16u8_ { + uint8x16_t val; + _v16u8_() { } + + _v16u8_( const uint8x16_t &src) { val = src; } + _v16u8_( const int16x8_t &src) { val = vreinterpretq_u8_s16(src); } + _v16u8_( const uint32x4_t &src) { val = vreinterpretq_u8_u32(src); } + + operator uint8x16_t () const { return val; } + operator int8x16_t () const { return vreinterpretq_s8_u8 (val); } + operator int16x8_t () const { return vreinterpretq_s16_u8(val); } + operator uint32x4_t () const { return vreinterpretq_u32_u8(val); } + operator int32x4_t () const { return vreinterpretq_s32_u8(val); } +}; +typedef struct _v16u8_ v16u8; +typedef const v16u8 cv16u8; + +typedef v16u8 v16i8; +typedef v16u8 v8i16; +typedef v16u8 v4u32; + +inline v16u8 __attribute__((always_inline)) mergelo( const v16u8 & s, const v16u8 & t ) +{ + uint8x8x2_t r = vzip_u8( vget_low_u8(s), vget_low_u8(t) ); + return vcombine_u8( r.val[0], r.val[1] ); +} + +inline v8i16 __attribute__((always_inline)) unpacklo(const v16i8 & s) +{ + return vmovl_s8( vget_low_s8( s ) ); +} + +const uint32_t __attribute__((aligned(16))) _InA [4] = { 0xFF020001, 0xFF020001, 0xFF000101, 0xFF000101 } ; +const uint32_t __attribute__((aligned(16))) _InB [4] = { 0xFF050002, 0xFF050002, 0xFF000303, 0xFF000203 } ; + +__attribute__((noinline)) v16i8 test_func(void) +{ + v16u8 A = vld1q_u8( (uint8_t*) _InA ); + v16u8 B = vld1q_u8( (uint8_t*) _InB ); + v8i16 r = vdupq_n_s16(2); + + v16u8 _0 = mergelo( A, B ); + v16u8 _1 = mergelo( B, A ); + + v16u8 _2 = mergelo( _0, _1 ); + v16u8 _3 = mergelo( _1, _0 ); + + v8i16 _4 = vsubq_s16( unpacklo( _2 ), r ); + v8i16 _5 = vsubq_s16( unpacklo( _3 ), r ); + + v8i16 ret = vaddq_s16( _4, _5 ); + + return ( ret ); +} + +int main (int argc, char **argv) +{ + v16u8 val = test_func(); + + if (vgetq_lane_u32( val, 0 ) != 0xffffffff + || vgetq_lane_u32( val, 1 ) != 0xffffffff + || vgetq_lane_u32( val, 2 ) != 0xfffcfffc + || vgetq_lane_u32( val, 3 ) != 0xfffcfffc) + abort (); + exit (0); +} |