diff options
| author | sayle <sayle@138bc75d-0d04-0410-961f-82ee72b054a4> | 2006-04-16 21:46:59 +0000 |
|---|---|---|
| committer | sayle <sayle@138bc75d-0d04-0410-961f-82ee72b054a4> | 2006-04-16 21:46:59 +0000 |
| commit | 8dfacec03f29db8ac452f3111e874bb882975a87 (patch) | |
| tree | b6b0e1ede375a1e186941f57d26fa43ea68e93cf | |
| parent | 3baa29e94f5db3160fdd13b21efd20fb0c4483d4 (diff) | |
| download | ppe42-gcc-8dfacec03f29db8ac452f3111e874bb882975a87.tar.gz ppe42-gcc-8dfacec03f29db8ac452f3111e874bb882975a87.zip | |
2006-04-15 Roger Sayle <roger@eyesopen.com>
Andrew Pinski <pinskia@gcc.gnu.org>
Dale Johannesen <dalej@apple.com>
PR target/24076
* config/i386/i386.c (ix86_expand_vector_init_duplicate): Add
special case code to implement V8HImode and V16QImode with SSE2.
* gcc.target/i386/vecinit-3.c: New testcase.
* gcc.target/i386/vecinit-4.c: Likewise.
* gcc.target/i386/sse-18.c: Likewise.
* gcc.target/i386/sse-19.c: Likewise.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@112990 138bc75d-0d04-0410-961f-82ee72b054a4
| -rw-r--r-- | gcc/ChangeLog | 8 | ||||
| -rw-r--r-- | gcc/config/i386/i386.c | 55 | ||||
| -rw-r--r-- | gcc/testsuite/ChangeLog | 10 | ||||
| -rw-r--r-- | gcc/testsuite/gcc.target/i386/sse-18.c | 38 | ||||
| -rw-r--r-- | gcc/testsuite/gcc.target/i386/sse-19.c | 29 | ||||
| -rw-r--r-- | gcc/testsuite/gcc.target/i386/vecinit-3.c | 8 | ||||
| -rw-r--r-- | gcc/testsuite/gcc.target/i386/vecinit-4.c | 7 |
7 files changed, 154 insertions, 1 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 10b55b19dd1..9fbdca0a901 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2006-04-16 Roger Sayle <roger@eyesopen.com> + Andrew Pinski <pinskia@gcc.gnu.org> + Dale Johannesen <dalej@apple.com> + + PR target/24076 + * config/i386/i386.c (ix86_expand_vector_init_duplicate): Add + special case code to implement V8HImode and V16QImode with SSE2. + 2006-04-15 Roger Sayle <roger@eyesopen.com> * config/i386/i386.c (ix86_va_start): Ensure all integer constant diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 4d3a972e386..3fc19bf6737 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -17856,11 +17856,66 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode, wvmode = V4HImode; goto widen; case V8HImode: + if (TARGET_SSE2) + { + rtx tmp1, tmp2; + /* Extend HImode to SImode using a paradoxical SUBREG. */ + tmp1 = gen_reg_rtx (SImode); + emit_move_insn (tmp1, gen_lowpart (SImode, val)); + /* Insert the SImode value as low element of V4SImode vector. */ + tmp2 = gen_reg_rtx (V4SImode); + tmp1 = gen_rtx_VEC_MERGE (V4SImode, + gen_rtx_VEC_DUPLICATE (V4SImode, tmp1), + CONST0_RTX (V4SImode), + const1_rtx); + emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1)); + /* Cast the V4SImode vector back to a V8HImode vector. */ + tmp1 = gen_reg_rtx (V8HImode); + emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2)); + /* Duplicate the low short through the whole low SImode word. */ + emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1)); + /* Cast the V8HImode vector back to a V4SImode vector. */ + tmp2 = gen_reg_rtx (V4SImode); + emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1)); + /* Replicate the low element of the V4SImode vector. */ + emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx)); + /* Cast the V4SImode back to V8HImode, and store in target. 
*/ + emit_move_insn (target, gen_lowpart (V8HImode, tmp2)); + return true; + } smode = HImode; wsmode = SImode; wvmode = V4SImode; goto widen; case V16QImode: + if (TARGET_SSE2) + { + rtx tmp1, tmp2; + /* Extend QImode to SImode using a paradoxical SUBREG. */ + tmp1 = gen_reg_rtx (SImode); + emit_move_insn (tmp1, gen_lowpart (SImode, val)); + /* Insert the SImode value as low element of V4SImode vector. */ + tmp2 = gen_reg_rtx (V4SImode); + tmp1 = gen_rtx_VEC_MERGE (V4SImode, + gen_rtx_VEC_DUPLICATE (V4SImode, tmp1), + CONST0_RTX (V4SImode), + const1_rtx); + emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1)); + /* Cast the V4SImode vector back to a V16QImode vector. */ + tmp1 = gen_reg_rtx (V16QImode); + emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2)); + /* Duplicate the low byte through the whole low SImode word. */ + emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1)); + emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1)); + /* Cast the V16QImode vector back to a V4SImode vector. */ + tmp2 = gen_reg_rtx (V4SImode); + emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1)); + /* Replicate the low element of the V4SImode vector. */ + emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx)); + /* Cast the V4SImode back to V16QImode, and store in target. */ + emit_move_insn (target, gen_lowpart (V16QImode, tmp2)); + return true; + } smode = QImode; wsmode = HImode; wvmode = V8HImode; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index dc960cfa721..313a7fc7dce 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,8 +1,16 @@ +2006-04-16 Roger Sayle <roger@eyesopen.com> + Dale Johannesen <dalej@apple.com> + + PR target/24076 + * gcc.target/i386/vecinit-3.c: New testcase. + * gcc.target/i386/vecinit-4.c: Likewise. + * gcc.target/i386/sse-18.c: Likewise. + * gcc.target/i386/sse-19.c: Likewise. + 2006-04-16 Thomas Koenig <Thomas.Koenig@online.de> * gfortran.dg/allocate_zerosize_1.f90: New test. 
- 2006-04-16 Mark Mitchell <mark@codesourcery.com> PR c++/26365 diff --git a/gcc/testsuite/gcc.target/i386/sse-18.c b/gcc/testsuite/gcc.target/i386/sse-18.c new file mode 100644 index 00000000000..61026a110d7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse-18.c @@ -0,0 +1,38 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -msse2" } */ +extern void abort(void); +#include <emmintrin.h> +#include "../../gcc.dg/i386-cpuid.h" +__m128i foo (char) __attribute__((noinline)); +__m128i foo (char x) { + return _mm_set1_epi8(x); +} +__m128i bar (char) __attribute__((noinline)); +__m128i bar (char x) { + return _mm_set_epi8 (x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x); +} + +main() { + int i, j; + union u { __m128i v; char c[16]; }; + union u x, y; + unsigned long cpu_facilities; + + cpu_facilities = i386_cpuid (); + + if ((cpu_facilities & (bit_MMX | bit_SSE | bit_CMOV)) + != (bit_MMX | bit_SSE | bit_CMOV)) + /* If host has no vector support, pass. */ + return 0; + + for (i = -128; i <= 127; i++) + { + x.v = foo ((char)i); + y.v = bar ((char)i); + for (j=0; j<16; j++) + if (x.c[j] != y.c[j]) + abort(); + } + return 0; +} + diff --git a/gcc/testsuite/gcc.target/i386/sse-19.c b/gcc/testsuite/gcc.target/i386/sse-19.c new file mode 100644 index 00000000000..43c090bd4e1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse-19.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -msse2" } */ +/* { dg-final { scan-assembler "punpcklbw" } } */ +extern void abort(); +#include <emmintrin.h> +__m128i foo (char) __attribute__((noinline)); +__m128i foo (char x) { + return _mm_set1_epi8(x); +} +__m128i bar (char) __attribute__((noinline)); +__m128i bar (char x) { + return _mm_set_epi8 (x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x); +} + +main() { + int i, j; + union u { __m128i v; char c[16]; }; + union u x, y; + for (i = -128; i <= 127; i++) + { + x.v = foo ((char)i); + y.v = bar ((char)i); + for (j=0; j<16; j++) + if (x.c[j] != y.c[j]) + abort(); + } + return 0; +} + diff --git 
a/gcc/testsuite/gcc.target/i386/vecinit-3.c b/gcc/testsuite/gcc.target/i386/vecinit-3.c new file mode 100644 index 00000000000..4cbf52133b4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vecinit-3.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2" } */ +#define vector __attribute__((vector_size(16))) + +char a; +vector char f(void) { return (vector char){ a, a, a, a, a, a, a, a, + a, a, a, a, a, a, a, a }; } +/* { dg-final { scan-assembler-not "sall" } } */ diff --git a/gcc/testsuite/gcc.target/i386/vecinit-4.c b/gcc/testsuite/gcc.target/i386/vecinit-4.c new file mode 100644 index 00000000000..7a8c1d0f6c1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vecinit-4.c @@ -0,0 +1,7 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2" } */ +#define vector __attribute__((vector_size(16))) + +short a; +vector short f(void) { return (vector short){ a, a, a, a, a, a, a, a }; } +/* { dg-final { scan-assembler-not "sall" } } */ |

