llvm-svn: 27300

UnitTests/Vector/sumarray-dbl on PPC.
Now all UnitTests/Vector/* tests pass on PPC.
llvm-svn: 27299

This fixes UnitTests/Vector/simple.c with altivec.
llvm-svn: 27298

to:
test_extract_elt:
alloc r3 = ar.pfs,0,1,0,0
adds r8 = 12, r32
;;
ldfs f8 = [r8]
mov ar.pfs = r3
br.ret.sptk.many rp
instead of:
test_extract_elt:
alloc r3 = ar.pfs,0,1,0,0
adds r8 = 28, r32
adds r9 = 24, r32
adds r10 = 20, r32
adds r11 = 16, r32
;;
ldfs f6 = [r8]
;;
ldfs f6 = [r9]
adds r8 = 12, r32
adds r9 = 8, r32
adds r14 = 4, r32
;;
ldfs f6 = [r10]
;;
ldfs f6 = [r11]
ldfs f8 = [r8]
;;
ldfs f6 = [r9]
;;
ldfs f6 = [r14]
;;
ldfs f6 = [r32]
mov ar.pfs = r3
br.ret.sptk.many rp
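
A hypothetical C-level sketch of the pattern above (illustrative only, not the actual
CodeGen/Generic/vector.ll test), using GCC vector extensions: extracting one element of
a vector loaded through a pointer should become a single scalar load at that element's
offset, not a load of every element.
typedef float v4f32 __attribute__((vector_size(16)));

float test_extract_elt(v4f32 *P) {
    v4f32 V = *P;
    return V[3];   /* element 3 sits at byte offset 12, matching the single ldfs above */
}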
llvm-svn: 27297

vector.ll:test_extract_elt2 into:
_test_extract_elt2:
lfd f1, 32(r3)
blr
instead of:
_test_extract_elt2:
lfd f0, 56(r3)
lfd f0, 48(r3)
lfd f0, 40(r3)
lfd f1, 32(r3)
lfd f0, 24(r3)
lfd f0, 16(r3)
lfd f0, 8(r3)
lfd f0, 0(r3)
blr
llvm-svn: 27296

Generic/vector.ll:test_extract_elt on non-SSE X86 systems.
llvm-svn: 27294

needs to be promoted or expanded. Relegalize the scalar store once created.
This fixes CodeGen/Generic/vector.ll:test1 on non-SSE x86 targets.
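
A hypothetical sketch of the kind of case this covers (not the actual vector.ll:test1):
the scalar store produced from a one-element vector may itself be illegal, e.g. an i64
element that must be expanded on a 32-bit target.
typedef long long v1i64 __attribute__((vector_size(8)));

void store_elt(v1i64 *P, v1i64 V) {
    /* Lowers to a scalar i64 store, which must then be expanded into
       two 32-bit stores on a 32-bit target. */
    *P = V;
}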
llvm-svn: 27293

llvm-svn: 27292

llvm-svn: 27291

identical instructions into a single instruction. For example, for:
#include <altivec.h>
void test(vector float *x, vector float *y, int *P) {
int v = vec_any_out(*x, *y);
*x = (vector float)vec_cmpb(*x, *y);
*P = v;
}
we now generate:
_test:
mfspr r2, 256
oris r6, r2, 49152
mtspr 256, r6
lvx v0, 0, r4
lvx v1, 0, r3
vcmpbfp. v0, v1, v0
mfcr r4, 2
stvx v0, 0, r3
rlwinm r3, r4, 27, 31, 31
xori r3, r3, 1
stw r3, 0(r5)
mtspr 256, r2
blr
instead of:
_test:
mfspr r2, 256
oris r6, r2, 57344
mtspr 256, r6
lvx v0, 0, r4
lvx v1, 0, r3
vcmpbfp. v2, v1, v0
mfcr r4, 2
*** vcmpbfp v0, v1, v0
rlwinm r4, r4, 27, 31, 31
stvx v0, 0, r3
xori r3, r4, 1
stw r3, 0(r5)
mtspr 256, r2
blr
Testcase here: CodeGen/PowerPC/vcmp-fold.ll
llvm-svn: 27290

llvm-svn: 27288

llvm-svn: 27287

predicates to VCMPo nodes.
llvm-svn: 27285

llvm-svn: 27284

llvm-svn: 27282

decimated vectors. This fixes UnitTests/Vector/sumarray-dbl.c
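
A hypothetical sketch of the situation (not the actual UnitTests source): a vector of
doubles that no AltiVec register can hold must be decimated, i.e. split apart, by the
legalizer before it can be operated on.
typedef double v4f64 __attribute__((vector_size(32)));

double sum4(v4f64 V) {
    /* AltiVec has no double-precision vector support, so the legalizer
       must break V into scalar doubles before summing. */
    return V[0] + V[1] + V[2] + V[3];
}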
llvm-svn: 27280

handling cases where the vector elements need promotion, expansion, and when
the vector type itself needs to be decimated.
llvm-svn: 27278

llvm-svn: 27277

llvm-svn: 27276

llvm-svn: 27275

llvm-svn: 27274

unpromoted element type.
llvm-svn: 27273

llvm-svn: 27272

llvm-svn: 27271

llvm-svn: 27270

llvm-svn: 27268

directly correspond to intrinsics.
llvm-svn: 27266

llvm-svn: 27265

llvm-svn: 27261

Use pshufd, pshufhw, and pshuflw to shuffle v4f32 if shufps doesn't match.
Use shufps to shuffle v4i32 if pshufd, pshufhw, and pshuflw don't match.
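
A hypothetical illustration with SSE intrinsics: a shuffle whose result lanes all come
from one source can be expressed with pshufd/pshufhw/pshuflw, while one that mixes lanes
from two sources is shufps's native form.
#include <xmmintrin.h>

__m128 splat_lane3(__m128 a) {
    /* All four result lanes come from 'a'; either shufps or pshufd can do it. */
    return _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 3, 3, 3));
}

__m128 interleave_low(__m128 a, __m128 b) {
    /* Low half of the result from 'a', high half from 'b': shufps's native form. */
    return _mm_shuffle_ps(a, b, _MM_SHUFFLE(1, 0, 1, 0));
}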
llvm-svn: 27259

llvm-svn: 27257

llvm-svn: 27256

llvm-svn: 27255

For example, packsswb actually creates a v16i8 from a pair of v8i16, but the
intrinsic specification forces the output type to match the operands.
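
A hypothetical illustration using the standard SSE2 intrinsic: packsswb narrows two
v8i16 inputs into sixteen i8 lanes, even though at the intrinsic level the result is
declared with the same type as the operands.
#include <emmintrin.h>

__m128i pack_words(__m128i a, __m128i b) {
    /* packsswb: eight signed 16-bit lanes from each input are saturated
       down to 8-bit lanes, producing sixteen i8 lanes in the result. */
    return _mm_packs_epi16(a, b);
}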
llvm-svn: 27254

- Added SSE2 128-bit integer pack with signed saturation ops.
- Added pshufhw and pshuflw ops.
llvm-svn: 27252

vector_shuffle undef.
llvm-svn: 27250

integer vector logical operations would match andp{s|d} instead of pand.
llvm-svn: 27248

- Whenever possible use ops of the right packed types for vector shuffles /
splats.
llvm-svn: 27246

llvm-svn: 27245

- Other shuffle related fixes.
llvm-svn: 27244

llvm-svn: 27243

Handle ConstantPacked vectors with ConstantExpr elements.
This fixes CodeGen/Generic/vector-constantexpr.ll
llvm-svn: 27241

The source operands are of type v4sf, with the upper bits passed through.
Added matching code for these.
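
A hypothetical example of such an op (the surviving text does not name them; rsqrtss is
assumed here purely for illustration): only the low lane is computed, and the upper
lanes of the source are passed through.
#include <xmmintrin.h>

__m128 low_rsqrt(__m128 a) {
    /* rsqrtss: lane 0 = approximate 1/sqrt(a[0]); lanes 1-3 are copied
       from 'a' unchanged. */
    return _mm_rsqrt_ss(a);
}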
llvm-svn: 27240

llvm-svn: 27239

sure to build it as SHUFFLE(X, undef, mask), not SHUFFLE(X, X, mask).
The latter is not canonical form, and prevents the PPC splat pattern from
matching. For a particular splat, we go from generating this:
li r10, lo16(LCPI1_0)
lis r11, ha16(LCPI1_0)
lvx v3, r11, r10
vperm v3, v2, v2, v3
to generating:
vspltw v3, v2, 3
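
A hypothetical source-level sketch of the splat above, using the Clang
__builtin_shufflevector extension: every result lane reads element 3 of the same input,
so only one source operand is really used.
typedef float v4f32 __attribute__((vector_size(16)));

v4f32 splat_elt3(v4f32 X) {
    /* All four lanes select element 3 of X; built internally as
       SHUFFLE(X, undef, <3,3,3,3>), which the vspltw pattern matches. */
    return __builtin_shufflevector(X, X, 3, 3, 3, 3);
}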
llvm-svn: 27236

llvm-svn: 27235

vector_shuffle node. For this:
#include <xmmintrin.h>
void test(__m128 *res, __m128 *A, __m128 *B) {
*res = _mm_unpacklo_ps(*A, *B);
}
we now produce this code:
_test:
movl 8(%esp), %eax
movaps (%eax), %xmm0
movl 12(%esp), %eax
unpcklps (%eax), %xmm0
movl 4(%esp), %eax
movaps %xmm0, (%eax)
ret
instead of this:
_test:
subl $76, %esp
movl 88(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, (%esp)
movaps %xmm0, 32(%esp)
movss 4(%esp), %xmm0
movss 32(%esp), %xmm1
unpcklps %xmm0, %xmm1
movl 84(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, 16(%esp)
movaps %xmm0, 48(%esp)
movss 20(%esp), %xmm0
movss 48(%esp), %xmm2
unpcklps %xmm0, %xmm2
unpcklps %xmm1, %xmm2
movl 80(%esp), %eax
movaps %xmm2, (%eax)
addl $76, %esp
ret
GCC produces this (with -fomit-frame-pointer):
_test:
subl $12, %esp
movl 20(%esp), %eax
movaps (%eax), %xmm0
movl 24(%esp), %eax
unpcklps (%eax), %xmm0
movl 16(%esp), %eax
movaps %xmm0, (%eax)
addl $12, %esp
ret
llvm-svn: 27233

llvm-svn: 27232

llvm-svn: 27231

llvm-svn: 27229