llvm-svn: 27374

llvm-svn: 27367

llvm-svn: 27364
int %AreSecondAndThirdElementsBothNegative(<4 x float>* %in) {
entry:
%tmp1 = load <4 x float>* %in ; <<4 x float>> [#uses=1]
%tmp = tail call int %llvm.ppc.altivec.vcmpgefp.p( int 1, <4 x float> < float 0x7FF8000000000000, float 0.000000e+00, float 0.000000e+00, float 0x7FF8000000000000 >, <4 x float> %tmp1 )		; <int> [#uses=1]
%tmp2 = seteq int %tmp, 0		; <bool> [#uses=1]
%tmp3 = cast bool %tmp2 to int		; <int> [#uses=1]
ret int %tmp3
}
into this:
_AreSecondAndThirdElementsBothNegative:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI1_0)
lis r5, ha16(LCPI1_0)
lvx v0, 0, r3
lvx v1, r5, r4
vcmpgefp. v0, v1, v0
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
mtspr 256, r2
blr
instead of this:
_AreSecondAndThirdElementsBothNegative:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI1_0)
lis r5, ha16(LCPI1_0)
lvx v0, 0, r3
lvx v1, r5, r4
vcmpgefp. v0, v1, v0
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
xori r3, r3, 1
cntlzw r3, r3
srwi r3, r3, 5
mtspr 256, r2
blr
llvm-svn: 27356
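The win here is that the result of an AltiVec predicate comparison feeding a
setcc or branch can be read directly out of CR6 after the record-form compare.
A minimal C sketch, assuming GCC/Clang-style altivec.h (the function and its
body are illustrative, not the commit's actual test):

#include <altivec.h>

/* Sketch only: vec_all_ge expands to a vcmpgefp. predicate intrinsic.
 * Because its result feeds a comparison, the backend can read the bit
 * straight from CR6 after the record-form compare instead of
 * materializing 0/1 and re-testing it with xori/cntlzw/srwi. */
int all_nonnegative(vector float *in) {
  vector float zero = (vector float){0.0f, 0.0f, 0.0f, 0.0f};
  return vec_all_ge(*in, zero);  /* nonzero iff every element >= 0.0 */
}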
work with PowerPC.
llvm-svn: 27349
into elements and operate on each piece. This allows generic vector integer
multiplies to work on PPC, though the generated code is horrible.
llvm-svn: 27347
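A sketch of the shape of the unrolled code, in C with GCC-style vector
extensions (illustrative only, not the legalizer source):

typedef int v4i __attribute__((vector_size(16)));

/* AltiVec has no vector integer multiply instruction, so the operation
 * is split into elements: extract, multiply as scalars, reassemble. */
v4i mul_v4i(v4i a, v4i b) {
  v4i r;
  for (int i = 0; i != 4; ++i)
    r[i] = a[i] * b[i];  /* one scalar multiply per element */
  return r;
}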
have to serialize against each other. This allows us to schedule lvx's
across each other, for example.
llvm-svn: 27346
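For example (hypothetical C, assuming altivec.h), the two loads below have no
dependence on each other, so their lvx instructions are no longer chained in
program order:

#include <altivec.h>

vector float sum2(vector float *a, vector float *b) {
  vector float x = *a;  /* lvx */
  vector float y = *b;  /* lvx; free to schedule across the first load */
  return vec_add(x, y);
}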
"vector unsigned char mergeLowHigh = (vector unsigned char)
( 8, 9, 10, 11, 16, 17, 18, 19, 12, 13, 14, 15, 20, 21, 22, 23 );
vector unsigned char mergeHighLow = vec_xor( mergeLowHigh, vec_splat_u8(8));"
aka:
void %test2(<16 x sbyte>* %P) {
store <16 x sbyte> cast (<4 x int> xor (<4 x int> cast (<16 x ubyte> < ubyte 8, ubyte 9, ubyte 10, ubyte 11, ubyte 16, ubyte 17, ubyte 18, ubyte 19, ubyte 12, ubyte 13, ubyte 14, ubyte 15, ubyte 20, ubyte 21, ubyte 22, ubyte 23 > to <4 x int>), <4 x int> cast (<16 x sbyte> < sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8 > to <4 x int>)) to <16 x sbyte>), <16 x sbyte> * %P
ret void
}
into this:
_test2:
mfspr r2, 256
oris r4, r2, 32768
mtspr 256, r4
li r4, lo16(LCPI2_0)
lis r5, ha16(LCPI2_0)
lvx v0, r5, r4
stvx v0, 0, r3
mtspr 256, r2
blr
instead of this:
_test2:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI2_0)
lis r5, ha16(LCPI2_0)
vspltisb v0, 8
lvx v1, r5, r4
vxor v0, v1, v0
stvx v0, 0, r3
mtspr 256, r2
blr
... which occurs here:
http://developer.apple.com/hardware/ve/calcspeed.html
llvm-svn: 27343
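For reference, the fold evaluates the vec_xor at compile time, XORing each
byte of mergeLowHigh with 8 (8^8=0, 16^8=24, 12^8=4, 20^8=28, and so on), so
the store writes this constant directly:

vector unsigned char mergeHighLow = (vector unsigned char)
  ( 0, 1, 2, 3, 24, 25, 26, 27, 4, 5, 6, 7, 28, 29, 30, 31 );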
llvm-svn: 27342

vbuild_vector nodes.
llvm-svn: 27341

llvm-svn: 27340

llvm-svn: 27339

multiple register classes. This fixes PowerPC/2006-04-01-FloatDoubleExtend.ll
llvm-svn: 27334
CodeGen/Generic/vector-identity-shuffle.ll
llvm-svn: 27317
UnitTests/Vector/sumarray-dbl on PPC.
Now all UnitTests/Vector/* tests pass on PPC.
llvm-svn: 27299
This fixes UnitTests/Vector/simple.c with altivec.
llvm-svn: 27298
| |
to:
test_extract_elt:
alloc r3 = ar.pfs,0,1,0,0
adds r8 = 12, r32
;;
ldfs f8 = [r8]
mov ar.pfs = r3
br.ret.sptk.many rp
instead of:
test_extract_elt:
alloc r3 = ar.pfs,0,1,0,0
adds r8 = 28, r32
adds r9 = 24, r32
adds r10 = 20, r32
adds r11 = 16, r32
;;
ldfs f6 = [r8]
;;
ldfs f6 = [r9]
adds r8 = 12, r32
adds r9 = 8, r32
adds r14 = 4, r32
;;
ldfs f6 = [r10]
;;
ldfs f6 = [r11]
ldfs f8 = [r8]
;;
ldfs f6 = [r9]
;;
ldfs f6 = [r14]
;;
ldfs f6 = [r32]
mov ar.pfs = r3
br.ret.sptk.many rp
llvm-svn: 27297
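The test itself is LLVM IR; a hypothetical C equivalent of the pattern (names
and types are mine) is a single-element read through a vector pointer:

/* Only element 3 is used, so codegen now loads the one float at
 * offset 12 instead of loading all eight elements and extracting. */
float extract_elt(float (*v)[8]) {
  return (*v)[3];
}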
vector.ll:test_extract_elt2 into:
_test_extract_elt2:
lfd f1, 32(r3)
blr
instead of:
_test_extract_elt2:
lfd f0, 56(r3)
lfd f0, 48(r3)
lfd f0, 40(r3)
lfd f1, 32(r3)
lfd f0, 24(r3)
lfd f0, 16(r3)
lfd f0, 8(r3)
lfd f0, 0(r3)
blr
llvm-svn: 27296
Generic/vector.ll:test_extract_elt on non-SSE X86 systems.
llvm-svn: 27294
needs to be promoted or expanded. Relegalize the scalar store once created.
This fixes CodeGen/Generic/vector.ll:test1 on non-SSE x86 targets.
llvm-svn: 27293
decimated vectors. This fixes UnitTests/Vector/sumarray-dbl.c
llvm-svn: 27280
handling cases where the vector elements need promotion, expansion, and when
the vector type itself needs to be decimated.
llvm-svn: 27278
llvm-svn: 27274
Handle constantpacked vectors with constantexpr elements.
This fixes CodeGen/Generic/vector-constantexpr.ll
llvm-svn: 27241
sure to build it as SHUFFLE(X, undef, mask), not SHUFFLE(X, X, mask).
The latter is not canonical form, and prevents the PPC splat pattern from
matching. For a particular splat, we go from generating this:
li r10, lo16(LCPI1_0)
lis r11, ha16(LCPI1_0)
lvx v3, r11, r10
vperm v3, v2, v2, v3
to generating:
vspltw v3, v2, 3
llvm-svn: 27236
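In source terms this is the vec_splat pattern; a minimal sketch assuming
altivec.h (the function name is mine):

#include <altivec.h>

/* With the canonical SHUFFLE(X, undef, mask) form, this now matches
 * the vspltw pattern instead of a vperm with a constant-pool mask. */
vector float splat3(vector float v) {
  return vec_splat(v, 3);  /* broadcast element 3 to all four lanes */
}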
llvm-svn: 27235
vector_shuffle node. For this:
void test(__m128 *res, __m128 *A, __m128 *B) {
*res = _mm_unpacklo_ps(*A, *B);
}
we now produce this code:
_test:
movl 8(%esp), %eax
movaps (%eax), %xmm0
movl 12(%esp), %eax
unpcklps (%eax), %xmm0
movl 4(%esp), %eax
movaps %xmm0, (%eax)
ret
instead of this:
_test:
subl $76, %esp
movl 88(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, (%esp)
movaps %xmm0, 32(%esp)
movss 4(%esp), %xmm0
movss 32(%esp), %xmm1
unpcklps %xmm0, %xmm1
movl 84(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, 16(%esp)
movaps %xmm0, 48(%esp)
movss 20(%esp), %xmm0
movss 48(%esp), %xmm2
unpcklps %xmm0, %xmm2
unpcklps %xmm1, %xmm2
movl 80(%esp), %eax
movaps %xmm2, (%eax)
addl $76, %esp
ret
GCC produces this (with -fomit-frame-pointer):
_test:
subl $12, %esp
movl 20(%esp), %eax
movaps (%eax), %xmm0
movl 24(%esp), %eax
unpcklps (%eax), %xmm0
movl 16(%esp), %eax
movaps %xmm0, (%eax)
addl $12, %esp
ret
llvm-svn: 27233
llvm-svn: 27232

llvm-svn: 27231

llvm-svn: 27229

llvm-svn: 27228

llvm-svn: 27226

llvm-svn: 27224

llvm-svn: 27203

value. Split them into separate enums.
llvm-svn: 27201
llvm-svn: 27192

llvm-svn: 27182

llvm-svn: 27181

llvm-svn: 27173

llvm-svn: 27171

llvm-svn: 27169

floating point cases.
llvm-svn: 27165
llvm-svn: 27164

llvm-svn: 27158
manner that the LowerSwitch LLVM-to-LLVM pass does: emitting a binary
search tree of basic blocks. The new approach has several advantages:
it is faster, it generates significantly smaller code in many cases, and
it paves the way for implementing dense switch tables as a jump table by
handling switches directly in the instruction selector.
This functionality is currently only enabled on x86, but should be safe for
every target. In anticipation of making it the default, the CFG is now
properly updated in the X86, PPC, and SPARC select lowering code.
llvm-svn: 27156
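As a hedged C sketch of the idea (illustrative, not the selector's code): a
switch over sorted case values becomes a balanced tree of compares, so the
branch depth is logarithmic in the number of cases.

/* Lowering of: switch (x) with cases 10/20/30/40 returning 1/2/3/4. */
int classify(int x) {
  if (x < 30) {              /* pivot on the median case value */
    if (x == 10) return 1;
    if (x == 20) return 2;
  } else {
    if (x == 30) return 3;
    if (x == 40) return 4;
  }
  return 0;                  /* default */
}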
llvm-svn: 27155

llvm-svn: 27154

llvm-svn: 27147

llvm-svn: 27146

llvm-svn: 27133