| Commit message (Collapse) | Author | Age | Files | Lines |
| |
|
|
| |
llvm-svn: 27974
|
| |
|
|
|
|
|
| |
the jump table's range check block. This re-enables 100% dense jump tables
by default on PPC & x86
llvm-svn: 27952
|
| |
|
|
|
|
| |
updating the machine CFG.
llvm-svn: 27949
|
| |
|
|
|
|
|
|
| |
x86 and ppc for 100% dense switch statements when relocations are non-PIC.
This support will be extended and enhanced in the coming days to support
PIC, and less dense forms of jump tables.
llvm-svn: 27947
|
| |
|
|
|
|
| |
miscompares). Switch RISC targets to use the list-td scheduler, which isn't.
llvm-svn: 27933
|
| |
|
|
| |
llvm-svn: 27930
|
| |
|
|
|
|
|
|
|
| |
llvm-gcc4 boostrap. Whenever a node is deleted by the dag combiner, it
*must* be returned by the visit function, or the dag combiner will not
know that the node has been processed (and will, e.g., send it to the
target dag combine xforms).
llvm-svn: 27922
|
| |
|
|
|
|
|
|
| |
DAG combiner can turn a VAND V, <-1, 0, -1, -1>, i.e. vector clear elements,
into a vector shuffle with a zero vector. It only does so when TLI tells it
the xform is profitable.
llvm-svn: 27874
|
| |
|
|
| |
llvm-svn: 27863
|
| |
|
|
| |
llvm-svn: 27846
|
| |
|
|
|
|
| |
the code in GCC PR26546.
llvm-svn: 27780
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
and a shuffle. For this:
void %test2(<4 x float>* %F, float %f) {
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2]
%tmp3 = add <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
%tmp2 = insertelement <4 x float> %tmp3, float %f, uint 2 ; <<4 x float>> [#uses=2]
%tmp6 = add <4 x float> %tmp2, %tmp2 ; <<4 x float>> [#uses=1]
store <4 x float> %tmp6, <4 x float>* %F
ret void
}
we now get this on X86 (which will get better):
_test2:
movl 4(%esp), %eax
movaps (%eax), %xmm0
addps %xmm0, %xmm0
movaps %xmm0, %xmm1
shufps $3, %xmm1, %xmm1
movaps %xmm0, %xmm2
shufps $1, %xmm2, %xmm2
unpcklps %xmm1, %xmm2
movss 8(%esp), %xmm1
unpcklps %xmm1, %xmm0
unpcklps %xmm2, %xmm0
addps %xmm0, %xmm0
movaps %xmm0, (%eax)
ret
instead of:
_test2:
subl $28, %esp
movl 32(%esp), %eax
movaps (%eax), %xmm0
addps %xmm0, %xmm0
movaps %xmm0, (%esp)
movss 36(%esp), %xmm0
movss %xmm0, 8(%esp)
movaps (%esp), %xmm0
addps %xmm0, %xmm0
movaps %xmm0, (%eax)
addl $28, %esp
ret
llvm-svn: 27765
|
| |
|
|
|
|
| |
to write one pattern for vector stores instead of 4.
llvm-svn: 27730
|
| |
|
|
|
|
| |
buildvectors.
llvm-svn: 27723
|
| |
|
|
| |
llvm-svn: 27695
|
| |
|
|
| |
llvm-svn: 27632
|
| |
|
|
| |
llvm-svn: 27606
|
| |
|
|
|
|
| |
shouldcustom legalize it and remove their XXXTargetLowering::LowerArguments overload
llvm-svn: 27604
|
| |
|
|
|
|
| |
getNode do it. This fixes CodeGen/Generic/2006-04-11-vecload.ll
llvm-svn: 27602
|
| |
|
|
| |
llvm-svn: 27586
|
| |
|
|
| |
llvm-svn: 27580
|
| |
|
|
| |
llvm-svn: 27578
|
| |
|
|
| |
llvm-svn: 27559
|
| |
|
|
| |
llvm-svn: 27542
|
| |
|
|
|
|
| |
to match again :)
llvm-svn: 27533
|
| |
|
|
| |
llvm-svn: 27529
|
| |
|
|
|
|
| |
as its input.
llvm-svn: 27528
|
| |
|
|
|
|
|
|
|
| |
store vector to $esp
store element to $esp + sizeof(VT) * index
load vector from $esp
The bug is VT is the type of the vector element, not the type of the vector!
llvm-svn: 27517
|
| |
|
|
| |
llvm-svn: 27514
|
| |
|
|
|
|
| |
2. A shuffle mask element can also be an undef.
llvm-svn: 27472
|
| |
|
|
|
|
| |
CodeGen/X86/2006-04-04-CrossBlockCrash.ll
llvm-svn: 27436
|
| |
|
|
|
|
| |
or custom lowering fails.
llvm-svn: 27432
|
| |
|
|
|
|
| |
legal.
llvm-svn: 27402
|
| |
|
|
|
|
|
|
|
|
| |
promoted/expanded (e.g. SCALAR_TO_VECTOR from i8/i16 on PPC).
* Add support for targets to request that VECTOR_SHUFFLE nodes be promoted
to a canonical type, for example, we only want v16i8 shuffles on PPC.
* Move isShuffleLegal out of TLI into Legalize.
* Teach isShuffleLegal to allow shuffles that need to be promoted.
llvm-svn: 27399
|
| |
|
|
| |
llvm-svn: 27391
|
| |
|
|
| |
llvm-svn: 27375
|
| |
|
|
| |
llvm-svn: 27374
|
| |
|
|
| |
llvm-svn: 27367
|
| |
|
|
| |
llvm-svn: 27364
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
int %AreSecondAndThirdElementsBothNegative(<4 x float>* %in) {
entry:
%tmp1 = load <4 x float>* %in ; <<4 x float>> [#uses=1]
%tmp = tail call int %llvm.ppc.altivec.vcmpgefp.p( int 1, <4 x float> < float 0x7FF8000000000000, float 0.000000e+00, float 0.000000e+00, float 0x7FF8000000000000 >, <4 x float> %tmp1 ) ; <int> [#uses=1]
%tmp = seteq int %tmp, 0 ; <bool> [#uses=1]
%tmp3 = cast bool %tmp to int ; <int> [#uses=1]
ret int %tmp3
}
into this:
_AreSecondAndThirdElementsBothNegative:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI1_0)
lis r5, ha16(LCPI1_0)
lvx v0, 0, r3
lvx v1, r5, r4
vcmpgefp. v0, v1, v0
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
mtspr 256, r2
blr
instead of this:
_AreSecondAndThirdElementsBothNegative:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI1_0)
lis r5, ha16(LCPI1_0)
lvx v0, 0, r3
lvx v1, r5, r4
vcmpgefp. v0, v1, v0
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
xori r3, r3, 1
cntlzw r3, r3
srwi r3, r3, 5
mtspr 256, r2
blr
llvm-svn: 27356
|
| |
|
|
|
|
| |
work with PowerPC.
llvm-svn: 27349
|
| |
|
|
|
|
|
| |
into elements and operate on each piece. This allows generic vector integer
multiplies to work on PPC, though the generated code is horrible.
llvm-svn: 27347
|
| |
|
|
|
|
|
| |
have to serialize against each other. This allows us to schedule lvx's
across each other, for example.
llvm-svn: 27346
|
| |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
"vector unsigned char mergeLowHigh = (vector unsigned char)
( 8, 9, 10, 11, 16, 17, 18, 19, 12, 13, 14, 15, 20, 21, 22, 23 );
vector unsigned char mergeHighLow = vec_xor( mergeLowHigh, vec_splat_u8(8));"
aka:
void %test2(<16 x sbyte>* %P) {
store <16 x sbyte> cast (<4 x int> xor (<4 x int> cast (<16 x ubyte> < ubyte 8, ubyte 9, ubyte 10, ubyte 11, ubyte 16, ubyte 17, ubyte 18, ubyte 19, ubyte 12, ubyte 13, ubyte 14, ubyte 15, ubyte 20, ubyte 21, ubyte 22, ubyte 23 > to <4 x int>), <4 x int> cast (<16 x sbyte> < sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8 > to <4 x int>)) to <16 x sbyte>), <16 x sbyte> * %P
ret void
}
into this:
_test2:
mfspr r2, 256
oris r4, r2, 32768
mtspr 256, r4
li r4, lo16(LCPI2_0)
lis r5, ha16(LCPI2_0)
lvx v0, r5, r4
stvx v0, 0, r3
mtspr 256, r2
blr
instead of this:
_test2:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI2_0)
lis r5, ha16(LCPI2_0)
vspltisb v0, 8
lvx v1, r5, r4
vxor v0, v1, v0
stvx v0, 0, r3
mtspr 256, r2
blr
... which occurs here:
http://developer.apple.com/hardware/ve/calcspeed.html
llvm-svn: 27343
|
| |
|
|
| |
llvm-svn: 27342
|
| |
|
|
|
|
| |
vbuild_vector nodes.
llvm-svn: 27341
|
| |
|
|
| |
llvm-svn: 27340
|
| |
|
|
| |
llvm-svn: 27339
|
| |
|
|
|
|
| |
multiple register classes. This fixes PowerPC/2006-04-01-FloatDoubleExtend.ll
llvm-svn: 27334
|
| |
|
|
|
|
| |
CodeGen/Generic/vector-identity-shuffle.ll
llvm-svn: 27317
|