diff options
| author | Chris Lattner <sabre@nondot.org> | 2006-04-17 19:21:01 +0000 |
|---|---|---|
| committer | Chris Lattner <sabre@nondot.org> | 2006-04-17 19:21:01 +0000 |
| commit | 326870b40b7bdff2ff0046000b29737b3ca69f5b (patch) | |
| tree | 9fdfb98faf28306563b24de1acdc3bf4a68bc92e /llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | |
| parent | e54133cfba3c6de494b92f61e738104c59a1eac2 (diff) | |
| download | bcm5719-llvm-326870b40b7bdff2ff0046000b29737b3ca69f5b.tar.gz bcm5719-llvm-326870b40b7bdff2ff0046000b29737b3ca69f5b.zip | |
Codegen insertelement with constant insertion points as scalar_to_vector
and a shuffle. For this:
void %test2(<4 x float>* %F, float %f) {
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2]
%tmp3 = add <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
%tmp2 = insertelement <4 x float> %tmp3, float %f, uint 2 ; <<4 x float>> [#uses=2]
%tmp6 = add <4 x float> %tmp2, %tmp2 ; <<4 x float>> [#uses=1]
store <4 x float> %tmp6, <4 x float>* %F
ret void
}
we now get this on X86 (which will get better):
_test2:
movl 4(%esp), %eax
movaps (%eax), %xmm0
addps %xmm0, %xmm0
movaps %xmm0, %xmm1
shufps $3, %xmm1, %xmm1
movaps %xmm0, %xmm2
shufps $1, %xmm2, %xmm2
unpcklps %xmm1, %xmm2
movss 8(%esp), %xmm1
unpcklps %xmm1, %xmm0
unpcklps %xmm2, %xmm0
addps %xmm0, %xmm0
movaps %xmm0, (%eax)
ret
instead of:
_test2:
subl $28, %esp
movl 32(%esp), %eax
movaps (%eax), %xmm0
addps %xmm0, %xmm0
movaps %xmm0, (%esp)
movss 36(%esp), %xmm0
movss %xmm0, 8(%esp)
movaps (%esp), %xmm0
addps %xmm0, %xmm0
movaps %xmm0, (%eax)
addl $28, %esp
ret
llvm-svn: 27765
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp')
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 28 |
1 file changed, 28 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 7cf71418fa7..30044471d44 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -867,6 +867,34 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
       }
       // FALLTHROUGH
     case TargetLowering::Expand: {
+      // If the insert index is a constant, codegen this as a scalar_to_vector,
+      // then a shuffle that inserts it into the right position in the vector.
+      if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Tmp3)) {
+        SDOperand ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR,
+                                      Tmp1.getValueType(), Tmp2);
+
+        unsigned NumElts = MVT::getVectorNumElements(Tmp1.getValueType());
+        MVT::ValueType ShufMaskVT = MVT::getIntVectorWithNumElements(NumElts);
+        MVT::ValueType ShufMaskEltVT = MVT::getVectorBaseType(ShufMaskVT);
+
+        // We generate a shuffle of InVec and ScVec, so the shuffle mask should
+        // be 0,1,2,3,4,5... with the appropriate element replaced with elt 0 of
+        // the RHS.
+        std::vector<SDOperand> ShufOps;
+        for (unsigned i = 0; i != NumElts; ++i) {
+          if (i != InsertPos->getValue())
+            ShufOps.push_back(DAG.getConstant(i, ShufMaskEltVT));
+          else
+            ShufOps.push_back(DAG.getConstant(NumElts, ShufMaskEltVT));
+        }
+        SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, ShufMaskVT,ShufOps);
+
+        Result = DAG.getNode(ISD::VECTOR_SHUFFLE, Tmp1.getValueType(),
+                             Tmp1, ScVec, ShufMask);
+        Result = LegalizeOp(Result);
+        break;
+      }
+
       // If the target doesn't support this, we have to spill the input vector
       // to a temporary stack slot, update the element, then reload it. This is
       // badness. We could also load the value into a vector register (either |

