diff options
| author | Nadav Rotem <nadav.rotem@intel.com> | 2012-02-03 13:18:25 +0000 |
|---|---|---|
| committer | Nadav Rotem <nadav.rotem@intel.com> | 2012-02-03 13:18:25 +0000 |
| commit | 5399f4d6bf42725e86427971307a3fc65cb03891 (patch) | |
| tree | 69a8f643a2f817c5c526b54562591cc98743caca | |
| parent | f9fef3d275dd5a4a729e16601a7e3e8207f1a171 (diff) | |
| download | bcm5719-llvm-5399f4d6bf42725e86427971307a3fc65cb03891.tar.gz bcm5719-llvm-5399f4d6bf42725e86427971307a3fc65cb03891.zip | |
The type-legalizer often scalarizes code. One of the common patterns is extract-and-truncate.
In this patch we optimize this pattern and convert the sequence into an extract op of a narrower type.
This allows the BUILD_VECTOR DAG optimizations to construct efficient shuffle operations in many cases.
llvm-svn: 149692
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 34 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/2011-12-8-bitcastintprom.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-shuffle.ll | 8 |
3 files changed, 43 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 979f79478b0..48b9a3939e7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4957,6 +4957,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + bool isLE = TLI.isLittleEndian(); // noop truncate if (N0.getValueType() == N->getValueType(0)) @@ -4984,6 +4985,39 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return N0.getOperand(0); } + // Fold Extract-and-trunc into a narrow extract: + // trunc(extract(x)) -> extract(bitcast(x)) + // We only run this optimization after type legalization (which often + // creates this pattern) and before operation legalization after which + // we need to be more careful about the vector instructions that we generate. + if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + LegalTypes && !LegalOperations && N0->hasOneUse()) { + + EVT VecTy = N0.getOperand(0).getValueType(); + EVT ExTy = N0.getValueType(); + EVT TrTy = N->getValueType(0); + + unsigned NumElem = VecTy.getVectorNumElements(); + unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits(); + + EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem); + assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size"); + + SDValue EltNo = N0->getOperand(1); + if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { + int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + + int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); + + SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + NVT, N0.getOperand(0)); + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, + N->getDebugLoc(), TrTy, V, + DAG.getConstant(Index, MVT::i32)); + } + } + // See if we can simplify the input to this truncate through knowledge that // only the low bits are being used. 
// For example "trunc (or (shl x, 8), y)" // -> trunc y diff --git a/llvm/test/CodeGen/X86/2011-12-8-bitcastintprom.ll b/llvm/test/CodeGen/X86/2011-12-8-bitcastintprom.ll index ceee8e6041e..e2b3ebcf76e 100644 --- a/llvm/test/CodeGen/X86/2011-12-8-bitcastintprom.ll +++ b/llvm/test/CodeGen/X86/2011-12-8-bitcastintprom.ll @@ -2,8 +2,8 @@ ; Make sure that the conversion between v4i8 to v2i16 is not a simple bitcast. ; CHECK: prom_bug -; CHECK: movd ; CHECK: shufb +; CHECK: movd ; CHECK: movw ; CHECK: ret define void @prom_bug(<4 x i8> %t, i16* %p) { diff --git a/llvm/test/CodeGen/X86/avx-shuffle.ll b/llvm/test/CodeGen/X86/avx-shuffle.ll index 08b668c983b..5a5c35333f2 100644 --- a/llvm/test/CodeGen/X86/avx-shuffle.ll +++ b/llvm/test/CodeGen/X86/avx-shuffle.ll @@ -109,3 +109,11 @@ define <4 x float> @test12(<4 x float>* %a) nounwind { %tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ret <4 x float> %tmp1 } + +;CHECK: test13 +;CHECK: shufd +;CHECK: ret +define <4 x i32> @test13(<2 x i32>%x) nounwind readnone { + %x1 = shufflevector <2 x i32> %x, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> + ret <4 x i32>%x1 +} |

