diff options
author | Wolfgang Pieb <Wolfgang.Pieb@sony.com> | 2017-06-26 23:05:51 +0000 |
---|---|---|
committer | Wolfgang Pieb <Wolfgang.Pieb@sony.com> | 2017-06-26 23:05:51 +0000 |
commit | 9f658582350570ce458bd909d3022546227d48c4 (patch) | |
tree | d9c8d9d9055d34ecbbef1e1bc44ec70fff5a9fbf | |
parent | 8b7effb344d6b4063952ceb92810de20c4410991 (diff) | |
download | bcm5719-llvm-9f658582350570ce458bd909d3022546227d48c4.tar.gz bcm5719-llvm-9f658582350570ce458bd909d3022546227d48c4.zip |
DAGCombine: Make sure we only eliminate trunc/extend when the scales of truncation and extension match.
This fixes PR33368.
Reviewer: rksimon
Differential Revision: https://reviews.llvm.org/D34069
llvm-svn: 306345
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 14 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-truncate-combine.ll | 35 |
2 files changed, 44 insertions, 5 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d02dcb6f443..d1a5a98607c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15013,6 +15013,11 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
   unsigned NumElts = VT.getVectorNumElements();
   unsigned EltSizeInBits = VT.getScalarSizeInBits();
   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
+  unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
+
+  if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
+    return SDValue();
+  unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
 
   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
@@ -15034,11 +15039,10 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
   if (EltSizeInBits != ExtSrcSizeInBits)
     return SDValue();
 
-  // Attempt to match a 'truncate_vector_inreg' shuffle, we just search for
-  // power-of-2 truncations as they are the most likely.
-  for (unsigned Scale = 2; Scale < NumElts; Scale *= 2)
-    if (isTruncate(Scale))
-      return DAG.getBitcast(VT, N00);
+  // We can remove *extend_vector_inreg only if the truncation happens at
+  // the same scale as the extension.
+  if (isTruncate(ExtScale))
+    return DAG.getBitcast(VT, N00);
 
   return SDValue();
 }
diff --git a/llvm/test/CodeGen/X86/vector-truncate-combine.ll b/llvm/test/CodeGen/X86/vector-truncate-combine.ll
new file mode 100644
index 00000000000..1a6dac8fa6e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vector-truncate-combine.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=x86_64-- -O2 -start-after=stack-protector -stop-before=loops %s -o - | FileCheck %s
+
+; This test verifies the fix for PR33368.
+;
+; The expected outcome of the operation is to store bytes 0 and 2 of the incoming
+; parameter into c2 (a 2 x i8 vector). DAGCombine converts shuffles into a
+; sequence of extend and subsequent truncate operations. The bug was that an extension
+; by 4 followed by a truncation by 8 was completely eliminated.
+
+; The test checks for the correct sequence of operations that results from the
+; preservation of the extend/truncate operations mentioned above (2 extend and
+; 3 truncate instructions).
+;
+; NOTE: This operation could be collapsed in to a single truncate. Once that is done
+; this test will have to be adjusted.
+
+; CHECK: PUNPCKLBWrr
+; CHECK: PUNPCKLWDrr
+; CHECK: PACKUSWBrr
+; CHECK: PACKUSWBrr
+; CHECK: PACKUSWBrr
+
+define void @test(double %vec.coerce) local_unnamed_addr {
+entry:
+  %c2 = alloca <2 x i8>, align 2
+  %0 = bitcast double %vec.coerce to <8 x i8>
+  %1 = shufflevector <8 x i8> %0, <8 x i8> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 0>
+  %2 = shufflevector <4 x i8> %1, <4 x i8> undef, <2 x i32> <i32 3, i32 0>
+  store volatile <2 x i8> %2, <2 x i8>* %c2, align 2
+  br label %if.end
+
+if.end:
+  %3 = bitcast <2 x i8> %2 to i16
+  ret void
+}