| author | Kevin Qin <Kevin.Qin@arm.com> | 2014-01-21 01:48:52 +0000 |
|---|---|---|
| committer | Kevin Qin <Kevin.Qin@arm.com> | 2014-01-21 01:48:52 +0000 |
| commit | 6d379abd8f87a36c8638a894d59a4b72fd892946 (patch) | |
| tree | 3fe82e59b1af9311810412a822f3f7c581e21747 | |
| parent | d950ae74a36190889a871fde7bfdf56bfd6dc7c8 (diff) | |
[AArch64 NEON] Fix a bug caused by undef lane when generating VEXT.
It was originally committed as r199628 but reverted after causing
regression test failures, because an old version of the patch had been
committed by accident. Sorry for the mistake.
llvm-svn: 199704
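
For context on the change below: the old check required the shuffle mask to be strictly sequential starting from lane 0, so an undef lane (encoded as -1 in LLVM shuffle masks) could slip through the sequential check and feed -1 into the extract-index computation. The sketch that follows is not the LLVM code itself; it is a minimal standalone C++ re-implementation of the patched check, written for illustration (the helper name `isVEXTMask` and the `main` driver are invented here). Leading undef lanes are skipped, each later defined lane must continue the sequence, and the extract index is rebased by the number of skipped lanes.

```cpp
#include <cstdio>
#include <vector>

// Minimal standalone sketch of the patched check (illustrative only).
// Undef lanes use LLVM's convention of -1. Returns true and sets ByteIndex
// when the mask describes a single EXT over lanes of EltSizeInBytes bytes.
static bool isVEXTMask(const std::vector<int> &Mask, unsigned EltSizeInBytes,
                       unsigned &ByteIndex) {
  int Length = static_cast<int>(Mask.size());

  // Skip leading undef lanes; they impose no constraint on the start index.
  int SkipUndef = 0;
  while (SkipUndef < Length && Mask[SkipUndef] == -1)
    ++SkipUndef;
  if (SkipUndef == Length)
    return false; // All lanes undef: nothing to pattern-match.

  // The first defined lane must not imply a negative start index
  // (this is the guard the patch adds with `CurMask >= SkipUndef`).
  int CurMask = Mask[SkipUndef];
  if (CurMask < SkipUndef)
    return false;

  // Every later lane must either be undef or continue the sequence.
  for (int I = SkipUndef; I < Length; ++I) {
    if (Mask[I] != -1 && Mask[I] != CurMask)
      return false;
    ++CurMask;
  }

  // Rebase the start element by the skipped undef lanes, then scale to bytes.
  ByteIndex = EltSizeInBytes * (Mask[SkipUndef] - SkipUndef);
  return true;
}

int main() {
  // Assumption for illustration: by the time the lowering runs, lanes of
  // test_undef_vext_s8 that read the undef operand have been turned into -1,
  // so <10,10,4,5,6,7,8,9> behaves like the mask below.
  std::vector<int> Mask = {-1, -1, 4, 5, 6, 7, -1, -1};
  unsigned ByteIndex = 0;
  if (isVEXTMask(Mask, /*EltSizeInBytes=*/1, ByteIndex))
    std::printf("ext #%#x\n", ByteIndex); // Prints: ext #0x2
  return 0;
}
```
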
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 36 |
| -rw-r--r-- | llvm/test/CodeGen/AArch64/neon-extract.ll | 32 |
2 files changed, 53 insertions, 15 deletions
```diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e98fbe1d3a3..27277c47f39 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4654,22 +4654,28 @@ AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
   // it into NEON_VEXTRACT.
   if (V1EltNum == Length) {
     // Check if the shuffle mask is sequential.
-    bool IsSequential = true;
-    int CurMask = ShuffleMask[0];
-    for (int I = 0; I < Length; ++I) {
-      if (ShuffleMask[I] != CurMask) {
-        IsSequential = false;
-        break;
-      }
-      CurMask++;
+    int SkipUndef = 0;
+    while (ShuffleMask[SkipUndef] == -1) {
+      SkipUndef++;
     }
-    if (IsSequential) {
-      assert((EltSize % 8 == 0) && "Bitsize of vector element is incorrect");
-      unsigned VecSize = EltSize * V1EltNum;
-      unsigned Index = (EltSize/8) * ShuffleMask[0];
-      if (VecSize == 64 || VecSize == 128)
-        return DAG.getNode(AArch64ISD::NEON_VEXTRACT, dl, VT, V1, V2,
-                           DAG.getConstant(Index, MVT::i64));
+    int CurMask = ShuffleMask[SkipUndef];
+    if (CurMask >= SkipUndef) {
+      bool IsSequential = true;
+      for (int I = SkipUndef; I < Length; ++I) {
+        if (ShuffleMask[I] != -1 && ShuffleMask[I] != CurMask) {
+          IsSequential = false;
+          break;
+        }
+        CurMask++;
+      }
+      if (IsSequential) {
+        assert((EltSize % 8 == 0) && "Bitsize of vector element is incorrect");
+        unsigned VecSize = EltSize * V1EltNum;
+        unsigned Index = (EltSize / 8) * (ShuffleMask[SkipUndef] - SkipUndef);
+        if (VecSize == 64 || VecSize == 128)
+          return DAG.getNode(AArch64ISD::NEON_VEXTRACT, dl, VT, V1, V2,
+                             DAG.getConstant(Index, MVT::i64));
+      }
     }
   }
 
diff --git a/llvm/test/CodeGen/AArch64/neon-extract.ll b/llvm/test/CodeGen/AArch64/neon-extract.ll
index 5c52cd30676..cddc2267d77 100644
--- a/llvm/test/CodeGen/AArch64/neon-extract.ll
+++ b/llvm/test/CodeGen/AArch64/neon-extract.ll
@@ -188,3 +188,35 @@ entry:
   %vext = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
   ret <8 x i16> %vext
 }
+
+define <8 x i8> @test_undef_vext_s8(<8 x i8> %a) {
+; CHECK: test_undef_vext_s8:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2
+entry:
+  %vext = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 10, i32 10, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
+  ret <8 x i8> %vext
+}
+
+define <16 x i8> @test_undef_vextq_s8(<16 x i8> %a) {
+; CHECK: test_undef_vextq_s8:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6
+entry:
+  %vext = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 20, i32 20, i32 20, i32 20, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 20, i32 20, i32 20, i32 20, i32 20>
+  ret <16 x i8> %vext
+}
+
+define <4 x i16> @test_undef_vext_s16(<4 x i16> %a) {
+; CHECK: test_undef_vext_s16:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x4
+entry:
+  %vext = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+  ret <4 x i16> %vext
+}
+
+define <8 x i16> @test_undef_vextq_s16(<8 x i16> %a) {
+; CHECK: test_undef_vextq_s16:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6
+entry:
+  %vext = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 10, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+  ret <8 x i16> %vext
+}
```
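
Reading the first new test against the patched logic: the second shufflevector operand is undef, and lanes that index into an undef operand are canonicalized to -1 in the DAG, so the mask <10, 10, 4, 5, 6, 7, 8, 9> reaches the lowering as (effectively) <-1, -1, 4, 5, 6, 7, -1, -1>. SkipUndef lands on lane 2, whose value is 4; the remaining defined lanes continue the sequence, so the extract starts at element 4 - 2 = 2, i.e. byte offset 2 for i8 lanes, which is the #0x2 immediate the CHECK line expects.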

