diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-06-08 01:09:31 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-06-08 01:09:31 +0000 |
| commit | 2ed43282814320e709518e03c77c5b0aef35616a (patch) | |
| tree | fd73f427d8aeff5a670bbc18a11d1087497346db | |
| parent | 6db1f5da4feb863995c394a91a4f635dd4d90587 (diff) | |
| download | bcm5719-llvm-2ed43282814320e709518e03c77c5b0aef35616a.tar.gz bcm5719-llvm-2ed43282814320e709518e03c77c5b0aef35616a.zip | |
[X86] Improve some shuffle decoding code to remove a conditional from a loop and reduce the number of temporary variables. NFCI
The NumControlBits variable was definitely sketchy. I think that only worked because the expected value was 1 or 2 and the number of lanes was 2 or 4. Had there been 8 lanes, the number of bits should have been 3, not 4 as the previous code would have given.
llvm-svn: 334258
| -rw-r--r-- | llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp | 20 |
1 files changed, 9 insertions, 11 deletions
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 8ac1762a30c..fe567f4cece 100644 --- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -152,13 +152,12 @@ void DecodePSHUFMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm, if (NumLanes == 0) NumLanes = 1; // Handle MMX unsigned NumLaneElts = NumElts / NumLanes; - unsigned NewImm = Imm; + uint32_t SplatImm = (Imm & 0xff) * 0x01010101; for (unsigned l = 0; l != NumElts; l += NumLaneElts) { for (unsigned i = 0; i != NumLaneElts; ++i) { - ShuffleMask.push_back(NewImm % NumLaneElts + l); - NewImm /= NumLaneElts; + ShuffleMask.push_back(SplatImm % NumLaneElts + l); + SplatImm /= NumLaneElts; } - if (NumLaneElts == 4) NewImm = Imm; // reload imm } } @@ -281,16 +280,15 @@ void decodeVSHUF64x2FamilyMask(unsigned NumElts, unsigned ScalarSize, SmallVectorImpl<int> &ShuffleMask) { unsigned NumElementsInLane = 128 / ScalarSize; unsigned NumLanes = NumElts / NumElementsInLane; - unsigned ControlBitsMask = NumLanes - 1; - unsigned NumControlBits = NumLanes / 2; - for (unsigned l = 0; l != NumLanes; ++l) { - unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask; + for (unsigned l = 0; l != NumElts; l += NumElementsInLane) { + unsigned Index = (Imm % NumLanes) * NumElementsInLane; + Imm /= NumLanes; // Discard the bits we just used. // We actually need the other source. - if (l >= NumLanes / 2) - LaneMask += NumLanes; + if (l >= (NumElts / 2)) + Index += NumElts; for (unsigned i = 0; i != NumElementsInLane; ++i) - ShuffleMask.push_back(LaneMask * NumElementsInLane + i); + ShuffleMask.push_back(Index + i); } } |

