summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-06-08 01:09:31 +0000
committerCraig Topper <craig.topper@intel.com>2018-06-08 01:09:31 +0000
commit2ed43282814320e709518e03c77c5b0aef35616a (patch)
treefd73f427d8aeff5a670bbc18a11d1087497346db
parent6db1f5da4feb863995c394a91a4f635dd4d90587 (diff)
downloadbcm5719-llvm-2ed43282814320e709518e03c77c5b0aef35616a.tar.gz
bcm5719-llvm-2ed43282814320e709518e03c77c5b0aef35616a.zip
[X86] Improve some shuffle decoding code to remove a conditional from a loop and reduce the number of temporary variables. NFCI
The NumControlBits variable was definitely sketchy. I think that only worked because the expected value was 1 or 2 and the number of lanes was 2 or 4. Had there been 8 lanes, the number of bits should have been 3, not 4 as the previous code would have given. llvm-svn: 334258
-rw-r--r--llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp20
1 files changed, 9 insertions, 11 deletions
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 8ac1762a30c..fe567f4cece 100644
--- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -152,13 +152,12 @@ void DecodePSHUFMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm,
if (NumLanes == 0) NumLanes = 1; // Handle MMX
unsigned NumLaneElts = NumElts / NumLanes;
- unsigned NewImm = Imm;
+ uint32_t SplatImm = (Imm & 0xff) * 0x01010101;
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
for (unsigned i = 0; i != NumLaneElts; ++i) {
- ShuffleMask.push_back(NewImm % NumLaneElts + l);
- NewImm /= NumLaneElts;
+ ShuffleMask.push_back(SplatImm % NumLaneElts + l);
+ SplatImm /= NumLaneElts;
}
- if (NumLaneElts == 4) NewImm = Imm; // reload imm
}
}
@@ -281,16 +280,15 @@ void decodeVSHUF64x2FamilyMask(unsigned NumElts, unsigned ScalarSize,
SmallVectorImpl<int> &ShuffleMask) {
unsigned NumElementsInLane = 128 / ScalarSize;
unsigned NumLanes = NumElts / NumElementsInLane;
- unsigned ControlBitsMask = NumLanes - 1;
- unsigned NumControlBits = NumLanes / 2;
- for (unsigned l = 0; l != NumLanes; ++l) {
- unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
+ for (unsigned l = 0; l != NumElts; l += NumElementsInLane) {
+ unsigned Index = (Imm % NumLanes) * NumElementsInLane;
+ Imm /= NumLanes; // Discard the bits we just used.
// We actually need the other source.
- if (l >= NumLanes / 2)
- LaneMask += NumLanes;
+ if (l >= (NumElts / 2))
+ Index += NumElts;
for (unsigned i = 0; i != NumElementsInLane; ++i)
- ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
+ ShuffleMask.push_back(Index + i);
}
}
OpenPOWER on IntegriCloud