summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/Utils
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2017-07-04 16:53:12 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2017-07-04 16:53:12 +0000
commit9f0a0bd20b02278bf15fe5ffda6c6b62a996c71c (patch)
tree83184383490b3ea185b653f9301fddccce9a811c /llvm/lib/Target/X86/Utils
parent79f8933f23f88e137014be175fc8c6539764b923 (diff)
downloadbcm5719-llvm-9f0a0bd20b02278bf15fe5ffda6c6b62a996c71c.tar.gz
bcm5719-llvm-9f0a0bd20b02278bf15fe5ffda6c6b62a996c71c.zip
[X86][SSE4A] Generalized EXTRQI/INSERTQI shuffle decodes
The existing decodes only worked for v16i8 vectors, this adds support for any 128-bit vector llvm-svn: 307095
Diffstat (limited to 'llvm/lib/Target/X86/Utils')
-rw-r--r--llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp56
-rw-r--r--llvm/lib/Target/X86/Utils/X86ShuffleDecode.h8
2 files changed, 37 insertions, 27 deletions
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 1be5aec849f..de7914360fd 100644
--- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -452,15 +452,20 @@ void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) {
Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);
}
-void DecodeEXTRQIMask(int Len, int Idx,
+void DecodeEXTRQIMask(MVT VT, int Len, int Idx,
SmallVectorImpl<int> &ShuffleMask) {
+ assert(VT.is128BitVector() && "Expected 128-bit vector");
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSize = VT.getScalarSizeInBits();
+ unsigned HalfElts = NumElts / 2;
+
// Only the bottom 6 bits are valid for each immediate.
Len &= 0x3F;
Idx &= 0x3F;
// We can only decode this bit extraction instruction as a shuffle if both the
- // length and index work with whole bytes.
- if (0 != (Len % 8) || 0 != (Idx % 8))
+ // length and index work with whole elements.
+ if (0 != (Len % EltSize) || 0 != (Idx % EltSize))
return;
// A length of zero is equivalent to a bit length of 64.
@@ -469,33 +474,38 @@ void DecodeEXTRQIMask(int Len, int Idx,
// If the length + index exceeds the bottom 64 bits the result is undefined.
if ((Len + Idx) > 64) {
- ShuffleMask.append(16, SM_SentinelUndef);
+ ShuffleMask.append(NumElts, SM_SentinelUndef);
return;
}
- // Convert index and index to work with bytes.
- Len /= 8;
- Idx /= 8;
+ // Convert index and index to work with elements.
+ Len /= EltSize;
+ Idx /= EltSize;
- // EXTRQ: Extract Len bytes starting from Idx. Zero pad the remaining bytes
- // of the lower 64-bits. The upper 64-bits are undefined.
+ // EXTRQ: Extract Len elements starting from Idx. Zero pad the remaining
+ // elements of the lower 64-bits. The upper 64-bits are undefined.
for (int i = 0; i != Len; ++i)
ShuffleMask.push_back(i + Idx);
- for (int i = Len; i != 8; ++i)
+ for (int i = Len; i != HalfElts; ++i)
ShuffleMask.push_back(SM_SentinelZero);
- for (int i = 8; i != 16; ++i)
+ for (int i = HalfElts; i != NumElts; ++i)
ShuffleMask.push_back(SM_SentinelUndef);
}
-void DecodeINSERTQIMask(int Len, int Idx,
+void DecodeINSERTQIMask(MVT VT, int Len, int Idx,
SmallVectorImpl<int> &ShuffleMask) {
+ assert(VT.is128BitVector() && "Expected 128-bit vector");
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSize = VT.getScalarSizeInBits();
+ unsigned HalfElts = NumElts / 2;
+
// Only the bottom 6 bits are valid for each immediate.
Len &= 0x3F;
Idx &= 0x3F;
// We can only decode this bit insertion instruction as a shuffle if both the
- // length and index work with whole bytes.
- if (0 != (Len % 8) || 0 != (Idx % 8))
+ // length and index work with whole elements.
+ if (0 != (Len % EltSize) || 0 != (Idx % EltSize))
return;
// A length of zero is equivalent to a bit length of 64.
@@ -504,24 +514,24 @@ void DecodeINSERTQIMask(int Len, int Idx,
// If the length + index exceeds the bottom 64 bits the result is undefined.
if ((Len + Idx) > 64) {
- ShuffleMask.append(16, SM_SentinelUndef);
+ ShuffleMask.append(NumElts, SM_SentinelUndef);
return;
}
- // Convert index and index to work with bytes.
- Len /= 8;
- Idx /= 8;
+ // Convert index and index to work with elements.
+ Len /= EltSize;
+ Idx /= EltSize;
- // INSERTQ: Extract lowest Len bytes from lower half of second source and
- // insert over first source starting at Idx byte. The upper 64-bits are
+ // INSERTQ: Extract lowest Len elements from lower half of second source and
+ // insert over first source starting at Idx element. The upper 64-bits are
// undefined.
for (int i = 0; i != Idx; ++i)
ShuffleMask.push_back(i);
for (int i = 0; i != Len; ++i)
- ShuffleMask.push_back(i + 16);
- for (int i = Idx + Len; i != 8; ++i)
+ ShuffleMask.push_back(i + NumElts);
+ for (int i = Idx + Len; i != HalfElts; ++i)
ShuffleMask.push_back(i);
- for (int i = 8; i != 16; ++i)
+ for (int i = HalfElts; i != NumElts; ++i)
ShuffleMask.push_back(SM_SentinelUndef);
}
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
index 17619d09d05..251c9f7558e 100644
--- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -134,12 +134,12 @@ void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
void DecodeScalarMoveMask(MVT VT, bool IsLoad,
SmallVectorImpl<int> &ShuffleMask);
-/// Decode a SSE4A EXTRQ instruction as a v16i8 shuffle mask.
-void DecodeEXTRQIMask(int Len, int Idx,
+/// Decode a SSE4A EXTRQ instruction as a shuffle mask.
+void DecodeEXTRQIMask(MVT VT, int Len, int Idx,
SmallVectorImpl<int> &ShuffleMask);
-/// Decode a SSE4A INSERTQ instruction as a v16i8 shuffle mask.
-void DecodeINSERTQIMask(int Len, int Idx,
+/// Decode a SSE4A INSERTQ instruction as a shuffle mask.
+void DecodeINSERTQIMask(MVT VT, int Len, int Idx,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERMILPD/VPERMILPS variable mask from a raw array of constants.
OpenPOWER on IntegriCloud