summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
diff options
context:
space:
mode:
author Simon Pilgrim <llvm-dev@redking.me.uk> 2017-03-09 14:06:39 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk> 2017-03-09 14:06:39 +0000
commit e86b7e2256dec3e7f385ebced827f91a60af2bb7 (patch)
tree 571a94120c1829ff1cbe9d0983b8d7a44d2a0fa1 /llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
parent 7577ce214086649ece83500d6e7daadcd0b57609 (diff)
download: bcm5719-llvm-e86b7e2256dec3e7f385ebced827f91a60af2bb7.tar.gz
          bcm5719-llvm-e86b7e2256dec3e7f385ebced827f91a60af2bb7.zip
[X86][SSE] Speed up constant pool shuffle mask decoding with direct copy (PR32037).
If the constants are already the correct size, we can copy them directly into the shuffle mask.

llvm-svn: 297381
Diffstat (limited to 'llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp')
-rw-r--r-- llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp | 34
1 file changed, 27 insertions, 7 deletions
diff --git a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
index df6ddafa717..9190b66367c 100644
--- a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
+++ b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
@@ -49,6 +49,33 @@ static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
unsigned NumCstElts = CstTy->getVectorNumElements();
+ assert((CstSizeInBits % MaskEltSizeInBits) == 0 &&
+ "Unaligned shuffle mask size");
+
+ unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits;
+ UndefElts = APInt(NumMaskElts, 0);
+ RawMask.resize(NumMaskElts, 0);
+
+ // Fast path - if the constants match the mask size then copy direct.
+ if (MaskEltSizeInBits == CstEltSizeInBits) {
+ assert(NumCstElts == NumMaskElts && "Unaligned shuffle mask size");
+ for (unsigned i = 0; i != NumMaskElts; ++i) {
+ Constant *COp = C->getAggregateElement(i);
+ if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
+ return false;
+
+ if (isa<UndefValue>(COp)) {
+ UndefElts.setBit(i);
+ RawMask[i] = 0;
+ continue;
+ }
+
+ auto *Elt = cast<ConstantInt>(COp);
+ RawMask[i] = Elt->getValue().getZExtValue();
+ }
+ return true;
+ }
+
// Extract all the undef/constant element data and pack into single bitsets.
APInt UndefBits(CstSizeInBits, 0);
APInt MaskBits(CstSizeInBits, 0);
@@ -69,13 +96,6 @@ static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
}
// Now extract the undef/constant bit data into the raw shuffle masks.
- assert((CstSizeInBits % MaskEltSizeInBits) == 0 &&
- "Unaligned shuffle mask size");
-
- unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits;
- UndefElts = APInt(NumMaskElts, 0);
- RawMask.resize(NumMaskElts, 0);
-
for (unsigned i = 0; i != NumMaskElts; ++i) {
unsigned BitOffset = i * MaskEltSizeInBits;
APInt EltUndef = UndefBits.extractBits(MaskEltSizeInBits, BitOffset);
OpenPOWER on IntegriCloud