diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-03-09 14:06:39 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-03-09 14:06:39 +0000 |
commit | e86b7e2256dec3e7f385ebced827f91a60af2bb7 (patch) | |
tree | 571a94120c1829ff1cbe9d0983b8d7a44d2a0fa1 /llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp | |
parent | 7577ce214086649ece83500d6e7daadcd0b57609 (diff) | |
download | bcm5719-llvm-e86b7e2256dec3e7f385ebced827f91a60af2bb7.tar.gz bcm5719-llvm-e86b7e2256dec3e7f385ebced827f91a60af2bb7.zip |
[X86][SSE] Speed up constant pool shuffle mask decoding with direct copy (PR32037).
If the constants are already the correct size, we can copy them directly into the shuffle mask.
llvm-svn: 297381
Diffstat (limited to 'llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp | 34 |
1 file changed, 27 insertions, 7 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
index df6ddafa717..9190b66367c 100644
--- a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
+++ b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
@@ -49,6 +49,33 @@ static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
   unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
   unsigned NumCstElts = CstTy->getVectorNumElements();
 
+  assert((CstSizeInBits % MaskEltSizeInBits) == 0 &&
+         "Unaligned shuffle mask size");
+
+  unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits;
+  UndefElts = APInt(NumMaskElts, 0);
+  RawMask.resize(NumMaskElts, 0);
+
+  // Fast path - if the constants match the mask size then copy direct.
+  if (MaskEltSizeInBits == CstEltSizeInBits) {
+    assert(NumCstElts == NumMaskElts && "Unaligned shuffle mask size");
+    for (unsigned i = 0; i != NumMaskElts; ++i) {
+      Constant *COp = C->getAggregateElement(i);
+      if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
+        return false;
+
+      if (isa<UndefValue>(COp)) {
+        UndefElts.setBit(i);
+        RawMask[i] = 0;
+        continue;
+      }
+
+      auto *Elt = cast<ConstantInt>(COp);
+      RawMask[i] = Elt->getValue().getZExtValue();
+    }
+    return true;
+  }
+
   // Extract all the undef/constant element data and pack into single bitsets.
   APInt UndefBits(CstSizeInBits, 0);
   APInt MaskBits(CstSizeInBits, 0);
@@ -69,13 +96,6 @@ static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
   }
 
   // Now extract the undef/constant bit data into the raw shuffle masks.
-  assert((CstSizeInBits % MaskEltSizeInBits) == 0 &&
-         "Unaligned shuffle mask size");
-
-  unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits;
-  UndefElts = APInt(NumMaskElts, 0);
-  RawMask.resize(NumMaskElts, 0);
-
   for (unsigned i = 0; i != NumMaskElts; ++i) {
     unsigned BitOffset = i * MaskEltSizeInBits;
     APInt EltUndef = UndefBits.extractBits(MaskEltSizeInBits, BitOffset);
```