summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/Utils
diff options
context:
space:
mode:
authorBruno Cardoso Lopes <bruno.cardoso@gmail.com>2011-07-21 01:55:47 +0000
committerBruno Cardoso Lopes <bruno.cardoso@gmail.com>2011-07-21 01:55:47 +0000
commitb878caa5e2ac6b96a79e011f77f25fdbb6e61ccb (patch)
treea2405fdd9b8e6530109e1723744fdc3cacdabc08 /llvm/lib/Target/X86/Utils
parentfb4920eb2513e3519ba59d197bf7db9a29c7cede (diff)
downloadbcm5719-llvm-b878caa5e2ac6b96a79e011f77f25fdbb6e61ccb.tar.gz
bcm5719-llvm-b878caa5e2ac6b96a79e011f77f25fdbb6e61ccb.zip
Add support for 256-bit versions of VPERMIL instruction. This is a new
instruction introduced in AVX, which can operate on 128 and 256-bit vectors. It considers a 256-bit vector as two independent 128-bit lanes. It can permute any 32 or 64 elements inside a lane, and restricts the second lane to have the same permutation of the first one. With the improved splat support introduced early today, adding codegen for this instruction enable more efficient 256-bit code: Instead of: vextractf128 $0, %ymm0, %xmm0 punpcklbw %xmm0, %xmm0 punpckhbw %xmm0, %xmm0 vinsertf128 $0, %xmm0, %ymm0, %ymm1 vinsertf128 $1, %xmm0, %ymm1, %ymm0 vextractf128 $1, %ymm0, %xmm1 shufps $1, %xmm1, %xmm1 movss %xmm1, 28(%rsp) movss %xmm1, 24(%rsp) movss %xmm1, 20(%rsp) movss %xmm1, 16(%rsp) vextractf128 $0, %ymm0, %xmm0 shufps $1, %xmm0, %xmm0 movss %xmm0, 12(%rsp) movss %xmm0, 8(%rsp) movss %xmm0, 4(%rsp) movss %xmm0, (%rsp) vmovaps (%rsp), %ymm0 We get: vextractf128 $0, %ymm0, %xmm0 punpcklbw %xmm0, %xmm0 punpckhbw %xmm0, %xmm0 vinsertf128 $0, %xmm0, %ymm0, %ymm1 vinsertf128 $1, %xmm0, %ymm1, %ymm0 vpermilps $85, %ymm0, %ymm0 llvm-svn: 135662
Diffstat (limited to 'llvm/lib/Target/X86/Utils')
-rw-r--r--llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp27
-rw-r--r--llvm/lib/Target/X86/Utils/X86ShuffleDecode.h14
2 files changed, 41 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index cd06060748b..c1ff0e5011e 100644
--- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -187,4 +187,31 @@ void DecodeUNPCKLPMask(EVT VT,
}
}
+void DecodeVPERMILPSMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeVPERMILMask(MVT::getVectorVT(MVT::i32, NElts), Imm, ShuffleMask);
+}
+
+void DecodeVPERMILPDMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeVPERMILMask(MVT::getVectorVT(MVT::i64, NElts), Imm, ShuffleMask);
+}
+
+// DecodeVPERMILMask - Decodes VPERMIL permutes for any 128-bit
+// with 32/64-bit elements. For 256-bit vectors, it's considered
+// as two 128 lanes and the mask of the first lane should be
+// identical of the second one.
+void DecodeVPERMILMask(EVT VT, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumLanes = VT.getSizeInBits()/128;
+
+ for (unsigned l = 0; l != NumLanes; ++l) {
+ for (unsigned i = 0; i != NumElts/NumLanes; ++i) {
+ unsigned Idx = (Imm >> (i*2)) & 0x3 ;
+ ShuffleMask.push_back(Idx+(l*NumElts/NumLanes));
+ }
+ }
+}
+
} // llvm namespace
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
index b18f6703309..4a5214028fa 100644
--- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -82,6 +82,20 @@ void DecodeUNPCKLPDMask(unsigned NElts,
void DecodeUNPCKLPMask(EVT VT,
SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeVPERMILPSMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeVPERMILPDMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+// DecodeVPERMILMask - Decodes VPERMIL permutes for any 128-bit
+// with 32/64-bit elements. For 256-bit vectors, it's considered
+// as two 128 lanes and the mask of the first lane should be
+// identical of the second one.
+void DecodeVPERMILMask(EVT VT, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
} // llvm namespace
#endif
OpenPOWER on IntegriCloud