summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/Utils
diff options
context:
space:
mode:
author David Greene <greened@obbligato.org> 2011-03-02 17:23:43 +0000
committer David Greene <greened@obbligato.org> 2011-03-02 17:23:43 +0000
commit dd567b214b3464a20baf1c1a611d525a61db53c3 (patch)
tree bfad11383a4889e63ae2250e761fd06a72d95e7a /llvm/lib/Target/X86/Utils
parent 9d80212115baea9be681574131633a884a60b40a (diff)
download bcm5719-llvm-dd567b214b3464a20baf1c1a611d525a61db53c3.tar.gz
bcm5719-llvm-dd567b214b3464a20baf1c1a611d525a61db53c3.zip
[AVX] Fix mask predicates for 256-bit UNPCKLPS/D and implement
missing patterns for them. Add a SIMD test subdirectory to hold tests for SIMD instruction selection correctness and quality. llvm-svn: 126845
Diffstat (limited to 'llvm/lib/Target/X86/Utils')
-rw-r--r-- llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp 25
1 files changed, 19 insertions, 6 deletions
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index eeb83c19b70..cd06060748b 100644
--- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -165,12 +165,25 @@ void DecodeUNPCKLPDMask(unsigned NElts,
/// datatypes and vector widths.
void DecodeUNPCKLPMask(EVT VT,
SmallVectorImpl<unsigned> &ShuffleMask) {
-
- unsigned NElts = VT.getVectorNumElements();
-
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(i); // Reads from dest
- ShuffleMask.push_back(i+NElts); // Reads from src
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Handle vector lengths > 128 bits. Define a "section" as a set of
+ // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
+ // sections; e.g. an 8-element 256-bit mask is <0,8,1,9,4,12,5,13>.
+ unsigned NumSections = VT.getSizeInBits() / 128;
+ if (NumSections == 0 ) NumSections = 1; // Handle MMX
+ unsigned NumSectionElts = NumElts / NumSections;
+
+ unsigned Start = 0;
+ unsigned End = NumSectionElts / 2;
+ for (unsigned s = 0; s < NumSections; ++s) {
+ for (unsigned i = Start; i != End; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest/src1
+ ShuffleMask.push_back(i+NumElts); // Reads from src/src2; its mask indices start at NumElts, not at the section size
+ }
+ // Process the next 128 bits.
+ Start += NumSectionElts;
+ End += NumSectionElts;
}
}
OpenPOWER on IntegriCloud