diff options
author | Bill Schmidt <wschmidt@linux.vnet.ibm.com> | 2014-07-25 01:55:55 +0000 |
---|---|---|
committer | Bill Schmidt <wschmidt@linux.vnet.ibm.com> | 2014-07-25 01:55:55 +0000 |
commit | c9fa5dd61882c2b272d887abc3a33dae5bbbcedc (patch) | |
tree | ec7dd80fac172ea19ee8f1aa04a1d673b7e7d342 /llvm/lib/Target/PowerPC/PPCISelLowering.cpp | |
parent | d9474064271271af78c2a1eecec6fe6ccc028918 (diff) | |
download | bcm5719-llvm-c9fa5dd61882c2b272d887abc3a33dae5bbbcedc.tar.gz bcm5719-llvm-c9fa5dd61882c2b272d887abc3a33dae5bbbcedc.zip |
[PATCH][PPC64LE] Correct little-endian usage of vmrgh* and vmrgl*.
Because the PowerPC vmrgh* and vmrgl* instructions have a built-in
big-endian bias, it is necessary to swap their inputs in little-endian
mode when using them to implement a vector shuffle. This was
previously missed in the vector LE implementation.
There was already logic to distinguish between unary and "normal"
vmrg* vector shuffles, so this patch extends that logic to use a third
option: "swapped" vmrg* vector shuffles that are used for little
endian in place of the "normal" ones.
I've updated the vec-shuffle-le.ll test to check for the expected
register ordering on the generated instructions.
This bug was discovered when testing the LE and ELFv2 patches for
safety if they were backported to 3.4. A different vectorization
decision was made in 3.4 than on mainline trunk, and that exposed the
problem. I've verified this fix takes care of that issue.
llvm-svn: 213915
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 65 |
1 files changed, 43 insertions, 22 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 456f978835a..d596bda49fd 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -926,31 +926,51 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes). +/// The ShuffleKind distinguishes between big-endian merges with two +/// different inputs (0), either-endian merges with two identical inputs (1), +/// and little-endian merges with two different inputs (2). For the latter, +/// the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, - bool isUnary, SelectionDAG &DAG) { + unsigned ShuffleKind, SelectionDAG &DAG) { if (DAG.getTarget().getDataLayout()->isLittleEndian()) { - if (!isUnary) + if (ShuffleKind == 1) // unary + return isVMerge(N, UnitSize, 0, 0); + else if (ShuffleKind == 2) // swapped return isVMerge(N, UnitSize, 0, 16); - return isVMerge(N, UnitSize, 0, 0); + else + return false; } else { - if (!isUnary) + if (ShuffleKind == 1) // unary + return isVMerge(N, UnitSize, 8, 8); + else if (ShuffleKind == 0) // normal return isVMerge(N, UnitSize, 8, 24); - return isVMerge(N, UnitSize, 8, 8); + else + return false; } } /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes). +/// The ShuffleKind distinguishes between big-endian merges with two +/// different inputs (0), either-endian merges with two identical inputs (1), +/// and little-endian merges with two different inputs (2). For the latter, +/// the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, - bool isUnary, SelectionDAG &DAG) { + unsigned ShuffleKind, SelectionDAG &DAG) { if (DAG.getTarget().getDataLayout()->isLittleEndian()) { - if (!isUnary) + if (ShuffleKind == 1) // unary + return isVMerge(N, UnitSize, 8, 8); + else if (ShuffleKind == 2) // swapped return isVMerge(N, UnitSize, 8, 24); - return isVMerge(N, UnitSize, 8, 8); + else + return false; } else { - if (!isUnary) + if (ShuffleKind == 1) // unary + return isVMerge(N, UnitSize, 0, 0); + else if (ShuffleKind == 0) // normal return isVMerge(N, UnitSize, 0, 16); - return isVMerge(N, UnitSize, 0, 0); + else + return false; } } @@ -6021,12 +6041,12 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, PPC::isVPKUWUMShuffleMask(SVOp, true, DAG) || PPC::isVPKUHUMShuffleMask(SVOp, true, DAG) || PPC::isVSLDOIShuffleMask(SVOp, true, DAG) != -1 || - PPC::isVMRGLShuffleMask(SVOp, 1, true, DAG) || - PPC::isVMRGLShuffleMask(SVOp, 2, true, DAG) || - PPC::isVMRGLShuffleMask(SVOp, 4, true, DAG) || - PPC::isVMRGHShuffleMask(SVOp, 1, true, DAG) || - PPC::isVMRGHShuffleMask(SVOp, 2, true, DAG) || - PPC::isVMRGHShuffleMask(SVOp, 4, true, DAG)) { + PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) || + PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) || + PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) || + PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) || + PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) || + PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) { return Op; } } @@ -6034,15 +6054,16 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, // Altivec has a variety of "shuffle immediates" that take two vector inputs // and produce a fixed permutation. If any of these match, do not lower to // VPERM. + unsigned int ShuffleKind = isLittleEndian ? 2 : 0; if (PPC::isVPKUWUMShuffleMask(SVOp, false, DAG) || PPC::isVPKUHUMShuffleMask(SVOp, false, DAG) || PPC::isVSLDOIShuffleMask(SVOp, false, DAG) != -1 || - PPC::isVMRGLShuffleMask(SVOp, 1, false, DAG) || - PPC::isVMRGLShuffleMask(SVOp, 2, false, DAG) || - PPC::isVMRGLShuffleMask(SVOp, 4, false, DAG) || - PPC::isVMRGHShuffleMask(SVOp, 1, false, DAG) || - PPC::isVMRGHShuffleMask(SVOp, 2, false, DAG) || - PPC::isVMRGHShuffleMask(SVOp, 4, false, DAG)) + PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) || + PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) || + PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) || + PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) || + PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) || + PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG)) return Op; // Check to see if this is a shuffle of 4-byte values. If so, we can use our |