summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Chandler Carruth <chandlerc@gmail.com> 2014-09-20 03:57:01 +0000
committer: Chandler Carruth <chandlerc@gmail.com> 2014-09-20 03:57:01 +0000
commit: 87dcf093672b600fae1533177707e050721fa735 (patch)
tree: 46c929f9873e1d82407d44e591987e94ccdfc7d2 /llvm/lib
parent: 00389f3ed96fb155f74ae235ba531c6026fc09d4 (diff)
download: bcm5719-llvm-87dcf093672b600fae1533177707e050721fa735.tar.gz
download: bcm5719-llvm-87dcf093672b600fae1533177707e050721fa735.zip
[x86] Refactor the code for emitting INSERTPS to reuse the zeroable mask analysis used elsewhere.

This removes the last duplicate of this logic. Also simplify the code here quite a bit. No functionality changed.

llvm-svn: 218176
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 40
1 file changed, 15 insertions, 25 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index aaac02f3f80..7ca44b4615a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7802,35 +7802,25 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// When using INSERTPS we can zero any lane of the destination. Collect
// the zero inputs into a mask and drop them from the lanes of V1 which
// actually need to be present as inputs to the INSERTPS.
- unsigned ZMask = 0;
- if (ISD::isBuildVectorAllZeros(V1.getNode())) {
- ZMask = 0xF ^ (1 << V2Index);
- } else if (V1.getOpcode() == ISD::BUILD_VECTOR) {
- for (int i = 0; i < 4; ++i) {
- int M = Mask[i];
- if (M >= 4)
- continue;
- if (M > -1) {
- SDValue Input = V1.getOperand(M);
- if (Input.getOpcode() != ISD::UNDEF &&
- !X86::isZeroNode(Input)) {
- // A non-zero input!
- ZMask = 0;
- break;
- }
- }
- ZMask |= 1 << i;
- }
- }
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
// Synthesize a shuffle mask for the non-zero and non-v2 inputs.
- int InsertShuffleMask[4] = {-1, -1, -1, -1};
+ bool InsertNeedsShuffle = false;
+ unsigned ZMask = 0;
for (int i = 0; i < 4; ++i)
- if (i != V2Index && (ZMask & (1 << i)) == 0)
- InsertShuffleMask[i] = Mask[i];
+ if (i != V2Index) {
+ if (Zeroable[i]) {
+ ZMask |= 1 << i;
+ } else if (Mask[i] != i) {
+ InsertNeedsShuffle = true;
+ break;
+ }
+ }
- if (isNoopShuffleMask(InsertShuffleMask)) {
- // Replace V1 with undef if nothing from V1 survives the INSERTPS.
+ // We don't want to use INSERTPS or other insertion techniques if it will
+ // require shuffling anyways.
+ if (!InsertNeedsShuffle) {
+ // If all of V1 is zeroable, replace it with undef.
if ((ZMask | 1 << V2Index) == 0xF)
V1 = DAG.getUNDEF(MVT::v4f32);
OpenPOWER on IntegriCloud