author     Simon Pilgrim <llvm-dev@redking.me.uk>    2016-11-17 12:14:49 +0000
committer  Simon Pilgrim <llvm-dev@redking.me.uk>    2016-11-17 12:14:49 +0000
commit     8eca5520dc5d7a570f52dff9576c04fb54657bba (patch)
tree       2f32929332ecba43b3f34f49025d8ffa32b943ca /llvm/lib/Target
parent     c4d733cd6af7f821657f44d65ada5177e6d7304f (diff)
[X86][SSE] Improve lowering of vXi64 multiply with known zero 32-bit halves
vXi64 multiplication is lowered into 3 calls of vpmuludq with the upper/lower 32-bit halves. If any of these halves are zero then we can remove individual calls. Although there was isBuildVectorAllZeros code to do this, I don't think it ever worked (maybe just for constant-folded cases that don't seem to be tested for any longer).

This requires additional X86ISD support for computeKnownBitsForTargetNode; so far I've just added support for X86ISD::VZEXT (VPMOVZX* - helping the AVX2+ cases).

Partial fix for PR30845

Differential Revision: https://reviews.llvm.org/D26590

llvm-svn: 287223
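As background for the hunk below, here is a minimal scalar sketch of the decomposition the lowering relies on (not part of the commit; the function name and the runtime zero tests standing in for MaskedValueIsZero are illustrative only). Splitting each 64-bit operand into 32-bit halves, the low 64 bits of the product are AloBlo + ((AloBhi + AhiBlo) << 32), so any partial product whose 32-bit half is known to be zero can simply be dropped.

#include <cstdint>

// Scalar model of the vXi64 lowering: one 64-bit multiply built from
// 32x32->64 partial products (what PMULUDQ computes per vector lane).
// Partial products involving a zero 32-bit half are skipped, mirroring
// the DAG.MaskedValueIsZero checks added to LowerMUL.
uint64_t mul64ViaHalves(uint64_t A, uint64_t B) {
  uint64_t ALo = A & 0xFFFFFFFFull, AHi = A >> 32;
  uint64_t BLo = B & 0xFFFFFFFFull, BHi = B >> 32;

  // Runtime stand-ins for the known-bits queries; in the DAG these facts
  // come from computeKnownBits, not from testing the actual values.
  bool ALoIsZero = ALo == 0, AHiIsZero = AHi == 0;
  bool BLoIsZero = BLo == 0, BHiIsZero = BHi == 0;

  uint64_t Res = 0;
  if (!ALoIsZero && !BLoIsZero)
    Res += ALo * BLo;                  // AloBlo = pmuludq(a, b)
  if (!ALoIsZero && !BHiIsZero)
    Res += (ALo * BHi) << 32;          // AloBhi = psllqi(pmuludq(a, Bhi), 32)
  if (!AHiIsZero && !BLoIsZero)
    Res += (AHi * BLo) << 32;          // AhiBlo = psllqi(pmuludq(Ahi, b), 32)
  // AHi * BHi only affects bits >= 64 and never contributes to the result.
  return Res;                          // == A * B (mod 2^64)
}

If both operands are zero-extended (e.g. produced by pmovzxdq), AHiIsZero and BHiIsZero both hold and only the single AloBlo multiply remains.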
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 62
1 file changed, 43 insertions, 19 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5d81debb5f6..838e75b4b36 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -20144,33 +20144,43 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
// AloBhi = psllqi(AloBhi, 32);
// AhiBlo = psllqi(AhiBlo, 32);
// return AloBlo + AloBhi + AhiBlo;
+ APInt LowerBitsMask = APInt::getLowBitsSet(64, 32);
+ bool ALoiIsZero = DAG.MaskedValueIsZero(A, LowerBitsMask);
+ bool BLoiIsZero = DAG.MaskedValueIsZero(B, LowerBitsMask);
- SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG);
- SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG);
+ APInt UpperBitsMask = APInt::getHighBitsSet(64, 32);
+ bool AHiIsZero = DAG.MaskedValueIsZero(A, UpperBitsMask);
+ bool BHiIsZero = DAG.MaskedValueIsZero(B, UpperBitsMask);
- SDValue AhiBlo = Ahi;
- SDValue AloBhi = Bhi;
// Bit cast to 32-bit vectors for MULUDQ
MVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 :
(VT == MVT::v4i64) ? MVT::v8i32 : MVT::v16i32;
- A = DAG.getBitcast(MulVT, A);
- B = DAG.getBitcast(MulVT, B);
- Ahi = DAG.getBitcast(MulVT, Ahi);
- Bhi = DAG.getBitcast(MulVT, Bhi);
-
- SDValue AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, B);
- // After shifting right const values the result may be all-zero.
- if (!ISD::isBuildVectorAllZeros(Ahi.getNode())) {
- AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B);
- AhiBlo = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AhiBlo, 32, DAG);
- }
- if (!ISD::isBuildVectorAllZeros(Bhi.getNode())) {
- AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi);
+ SDValue Alo = DAG.getBitcast(MulVT, A);
+ SDValue Blo = DAG.getBitcast(MulVT, B);
+
+ SDValue Res;
+
+ // Only multiply lo/hi halves that aren't known to be zero.
+ if (!ALoiIsZero && !BLoiIsZero)
+ Res = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Alo, Blo);
+
+ if (!ALoiIsZero && !BHiIsZero) {
+ SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG);
+ Bhi = DAG.getBitcast(MulVT, Bhi);
+ SDValue AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Alo, Bhi);
AloBhi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AloBhi, 32, DAG);
+ Res = (Res.getNode() ? DAG.getNode(ISD::ADD, dl, VT, Res, AloBhi) : AloBhi);
}
- SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
- return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
+ if (!AHiIsZero && !BLoiIsZero) {
+ SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG);
+ Ahi = DAG.getBitcast(MulVT, Ahi);
+ SDValue AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, Blo);
+ AhiBlo = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AhiBlo, 32, DAG);
+ Res = (Res.getNode() ? DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo) : AhiBlo);
+ }
+
+ return (Res.getNode() ? Res : getZeroVector(VT, Subtarget, DAG, dl));
}
static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
@@ -25256,6 +25266,20 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - NumLoBits);
break;
}
+ case X86ISD::VZEXT: {
+ SDValue N0 = Op.getOperand(0);
+ unsigned NumElts = Op.getValueType().getVectorNumElements();
+ unsigned InNumElts = N0.getValueType().getVectorNumElements();
+ unsigned InBitWidth = N0.getValueType().getScalarSizeInBits();
+
+ KnownZero = KnownOne = APInt(InBitWidth, 0);
+ APInt DemandedElts = APInt::getLowBitsSet(InNumElts, NumElts);
+ DAG.computeKnownBits(N0, KnownZero, KnownOne, DemandedElts, Depth + 1);
+ KnownOne = KnownOne.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - InBitWidth);
+ break;
+ }
}
}
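The second hunk teaches computeKnownBitsForTargetNode about X86ISD::VZEXT: it queries the known bits of only the low source elements that actually feed the result (the DemandedElts mask), then extends them. A minimal scalar sketch of the rule it implements follows (illustrative only; the helper names are not from the commit): known-zero bits of the narrow source survive the extension, and every newly introduced high bit is known zero.

#include <cstdint>

// Bitmask with the low N bits set (scalar stand-in for the APInt masks).
static uint64_t lowBits(unsigned N) { return N >= 64 ? ~0ull : (1ull << N) - 1; }

// Known-zero propagation through a zero-extend from InBitWidth to BitWidth bits.
uint64_t knownZeroAfterZExt(uint64_t SrcKnownZero, unsigned InBitWidth,
                            unsigned BitWidth) {
  uint64_t KnownZero = SrcKnownZero & lowBits(InBitWidth);  // zext of the source mask
  KnownZero |= lowBits(BitWidth) & ~lowBits(InBitWidth);    // getHighBitsSet(BitWidth, BitWidth - InBitWidth)
  return KnownZero;
}

For a VPMOVZXDQ-style extension (InBitWidth = 32, BitWidth = 64) this reports bits 63:32 as known zero, which is what lets the MaskedValueIsZero checks in LowerMUL above prove the upper halves of zero-extended operands are zero and drop the corresponding PMULUDQ calls.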