summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2018-12-05 14:46:37 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2018-12-05 14:46:37 +0000
commit: 32483668d7104dab7a5e10b247fe047d6002d7fb (patch)
tree: 315e0233428cf3040a8cf0f7338badd6e622adf9 /llvm/lib
parent: 44a40046c8137638be12d0ca19eeb3cb3174ce56 (diff)
download: bcm5719-llvm-32483668d7104dab7a5e10b247fe047d6002d7fb.tar.gz
bcm5719-llvm-32483668d7104dab7a5e10b247fe047d6002d7fb.zip
[X86][SSE] Begun adding modulo rotate support to LowerRotate
Prep work for PR38243 - this mainly adds comments marking where modulo support still needs to be added (adding it right now causes massive codegen regressions). I've also consistently added modulo folding for uniform (splat) constant rotate amounts (although at the moment we have no way to trigger this) and removed the old out-of-range assertions. llvm-svn: 348366
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 16
1 files changed, 12 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 79c8f6e5535..562c4c12f4b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24765,6 +24765,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
unsigned Opcode = Op.getOpcode();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ // AVX512 implicitly uses modulo rotation amounts.
if (Subtarget.hasAVX512() && 32 <= EltSizeInBits) {
// Attempt to rotate by immediate.
APInt UndefElts;
@@ -24788,6 +24789,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// XOP has 128-bit vector variable + immediate rotates.
// +ve/-ve Amt = rotate left/right - just need to handle ISD::ROTL.
+ // XOP implicitly uses modulo rotation amounts.
if (Subtarget.hasXOP()) {
if (VT.is256BitVector())
return split256IntArith(Op, DAG);
@@ -24796,8 +24798,10 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// Attempt to rotate by immediate.
if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
if (auto *RotateConst = BVAmt->getConstantSplatNode()) {
- uint64_t RotateAmt = RotateConst->getAPIntValue().getZExtValue();
- assert(RotateAmt < EltSizeInBits && "Rotation out of range");
+ uint64_t RotateAmt = RotateConst->getAPIntValue().urem(EltSizeInBits);
+ if (RotateAmt == 0)
+ return R;
+
return DAG.getNode(X86ISD::VROTLI, DL, VT, R,
DAG.getConstant(RotateAmt, DL, MVT::i8));
}
@@ -24820,8 +24824,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// TODO - legalizers should be able to handle this.
if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
if (auto *RotateConst = BVAmt->getConstantSplatNode()) {
- uint64_t RotateAmt = RotateConst->getAPIntValue().getZExtValue();
- assert(RotateAmt < EltSizeInBits && "Rotation out of range");
+ uint64_t RotateAmt = RotateConst->getAPIntValue().urem(EltSizeInBits);
if (RotateAmt == 0)
return R;
@@ -24832,6 +24835,8 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
}
}
+ // TODO: ISD::ROT* uses modulo rotate amounts, we need to handle this.
+
// Rotate by splat - expand back to shifts.
// TODO - legalizers should be able to handle this.
if (EltSizeInBits >= 16 || Subtarget.hasBWI()) {
@@ -24856,6 +24861,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::OR, DL, VT, SHL, SRL);
}
+ // We don't need ModuloAmt here as we just peek at individual bits.
MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
@@ -24911,6 +24917,8 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
return SignBitSelect(VT, Amt, M, R);
}
+ // TODO: We need explicit modulo rotation amounts for everything from here on.
+
bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
bool LegalVarShifts = SupportedVectorVarShift(VT, Subtarget, ISD::SHL) &&
SupportedVectorVarShift(VT, Subtarget, ISD::SRL);
OpenPOWER on IntegriCloud