diff options
author | Craig Topper <craig.topper@intel.com> | 2018-06-30 17:49:42 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2018-06-30 17:49:42 +0000 |
commit | 5f28d50d27cf7bc9d86eaa8f904c043a068c55d2 (patch) | |
tree | 889c79cc5ae1296ba69b5ab13d28f1b7af6720ca | |
parent | 16a42ca274f95e5b4eb04719109f1af028b3b159 (diff) | |
download | bcm5719-llvm-5f28d50d27cf7bc9d86eaa8f904c043a068c55d2.tar.gz bcm5719-llvm-5f28d50d27cf7bc9d86eaa8f904c043a068c55d2.zip |
[X86] When combining load to BZHI, make sure we create the shift instruction with an i8 type.
This combine runs pretty late and causes us to introduce a shift after the op legalization phase has run. We need to be sure we create the shift with the proper type for the shift amount. If we don't do this, we will still re-legalize the operation properly, but we won't get a chance to fully optimize the truncate that gets inserted.
So this patch adds the necessary truncate when the shift is created. I've also narrowed the subtract that gets created to always be an i32 type. The truncate would have triggered SimplifyDemandedBits to optimize it anyway. But using a more appropriate VT here is free and saves an optimization step.
llvm-svn: 336051
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 9 |
1 files changed, 5 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 5c9715b454a..47631e99b99 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -34646,7 +34646,7 @@ static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT) { // It's equivalent to performing bzhi (zero high bits) on the input, with the // same index of the load. static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { + const X86Subtarget &Subtarget) { MVT VT = Node->getSimpleValueType(0); SDLoc dl(Node); @@ -34701,15 +34701,16 @@ static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG, // <- (and (srl 0xFFFFFFFF, (sub 32, idx))) // that will be replaced with one bzhi instruction. SDValue Inp = (i == 0) ? Node->getOperand(1) : Node->getOperand(0); - SDValue SizeC = DAG.getConstant(VT.getSizeInBits(), dl, VT); + SDValue SizeC = DAG.getConstant(VT.getSizeInBits(), dl, MVT::i32); // Get the Node which indexes into the array. SDValue Index = getIndexFromUnindexedLoad(Ld); if (!Index) return SDValue(); - Index = DAG.getZExtOrTrunc(Index, dl, VT); + Index = DAG.getZExtOrTrunc(Index, dl, MVT::i32); - SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SizeC, Index); + SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, SizeC, Index); + Sub = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Sub); SDValue AllOnes = DAG.getAllOnesConstant(dl, VT); SDValue LShr = DAG.getNode(ISD::SRL, dl, VT, AllOnes, Sub); |