summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-06-30 17:49:42 +0000
committerCraig Topper <craig.topper@intel.com>2018-06-30 17:49:42 +0000
commit5f28d50d27cf7bc9d86eaa8f904c043a068c55d2 (patch)
tree889c79cc5ae1296ba69b5ab13d28f1b7af6720ca /llvm/lib
parent16a42ca274f95e5b4eb04719109f1af028b3b159 (diff)
downloadbcm5719-llvm-5f28d50d27cf7bc9d86eaa8f904c043a068c55d2.tar.gz
bcm5719-llvm-5f28d50d27cf7bc9d86eaa8f904c043a068c55d2.zip
[X86] When combining load to BZHI, make sure we create the shift instruction with an i8 type.
This combine runs pretty late and causes us to introduce a shift after the op legalization phase has run. We need to be sure we create the shift with the proper type for the shift amount. If we don't do this, we will still re-legalize the operation properly, but we won't get a chance to fully optimize the truncate that gets inserted. So this patch adds the necessary truncate when the shift is created. I've also narrowed the subtract that gets created to always be an i32 type. The truncate would have trigered SimplifyDemandedBits to optimize it anyway. But using a more appropriate VT here is free and saves an optimization step. llvm-svn: 336051
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp9
1 files changed, 5 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5c9715b454a..47631e99b99 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34646,7 +34646,7 @@ static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT) {
// It's equivalent to performing bzhi (zero high bits) on the input, with the
// same index of the load.
static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+ const X86Subtarget &Subtarget) {
MVT VT = Node->getSimpleValueType(0);
SDLoc dl(Node);
@@ -34701,15 +34701,16 @@ static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG,
// <- (and (srl 0xFFFFFFFF, (sub 32, idx)))
// that will be replaced with one bzhi instruction.
SDValue Inp = (i == 0) ? Node->getOperand(1) : Node->getOperand(0);
- SDValue SizeC = DAG.getConstant(VT.getSizeInBits(), dl, VT);
+ SDValue SizeC = DAG.getConstant(VT.getSizeInBits(), dl, MVT::i32);
// Get the Node which indexes into the array.
SDValue Index = getIndexFromUnindexedLoad(Ld);
if (!Index)
return SDValue();
- Index = DAG.getZExtOrTrunc(Index, dl, VT);
+ Index = DAG.getZExtOrTrunc(Index, dl, MVT::i32);
- SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SizeC, Index);
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, SizeC, Index);
+ Sub = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Sub);
SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
SDValue LShr = DAG.getNode(ISD::SRL, dl, VT, AllOnes, Sub);
OpenPOWER on IntegriCloud