summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 56
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp 9
2 files changed, 57 insertions, 8 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index c3ac796a0a4..0a44538366f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1700,12 +1700,46 @@ bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
return true;
}
+static SDValue stripBitcast(SDValue Val) { // Look through at most one ISD::BITCAST.
+ return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val; // Bitcast source if Val is a bitcast, otherwise Val itself.
+}
+
+// Figure out if In is really an extract of the high 16 bits of a dword, i.e. (truncate (srl X, 16)); Out is set to X only when this returns true.
+static bool isExtractHiElt(SDValue In, SDValue &Out) {
+ In = stripBitcast(In); // Ignore one bitcast wrapped around the truncate.
+ if (In.getOpcode() != ISD::TRUNCATE)
+ return false;
+
+ SDValue Srl = In.getOperand(0);
+ if (Srl.getOpcode() == ISD::SRL) {
+ if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
+ if (ShiftAmt->getZExtValue() == 16) { // Only a constant shift of exactly 16 matches; the width of X is not checked here.
+ Out = stripBitcast(Srl.getOperand(0)); // Report the shifted value, minus one bitcast.
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+// Look through operations that obscure just looking at the low 16 bits of the
+// same register: a truncate of a 32-bit value yields its source (minus one bitcast); anything else is returned unchanged.
+static SDValue stripExtractLoElt(SDValue In) {
+ if (In.getOpcode() == ISD::TRUNCATE) {
+ SDValue Src = In.getOperand(0);
+ if (Src.getValueType().getSizeInBits() == 32) // Only a 32-bit truncate source is looked through.
+ return stripBitcast(Src);
+ }
+
+ return In;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
unsigned Mods = 0;
Src = In;
- // FIXME: Look for on separate components
if (Src.getOpcode() == ISD::FNEG) {
Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
Src = Src.getOperand(0);
@@ -1714,19 +1748,28 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
if (Src.getOpcode() == ISD::BUILD_VECTOR) {
unsigned VecMods = Mods;
- SDValue Lo = Src.getOperand(0);
- SDValue Hi = Src.getOperand(1);
+ SDValue Lo = stripBitcast(Src.getOperand(0));
+ SDValue Hi = stripBitcast(Src.getOperand(1));
if (Lo.getOpcode() == ISD::FNEG) {
- Lo = Lo.getOperand(0);
+ Lo = stripBitcast(Lo.getOperand(0));
Mods ^= SISrcMods::NEG;
}
if (Hi.getOpcode() == ISD::FNEG) {
- Hi = Hi.getOperand(0);
+ Hi = stripBitcast(Hi.getOperand(0));
Mods ^= SISrcMods::NEG_HI;
}
+ if (isExtractHiElt(Lo, Lo))
+ Mods |= SISrcMods::OP_SEL_0;
+
+ if (isExtractHiElt(Hi, Hi))
+ Mods |= SISrcMods::OP_SEL_1;
+
+ Lo = stripExtractLoElt(Lo);
+ Hi = stripExtractLoElt(Hi);
+
if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
// Really a scalar input. Just select from the low half of the register to
// avoid packing.
@@ -1740,9 +1783,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
}
// Packed instructions do not have abs modifiers.
-
- // FIXME: Handle abs/neg of individual components.
- // FIXME: Handle swizzling with op_sel
Mods |= SISrcMods::OP_SEL_1;
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 286be355bc1..4b27bba8188 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2660,6 +2660,15 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
SDValue Vec = Op.getOperand(0);
SDValue Idx = Op.getOperand(1);
+ DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
+
+ // Make sure we do any optimizations that will make it easier to fold
+ // source modifiers before obscuring it with bit operations.
+
+ // XXX - Why doesn't this get called when vector_shuffle is expanded?
+ if (SDValue Combined = performExtractVectorEltCombine(Op.getNode(), DCI))
+ return Combined;
+
if (const ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
SDValue Result = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Vec);
OpenPOWER on IntegriCloud