-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp              26
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp             51
-rw-r--r--  llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll  13
3 files changed, 90 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index daff1f228c7..34f5014d1b0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2772,6 +2772,32 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
     }
   }
 
+  // Constant fold unary operations with a vector integer operand.
+  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand.getNode())) {
+    APInt Val;
+    APInt DummyUndefs;
+    unsigned SplatBitSize;
+    bool DummyHasUndefs;
+    if (BV->isConstantSplat(Val, DummyUndefs, SplatBitSize, DummyHasUndefs)) {
+      switch (Opcode) {
+      default:
+        // FIXME: Entirely reasonable to perform folding of other unary
+        // operations here as the need arises.
+        break;
+      case ISD::UINT_TO_FP:
+      case ISD::SINT_TO_FP: {
+        APFloat APF(
+            EVTToAPFloatSemantics(VT.getVectorElementType()),
+            APInt::getNullValue(VT.getVectorElementType().getSizeInBits()));
+        (void)APF.convertFromAPInt(Val, Opcode == ISD::SINT_TO_FP,
+                                   APFloat::rmNearestTiesToEven);
+
+        return getConstantFP(APF, VT);
+      }
+      }
+    }
+  }
+
   unsigned OpOpcode = Operand.getNode()->getOpcode();
   switch (Opcode) {
   case ISD::TokenFactor:
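
With the fold above in place, a SINT_TO_FP or UINT_TO_FP whose operand is a constant splat BUILD_VECTOR is folded to a floating-point constant at node-creation time, before any combine runs. A minimal sketch of IR that should exercise the new path (the function name is illustrative; this test is not part of the commit):

; Hypothetical example, not part of this commit: the sitofp of the constant
; splat <i32 2, ...> reaches SelectionDAG::getNode() as SINT_TO_FP of a
; BUILD_VECTOR splat, so it should fold to a constant and emit no scvtf.
define <4 x float> @splat_sitofp_fold() nounwind {
  %r = sitofp <4 x i32> <i32 2, i32 2, i32 2, i32 2> to <4 x float>
  ret <4 x float> %r
}
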
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7b77c59ed11..bf3832ca1de 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6417,10 +6417,61 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
+                                                         SelectionDAG &DAG) {
+  // Take advantage of vector comparisons producing 0 or -1 in each lane to
+  // optimize away the operation when its input is a constant.
+  //
+  // The general transformation is:
+  //    UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
+  //       AND(VECTOR_CMP(x,y), constant2)
+  //    constant2 = UNARYOP(constant)
+
+  // Early exit if this isn't a vector operation or if the operand of the
+  // unary operation isn't a bitwise AND of a vector compare.
+  EVT VT = N->getValueType(0);
+  if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
+      N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC)
+    return SDValue();
+
+  // Now check that the other operand of the AND is a constant splat. We could
+  // make the transformation for non-constant splats as well, but it's unclear
+  // that would be a benefit as it would not eliminate any operations, just
+  // perform one more step in scalar code before moving to the vector unit.
+  if (BuildVectorSDNode *BV =
+          dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
+    // Bail out if the vector isn't a constant splat.
+    if (!BV->getConstantSplatNode())
+      return SDValue();
+
+    // Everything checks out. Build up the new and improved node.
+    SDLoc DL(N);
+    EVT IntVT = BV->getValueType(0);
+    // Create a new constant of the appropriate type for the transformed
+    // DAG.
+    SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
+    // The AND node needs bitcasts to/from an integer vector type around it.
+    SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
+    SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
+                                 N->getOperand(0)->getOperand(0), MaskConst);
+    SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
+    return Res;
+  }
+
+  return SDValue();
+}
+
 static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG) {
+  // First try to optimize away the conversion when it's conditionally from
+  // a constant. Vectors only.
+  SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG);
+  if (Res != SDValue())
+    return Res;
+
   EVT VT = N->getValueType(0);
   if (VT != MVT::f32 && VT != MVT::f64)
     return SDValue();
+
   // Only optimize when the source and destination types have the same width.
   if (VT.getSizeInBits() != N->getOperand(0).getValueType().getSizeInBits())
     return SDValue();
diff --git a/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll b/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll
new file mode 100644
index 00000000000..b10fe758d95
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -asm-verbose=false -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind {
+; CHECK-LABEL: foo:
+; CHECK-NEXT: fcmeq.4s v0, v0, v1
+; CHECK-NEXT: fmov.4s v1, #1.00000000
+; CHECK-NEXT: and.16b v0, v0, v1
+; CHECK-NEXT: ret
+  %cmp = fcmp oeq <4 x float> %val, %test
+  %ext = zext <4 x i1> %cmp to <4 x i32>
+  %result = sitofp <4 x i32> %ext to <4 x float>
+  ret <4 x float> %result
+}
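
For contrast, a sketch of a case the new combine deliberately skips: when the AND mask is a constant but not a splat, getConstantSplatNode() returns null and the conversion is kept. The function name is hypothetical and no CHECK lines are claimed:

; Hypothetical negative example, not part of this commit: the mask is a
; non-splat constant, so performVectorCompareAndMaskUnaryOpCombine bails
; out and the sitofp remains a real conversion.
define <4 x float> @foo_nonsplat(<4 x float> %val, <4 x float> %test) nounwind {
  %cmp = fcmp oeq <4 x float> %val, %test
  %ext = sext <4 x i1> %cmp to <4 x i32>
  %mask = and <4 x i32> %ext, <i32 1, i32 2, i32 4, i32 8>
  %result = sitofp <4 x i32> %mask to <4 x float>
  ret <4 x float> %result
}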