-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp              26
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp             51
-rw-r--r--  llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll  13
3 files changed, 90 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index daff1f228c7..34f5014d1b0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2772,6 +2772,32 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
     }
   }
 
+  // Constant fold unary operations with a vector integer operand.
+  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand.getNode())) {
+    APInt Val;
+    APInt DummyUndefs;
+    unsigned SplatBitSize;
+    bool DummyHasUndefs;
+    if (BV->isConstantSplat(Val, DummyUndefs, SplatBitSize, DummyHasUndefs)) {
+      switch (Opcode) {
+      default:
+        // FIXME: Entirely reasonable to perform folding of other unary
+        // operations here as the need arises.
+        break;
+      case ISD::UINT_TO_FP:
+      case ISD::SINT_TO_FP: {
+        APFloat APF(
+            EVTToAPFloatSemantics(VT.getVectorElementType()),
+            APInt::getNullValue(VT.getVectorElementType().getSizeInBits()));
+        (void)APF.convertFromAPInt(Val, Opcode == ISD::SINT_TO_FP,
+                                   APFloat::rmNearestTiesToEven);
+
+        return getConstantFP(APF, VT);
+      }
+      }
+    }
+  }
+
   unsigned OpOpcode = Operand.getNode()->getOpcode();
   switch (Opcode) {
   case ISD::TokenFactor:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7b77c59ed11..bf3832ca1de 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6417,10 +6417,61 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
+                                                         SelectionDAG &DAG) {
+  // Take advantage of vector comparisons producing 0 or -1 in each lane to
+  // optimize away operation when it's from a constant.
+  //
+  // The general transformation is:
+  //    UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
+  //       AND(VECTOR_CMP(x,y), constant2)
+  //    constant2 = UNARYOP(constant)
+
+  // Early exit if this isn't a vector operation or if the operand of the
+  // unary operation isn't a bitwise AND.
+  EVT VT = N->getValueType(0);
+  if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
+      N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC)
+    return SDValue();
+
+  // Now check that the other operand of the AND is a constant splat. We could
+  // make the transformation for non-constant splats as well, but it's unclear
+  // that would be a benefit as it would not eliminate any operations, just
+  // perform one more step in scalar code before moving to the vector unit.
+  if (BuildVectorSDNode *BV =
+          dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
+    // Bail out if the vector isn't a constant splat.
+    if (!BV->getConstantSplatNode())
+      return SDValue();
+
+    // Everything checks out. Build up the new and improved node.
+    SDLoc DL(N);
+    EVT IntVT = BV->getValueType(0);
+    // Create a new constant of the appropriate type for the transformed
+    // DAG.
+    SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
+    // The AND node needs bitcasts to/from an integer vector type around it.
+    SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
+    SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
+                                 N->getOperand(0)->getOperand(0), MaskConst);
+    SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
+    return Res;
+  }
+
+  return SDValue();
+}
+
 static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG) {
+  // First try to optimize away the conversion when it's conditionally from
+  // a constant. Vectors only.
+  SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG);
+  if (Res != SDValue())
+    return Res;
+
   EVT VT = N->getValueType(0);
   if (VT != MVT::f32 && VT != MVT::f64)
     return SDValue();
+  // Only optimize when the source and destination types have the same width.
   if (VT.getSizeInBits() != N->getOperand(0).getValueType().getSizeInBits())
     return SDValue();
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll b/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll
new file mode 100644
index 00000000000..b10fe758d95
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -asm-verbose=false -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind {
+; CHECK-LABEL: foo:
+; CHECK-NEXT: fcmeq.4s v0, v0, v1
+; CHECK-NEXT: fmov.4s v1, #1.00000000
+; CHECK-NEXT: and.16b v0, v0, v1
+; CHECK-NEXT: ret
+  %cmp = fcmp oeq <4 x float> %val, %test
+  %ext = zext <4 x i1> %cmp to <4 x i32>
+  %result = sitofp <4 x i32> %ext to <4 x float>
+  ret <4 x float> %result
+}
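
Note on why the SelectionDAG.cpp change is needed: the AArch64 combine rebuilds the conversion as UNARYOP(constant) via DAG.getNode(), so getNode() must be able to constant fold SINT_TO_FP/UINT_TO_FP of a constant-splat BUILD_VECTOR; otherwise that call would just create a fresh conversion node and nothing would be simplified. For the committed test, the rewrite is effectively sint_to_fp(and(setcc, splat(i32 1))) --> bitcast(and(setcc, bitcast(splat(f32 1.0)))). A minimal sketch of the fold in isolation, as a hypothetical standalone test (not part of this commit; the expected behavior is an assumption, derived by hand rather than from llc output):

; Hypothetical: with the new getNode() fold, the splat conversion should
; become a floating-point constant at ISel time, with no scvtf emitted.
define <4 x float> @splat_fold() nounwind {
  %r = sitofp <4 x i32> <i32 1, i32 1, i32 1, i32 1> to <4 x float>
  ret <4 x float> %r
}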
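
The committed test only covers sitofp. Assuming performIntToFpCombine is also invoked for ISD::UINT_TO_FP (as the UINT_TO_FP case in the new getNode() fold suggests), the same rewrite should apply to the unsigned conversion. A hypothetical unsigned companion to the test above (again hand-derived; the expected codegen is an assumption, not verified output):

; The mask constant is a splat of 1, and uitofp(1) == sitofp(1) == 1.0, so
; one would expect the same fcmeq/fmov/and sequence as in @foo above.
define <4 x float> @foo_unsigned(<4 x float> %val, <4 x float> %test) nounwind {
  %cmp = fcmp oeq <4 x float> %val, %test
  %ext = zext <4 x i1> %cmp to <4 x i32>
  %result = uitofp <4 x i32> %ext to <4 x float>
  ret <4 x float> %result
}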

