summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 18
-rw-r--r-- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp 28
-rw-r--r-- llvm/lib/Target/ARM64/ARM64ISelLowering.cpp 34
-rw-r--r-- llvm/test/CodeGen/ARM64/neon-v1i1-setcc.ll 65
4 files changed, 114 insertions, 31 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 940a9c90593..04de324eabf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -331,12 +331,24 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
assert(N->getValueType(0).isVector() &&
N->getOperand(0).getValueType().isVector() &&
"Operand types must be vectors");
-
- SDValue LHS = GetScalarizedVector(N->getOperand(0));
- SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ EVT OpVT = LHS.getValueType();
EVT NVT = N->getValueType(0).getVectorElementType();
SDLoc DL(N);
+ // The result needs scalarizing, but it's not a given that the source does.
+ if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+ LHS = GetScalarizedVector(LHS);
+ RHS = GetScalarizedVector(RHS);
+ } else {
+ EVT VT = OpVT.getVectorElementType();
+ LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS,
+ DAG.getConstant(0, TLI.getVectorIdxTy()));
+ RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS,
+ DAG.getConstant(0, TLI.getVectorIdxTy()));
+ }
+
// Turn it into a scalar SETCC.
SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
N->getOperand(2));
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 54277103902..2edb19281de 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -538,7 +538,6 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
}
- setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::VSELECT);
}
@@ -4284,32 +4283,6 @@ static SDValue CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
return SDValue(N, 0);
}
-// v1i1 setcc ->
-// v1i1 (bitcast (i1 setcc (extract_vector_elt, extract_vector_elt))
-// FIXME: Currently the type legalizer can't handle SETCC having v1i1 as result.
-// If it can legalize "v1i1 SETCC" correctly, no need to combine such SETCC.
-static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
- EVT ResVT = N->getValueType(0);
-
- if (!ResVT.isVector() || ResVT.getVectorNumElements() != 1 ||
- ResVT.getVectorElementType() != MVT::i1)
- return SDValue();
-
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- EVT CmpVT = LHS.getValueType();
- LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
- CmpVT.getVectorElementType(), LHS,
- DAG.getConstant(0, MVT::i64));
- RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
- CmpVT.getVectorElementType(), RHS,
- DAG.getConstant(0, MVT::i64));
- SDValue SetCC =
- DAG.getSetCC(SDLoc(N), MVT::i1, LHS, RHS,
- cast<CondCodeSDNode>(N->getOperand(2))->get());
- return DAG.getNode(ISD::BITCAST, SDLoc(N), ResVT, SetCC);
-}
-
// vselect (v1i1 setcc) ->
// vselect (v1iXX setcc) (XX is the size of the compared operand type)
// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
@@ -4378,7 +4351,6 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SRA:
case ISD::SRL:
return PerformShiftCombine(N, DCI, getSubtarget());
- case ISD::SETCC: return PerformSETCCCombine(N, DCI.DAG);
case ISD::VSELECT: return PerformVSelectCombine(N, DCI.DAG);
case ISD::SIGN_EXTEND: return PerformSignExtendCombine(N, DCI.DAG);
case ISD::INTRINSIC_WO_CHAIN:
diff --git a/llvm/lib/Target/ARM64/ARM64ISelLowering.cpp b/llvm/lib/Target/ARM64/ARM64ISelLowering.cpp
index 255685232d1..63957abdfdd 100644
--- a/llvm/lib/Target/ARM64/ARM64ISelLowering.cpp
+++ b/llvm/lib/Target/ARM64/ARM64ISelLowering.cpp
@@ -434,6 +434,8 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM)
setTargetDAGCombine(ISD::MUL);
+ setTargetDAGCombine(ISD::VSELECT);
+
MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4;
@@ -7227,6 +7229,36 @@ static SDValue performBRCONDCombine(SDNode *N,
return SDValue();
}
+// vselect (v1i1 setcc) ->
+// vselect (v1iXX setcc) (XX is the size of the compared operand type)
+// FIXME: Currently the type legalizer can't handle a VSELECT that has v1i1 as
+// its condition. Once it can legalize "VSELECT v1i1" correctly, there is no
+// need to combine such a VSELECT.
+static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ EVT CCVT = N0.getValueType();
+
+ if (N0.getOpcode() != ISD::SETCC || CCVT.getVectorNumElements() != 1 ||
+ CCVT.getVectorElementType() != MVT::i1)
+ return SDValue();
+
+ EVT ResVT = N->getValueType(0);
+ EVT CmpVT = N0.getOperand(0).getValueType();
+ // Only combine when the result type is of the same size as the compared
+ // operands.
+ if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
+ return SDValue();
+
+ SDValue IfTrue = N->getOperand(1);
+ SDValue IfFalse = N->getOperand(2);
+ SDValue SetCC =
+ DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
+ N0.getOperand(0), N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
+ IfTrue, IfFalse);
+}
+
SDValue ARM64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -7255,6 +7287,8 @@ SDValue ARM64TargetLowering::PerformDAGCombine(SDNode *N,
return performBitcastCombine(N, DCI, DAG);
case ISD::CONCAT_VECTORS:
return performConcatVectorsCombine(N, DCI, DAG);
+ case ISD::VSELECT:
+ return performVSelectCombine(N, DCI.DAG);
case ISD::STORE:
return performSTORECombine(N, DCI, DAG, Subtarget);
case ARM64ISD::BRCOND:
diff --git a/llvm/test/CodeGen/ARM64/neon-v1i1-setcc.ll b/llvm/test/CodeGen/ARM64/neon-v1i1-setcc.ll
new file mode 100644
index 00000000000..a7e59fbc002
--- /dev/null
+++ b/llvm/test/CodeGen/ARM64/neon-v1i1-setcc.ll
@@ -0,0 +1,65 @@
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=arm64-none-linux-gnu | FileCheck %s
+
+; This is the analogue of AArch64's file of the same name. It mostly tests that
+; some form of correct lowering occurs. The tests are a little artificial, and I
+; strongly suspect there's room for improved CodeGen (FIXME).
+
+define i64 @test_sext_extr_cmp_0(<1 x i64> %v1, <1 x i64> %v2) {
+; CHECK-LABEL: test_sext_extr_cmp_0:
+; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: csinc
+ %1 = icmp sge <1 x i64> %v1, %v2
+ %2 = extractelement <1 x i1> %1, i32 0
+ %vget_lane = sext i1 %2 to i64
+ ret i64 %vget_lane
+}
+
+define i64 @test_sext_extr_cmp_1(<1 x double> %v1, <1 x double> %v2) {
+; CHECK-LABEL: test_sext_extr_cmp_1:
+; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}}
+ %1 = fcmp oeq <1 x double> %v1, %v2
+ %2 = extractelement <1 x i1> %1, i32 0
+ %vget_lane = sext i1 %2 to i64
+ ret i64 %vget_lane
+}
+
+define <1 x i64> @test_select_v1i1_0(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
+; CHECK-LABEL: test_select_v1i1_0:
+; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+ %1 = icmp eq <1 x i64> %v1, %v2
+ %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3
+ ret <1 x i64> %res
+}
+
+define <1 x i64> @test_select_v1i1_1(<1 x double> %v1, <1 x double> %v2, <1 x i64> %v3) {
+; CHECK-LABEL: test_select_v1i1_1:
+; CHECK: fcmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+ %1 = fcmp oeq <1 x double> %v1, %v2
+ %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3
+ ret <1 x i64> %res
+}
+
+define <1 x double> @test_select_v1i1_2(<1 x i64> %v1, <1 x i64> %v2, <1 x double> %v3) {
+; CHECK-LABEL: test_select_v1i1_2:
+; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
+ %1 = icmp eq <1 x i64> %v1, %v2
+ %res = select <1 x i1> %1, <1 x double> zeroinitializer, <1 x double> %v3
+ ret <1 x double> %res
+}
+
+define i32 @test_br_extr_cmp(<1 x i64> %v1, <1 x i64> %v2) {
+; CHECK-LABEL: test_br_extr_cmp:
+; CHECK: cmp x{{[0-9]+}}, x{{[0-9]+}}
+ %1 = icmp eq <1 x i64> %v1, %v2
+ %2 = extractelement <1 x i1> %1, i32 0
+ br i1 %2, label %if.end, label %if.then
+
+if.then:
+ ret i32 0;
+
+if.end:
+ ret i32 1;
+}
OpenPOWER on IntegriCloud