summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJames Molloy <james.molloy@arm.com>2015-03-31 10:20:58 +0000
committerJames Molloy <james.molloy@arm.com>2015-03-31 10:20:58 +0000
commit4c1b7467714e01335fcda3e396d8fed6ac273693 (patch)
treea9926b030f0c4df7ad146e2b8b104b8d433d1152
parent47b11c61cab6174816929742a6a0af0a7d7f4466 (diff)
downloadbcm5719-llvm-4c1b7467714e01335fcda3e396d8fed6ac273693.tar.gz
bcm5719-llvm-4c1b7467714e01335fcda3e396d8fed6ac273693.zip
[SDAG] Move TRUNCATE splitting logic into a helper, and use
it more liberally. SplitVecOp_TRUNCATE has logic for recursively splitting oversize vectors that need more than one round of splitting to become legal. There are many other ISD nodes that could benefit from this logic, so factor it out and use it for FP_TO_UINT,FP_TO_SINT,SINT_TO_FP,UINT_TO_FP and FTRUNC. llvm-svn: 233681
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp35
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll22
-rw-r--r--llvm/test/CodeGen/AArch64/vcvt-oversize.ll16
-rw-r--r--llvm/test/CodeGen/ARM/vcvt.ll4
5 files changed, 55 insertions, 24 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index cef3fc99081..9de85d7e777 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -593,6 +593,7 @@ private:
bool SplitVectorOperand(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_UnaryOp(SDNode *N);
+ SDValue SplitVecOp_TruncateHelper(SDNode *N, unsigned TruncateOp);
SDValue SplitVecOp_BITCAST(SDNode *N);
SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
@@ -600,7 +601,6 @@ private:
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
- SDValue SplitVecOp_TRUNCATE(SDNode *N);
SDValue SplitVecOp_VSETCC(SDNode *N);
SDValue SplitVecOp_FP_ROUND(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index f7e4557dd00..f000902260e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1293,7 +1293,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
- case ISD::TRUNCATE: Res = SplitVecOp_TRUNCATE(N); break;
+ case ISD::TRUNCATE:
+ Res = SplitVecOp_TruncateHelper(N, ISD::TRUNCATE);
+ break;
case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
@@ -1304,20 +1306,32 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VSELECT:
Res = SplitVecOp_VSELECT(N, OpNo);
break;
- case ISD::CTTZ:
- case ISD::CTLZ:
- case ISD::CTPOP:
- case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
+ if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0)))
+ Res = SplitVecOp_TruncateHelper(N, ISD::TRUNCATE);
+ else
+ Res = SplitVecOp_UnaryOp(N);
+ break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
- case ISD::FTRUNC:
+ if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0)))
+ Res = SplitVecOp_TruncateHelper(N, ISD::FTRUNC);
+ else
+ Res = SplitVecOp_UnaryOp(N);
+ break;
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::FP_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
Res = SplitVecOp_UnaryOp(N);
break;
+ case ISD::FTRUNC:
+ Res = SplitVecOp_TruncateHelper(N, ISD::FTRUNC);
+ break;
}
}
@@ -1581,7 +1595,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), Elts);
}
-SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) {
+SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N,
+ unsigned TruncateOp) {
// The result type is legal, but the input type is illegal. If splitting
// ends up with the result type of each half still being legal, just
// do that. If, however, that would result in an illegal result type,
@@ -1624,8 +1639,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) {
EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
NumElements/2);
- SDValue HalfLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InLoVec);
- SDValue HalfHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InHiVec);
+ SDValue HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec);
+ SDValue HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec);
// Concatenate them to get the full intermediate truncation result.
EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements);
SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo,
@@ -1634,7 +1649,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) {
// type. This should normally be something that ends up being legal directly,
// but in theory if a target has very wide vectors and an annoyingly
// restricted set of legal types, this split can chain to build things up.
- return DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec);
+ return DAG.getNode(TruncateOp, DL, OutVT, InterVec);
}
SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
diff --git a/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll b/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll
index 62596adc743..c4e3e4eae63 100644
--- a/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll
@@ -3,11 +3,11 @@
define <4 x i16> @fptosi_v4f64_to_v4i16(<4 x double>* %ptr) {
; CHECK: fptosi_v4f64_to_v4i16
-; CHECK-DAG: fcvtzs v[[LHS:[0-9]+]].2d, v1.2d
-; CHECK-DAG: fcvtzs v[[RHS:[0-9]+]].2d, v0.2d
-; CHECK-DAG: xtn v[[LHS_NA:[0-9]+]].2s, v[[LHS]].2d
-; CHECK-DAG: xtn v[[RHS_NA:[0-9]+]].2s, v[[RHS]].2d
-; CHECK: uzp1 v0.4h, v[[RHS_NA]].4h, v[[LHS_NA]].4h
+; CHECK-DAG: fcvtzs v[[LHS:[0-9]+]].2d, v0.2d
+; CHECK-DAG: fcvtzs v[[RHS:[0-9]+]].2d, v1.2d
+; CHECK-DAG: xtn v[[MID:[0-9]+]].2s, v[[LHS]].2d
+; CHECK-DAG: xtn2 v[[MID]].4s, v[[RHS]].2d
+; CHECK: xtn v0.4h, v[[MID]].4s
%tmp1 = load <4 x double>, <4 x double>* %ptr
%tmp2 = fptosi <4 x double> %tmp1 to <4 x i16>
ret <4 x i16> %tmp2
@@ -19,13 +19,13 @@ define <8 x i8> @fptosi_v4f64_to_v4i8(<8 x double>* %ptr) {
; CHECK-DAG: fcvtzs v[[CONV1:[0-9]+]].2d, v1.2d
; CHECK-DAG: fcvtzs v[[CONV2:[0-9]+]].2d, v2.2d
; CHECK-DAG: fcvtzs v[[CONV3:[0-9]+]].2d, v3.2d
-; CHECK-DAG: xtn v[[NA0:[0-9]+]].2s, v[[CONV0]].2d
-; CHECK-DAG: xtn v[[NA1:[0-9]+]].2s, v[[CONV1]].2d
; CHECK-DAG: xtn v[[NA2:[0-9]+]].2s, v[[CONV2]].2d
-; CHECK-DAG: xtn v[[NA3:[0-9]+]].2s, v[[CONV3]].2d
-; CHECK-DAG: uzp1 v[[TMP1:[0-9]+]].4h, v[[CONV1]].4h, v[[CONV0]].4h
-; CHECK-DAG: uzp1 v[[TMP2:[0-9]+]].4h, v[[CONV3]].4h, v[[CONV2]].4h
-; CHECK: uzp1 v0.8b, v[[TMP2]].8b, v[[TMP1]].8b
+; CHECK-DAG: xtn2 v[[NA2]].4s, v[[CONV3]].2d
+; CHECK-DAG: xtn v[[NA0:[0-9]+]].2s, v[[CONV0]].2d
+; CHECK-DAG: xtn2 v[[NA0]].4s, v[[CONV1]].2d
+; CHECK-DAG: xtn v[[TMP1:[0-9]+]].4h, v[[NA0]].4s
+; CHECK-DAG: xtn2 v[[TMP1]].8h, v[[NA2]].4s
+; CHECK: xtn v0.8b, v[[TMP1]].8h
%tmp1 = load <8 x double>, <8 x double>* %ptr
%tmp2 = fptosi <8 x double> %tmp1 to <8 x i8>
ret <8 x i8> %tmp2
diff --git a/llvm/test/CodeGen/AArch64/vcvt-oversize.ll b/llvm/test/CodeGen/AArch64/vcvt-oversize.ll
new file mode 100644
index 00000000000..066a4b66620
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/vcvt-oversize.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+define <8 x i8> @float_to_i8(<8 x float>* %in) {
+; CHECK-LABEL: float_to_i8:
+; CHECK-DAG: fadd v[[LSB:[0-9]+]].4s, v0.4s, v0.4s
+; CHECK-DAG: fadd v[[MSB:[0-9]+]].4s, v1.4s, v1.4s
+; CHECK-DAG: fcvtzu v[[LSB2:[0-9]+]].4s, v[[LSB]].4s
+; CHECK-DAG: fcvtzu v[[MSB2:[0-9]+]].4s, v[[MSB]].4s
+; CHECK-DAG: xtn v[[TMP:[0-9]+]].4h, v[[LSB]].4s
+; CHECK-DAG: xtn2 v[[TMP]].8h, v[[MSB]].4s
+; CHECK-DAG: xtn v0.8b, v[[TMP]].8h
+ %l = load <8 x float>, <8 x float>* %in
+ %scale = fmul <8 x float> %l, <float 2.0, float 2.0, float 2.0, float 2.0, float 2.0, float 2.0, float 2.0, float 2.0>
+ %conv = fptoui <8 x float> %scale to <8 x i8>
+ ret <8 x i8> %conv
+}
diff --git a/llvm/test/CodeGen/ARM/vcvt.ll b/llvm/test/CodeGen/ARM/vcvt.ll
index 0b7ffb8960a..78105f7e0ad 100644
--- a/llvm/test/CodeGen/ARM/vcvt.ll
+++ b/llvm/test/CodeGen/ARM/vcvt.ll
@@ -180,8 +180,8 @@ define <2 x i64> @fix_float_to_i64(<2 x float> %in) {
define <4 x i16> @fix_double_to_i16(<4 x double> %in) {
; CHECK-LABEL: fix_double_to_i16:
-; CHECK: vcvt.s32.f64
-; CHECK: vcvt.s32.f64
+; CHECK: vcvt.u32.f64
+; CHECK: vcvt.u32.f64
%scale = fmul <4 x double> %in, <double 2.0, double 2.0, double 2.0, double 2.0>
%conv = fptoui <4 x double> %scale to <4 x i16>
OpenPOWER on IntegriCloud