summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2018-02-24 12:44:12 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2018-02-24 12:44:12 +0000
commit: 744f008a7561cdaf320f24d5488480dd1e46dd50 (patch)
tree: dcf787b376b6281e67dc4799e7ba5aa4cfc6045c
parent: 51ce2ed36785c5b141782526260081a67c5e0bd8 (diff)
download: bcm5719-llvm-744f008a7561cdaf320f24d5488480dd1e46dd50.tar.gz
download: bcm5719-llvm-744f008a7561cdaf320f24d5488480dd1e46dd50.zip
[X86][SSE] combineSubToSubus - begun generalizing to work with any type sizes with SplitBinaryOpsAndApply
llvm-svn: 326030
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 15
-rw-r--r-- llvm/test/CodeGen/X86/psubus.ll | 28
2 files changed, 25 insertions, 18 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 732503f5088..9ad5c185559 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37639,7 +37639,7 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
// for v8i32 requires umin, which appears in SSE41.
if (!(Subtarget.hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) &&
!(Subtarget.hasSSE41() && (VT == MVT::v8i32)) &&
- !(Subtarget.hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)) &&
+ !(Subtarget.hasAVX() && (VT == MVT::v32i8 || VT == MVT::v16i16)) &&
!(Subtarget.useBWIRegs() && (VT == MVT::v64i8 || VT == MVT::v32i16 ||
VT == MVT::v16i32 || VT == MVT::v8i64)))
return SDValue();
@@ -37671,10 +37671,16 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
} else
return SDValue();
+ auto SUBUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
+ SDValue Op1) {
+ return DAG.getNode(X86ISD::SUBUS, DL, Op0.getValueType(), Op0, Op1);
+ };
+
// PSUBUS doesn't support v8i32/v8i64/v16i32, but it can be enabled with
// special preprocessing in some cases.
if (VT != MVT::v8i32 && VT != MVT::v16i32 && VT != MVT::v8i64)
- return DAG.getNode(X86ISD::SUBUS, SDLoc(N), VT, SubusLHS, SubusRHS);
+ return SplitBinaryOpsAndApply(DAG, Subtarget, SDLoc(N), VT, SubusLHS,
+ SubusRHS, SUBUSBuilder);
// Special preprocessing case can be only applied
// if the value was zero extended from 16 bit,
@@ -37704,8 +37710,9 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
SDValue NewSubusLHS =
DAG.getZExtOrTrunc(SubusLHS, SDLoc(SubusLHS), ShrinkedType);
SDValue NewSubusRHS = DAG.getZExtOrTrunc(UMin, SDLoc(SubusRHS), ShrinkedType);
- SDValue Psubus = DAG.getNode(X86ISD::SUBUS, SDLoc(N), ShrinkedType,
- NewSubusLHS, NewSubusRHS);
+ SDValue Psubus =
+ SplitBinaryOpsAndApply(DAG, Subtarget, SDLoc(N), ShrinkedType,
+ NewSubusLHS, NewSubusRHS, SUBUSBuilder);
// Zero extend the result, it may be used somewhere as 32 bit,
// if not zext and following trunc will shrink.
return DAG.getZExtOrTrunc(Psubus, SDLoc(N), ExtType);
diff --git a/llvm/test/CodeGen/X86/psubus.ll b/llvm/test/CodeGen/X86/psubus.ll
index e3903f2d281..8de1ae47c5a 100644
--- a/llvm/test/CodeGen/X86/psubus.ll
+++ b/llvm/test/CodeGen/X86/psubus.ll
@@ -1202,16 +1202,16 @@ define <32 x i16> @psubus_32i16_max(<32 x i16> %x, <32 x i16> %y) nounwind {
;
; AVX1-LABEL: psubus_32i16_max:
; AVX1: # %bb.0: # %vector.ph
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT: vpsubusw %xmm4, %xmm5, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT: vpsubusw %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vpsubusw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpsubusw %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpsubusw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: psubus_32i16_max:
@@ -1242,16 +1242,16 @@ define <64 x i8> @psubus_64i8_max(<64 x i8> %x, <64 x i8> %y) nounwind {
;
; AVX1-LABEL: psubus_64i8_max:
; AVX1: # %bb.0: # %vector.ph
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT: vpsubusb %xmm4, %xmm5, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT: vpsubusb %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vpsubusb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpsubusb %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpsubusb %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: psubus_64i8_max:
OpenPOWER on IntegriCloud