diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-02-24 12:44:12 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-02-24 12:44:12 +0000 |
commit | 744f008a7561cdaf320f24d5488480dd1e46dd50 (patch) | |
tree | dcf787b376b6281e67dc4799e7ba5aa4cfc6045c | |
parent | 51ce2ed36785c5b141782526260081a67c5e0bd8 (diff) | |
download | bcm5719-llvm-744f008a7561cdaf320f24d5488480dd1e46dd50.tar.gz bcm5719-llvm-744f008a7561cdaf320f24d5488480dd1e46dd50.zip |
[X86][SSE] combineSubToSubus - begin generalizing to work with any type size using SplitBinaryOpsAndApply
llvm-svn: 326030
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 15 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/psubus.ll | 28 |
2 files changed, 25 insertions, 18 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 732503f5088..9ad5c185559 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -37639,7 +37639,7 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG, // for v8i32 requires umin, which appears in SSE41. if (!(Subtarget.hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) && !(Subtarget.hasSSE41() && (VT == MVT::v8i32)) && - !(Subtarget.hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)) && + !(Subtarget.hasAVX() && (VT == MVT::v32i8 || VT == MVT::v16i16)) && !(Subtarget.useBWIRegs() && (VT == MVT::v64i8 || VT == MVT::v32i16 || VT == MVT::v16i32 || VT == MVT::v8i64))) return SDValue(); @@ -37671,10 +37671,16 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG, } else return SDValue(); + auto SUBUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, + SDValue Op1) { + return DAG.getNode(X86ISD::SUBUS, DL, Op0.getValueType(), Op0, Op1); + }; + // PSUBUS doesn't support v8i32/v8i64/v16i32, but it can be enabled with // special preprocessing in some cases. 
if (VT != MVT::v8i32 && VT != MVT::v16i32 && VT != MVT::v8i64) - return DAG.getNode(X86ISD::SUBUS, SDLoc(N), VT, SubusLHS, SubusRHS); + return SplitBinaryOpsAndApply(DAG, Subtarget, SDLoc(N), VT, SubusLHS, + SubusRHS, SUBUSBuilder); // Special preprocessing case can be only applied // if the value was zero extended from 16 bit, @@ -37704,8 +37710,9 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG, SDValue NewSubusLHS = DAG.getZExtOrTrunc(SubusLHS, SDLoc(SubusLHS), ShrinkedType); SDValue NewSubusRHS = DAG.getZExtOrTrunc(UMin, SDLoc(SubusRHS), ShrinkedType); - SDValue Psubus = DAG.getNode(X86ISD::SUBUS, SDLoc(N), ShrinkedType, - NewSubusLHS, NewSubusRHS); + SDValue Psubus = + SplitBinaryOpsAndApply(DAG, Subtarget, SDLoc(N), ShrinkedType, + NewSubusLHS, NewSubusRHS, SUBUSBuilder); // Zero extend the result, it may be used somewhere as 32 bit, // if not zext and following trunc will shrink. return DAG.getZExtOrTrunc(Psubus, SDLoc(N), ExtType); diff --git a/llvm/test/CodeGen/X86/psubus.ll b/llvm/test/CodeGen/X86/psubus.ll index e3903f2d281..8de1ae47c5a 100644 --- a/llvm/test/CodeGen/X86/psubus.ll +++ b/llvm/test/CodeGen/X86/psubus.ll @@ -1202,16 +1202,16 @@ define <32 x i16> @psubus_32i16_max(<32 x i16> %x, <32 x i16> %y) nounwind { ; ; AVX1-LABEL: psubus_32i16_max: ; AVX1: # %bb.0: # %vector.ph -; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 ; AVX1-NEXT: vpsubusw %xmm4, %xmm5, %xmm4 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6 -; AVX1-NEXT: vpsubusw %xmm5, %xmm6, %xmm5 ; AVX1-NEXT: vpsubusw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0 +; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 +; AVX1-NEXT: vpsubusw %xmm2, %xmm4, %xmm2 ; AVX1-NEXT: vpsubusw %xmm3, %xmm1, %xmm1 -; 
AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: psubus_32i16_max: @@ -1242,16 +1242,16 @@ define <64 x i8> @psubus_64i8_max(<64 x i8> %x, <64 x i8> %y) nounwind { ; ; AVX1-LABEL: psubus_64i8_max: ; AVX1: # %bb.0: # %vector.ph -; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 ; AVX1-NEXT: vpsubusb %xmm4, %xmm5, %xmm4 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6 -; AVX1-NEXT: vpsubusb %xmm5, %xmm6, %xmm5 ; AVX1-NEXT: vpsubusb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0 +; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 +; AVX1-NEXT: vpsubusb %xmm2, %xmm4, %xmm2 ; AVX1-NEXT: vpsubusb %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: psubus_64i8_max: |