diff options
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 35 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/reduce-trunc-shl.ll | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-trunc-math.ll | 6 |
3 files changed, 13 insertions, 36 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 19ef21be0db..25650d2caeb 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -36428,32 +36428,15 @@ combineVectorTruncationWithPACKUS(SDNode *N, SelectionDAG &DAG, } /// Truncate a group of v4i32 into v8i16 using X86ISD::PACKSS. -static SDValue -combineVectorTruncationWithPACKSS(SDNode *N, const X86Subtarget &Subtarget, - SelectionDAG &DAG, - SmallVector<SDValue, 8> &Regs) { - assert(Regs.size() > 0 && Regs[0].getValueType() == MVT::v4i32); +static SDValue combineVectorTruncationWithPACKSS(SDNode *N, const SDLoc &DL, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { + SDValue In = N->getOperand(0); + EVT InVT = In.getValueType(); EVT OutVT = N->getValueType(0); - SDLoc DL(N); - - // Shift left by 16 bits, then arithmetic-shift right by 16 bits. - SDValue ShAmt = DAG.getConstant(16, DL, MVT::i32); - for (auto &Reg : Regs) { - Reg = getTargetVShiftNode(X86ISD::VSHLI, DL, MVT::v4i32, Reg, ShAmt, - Subtarget, DAG); - Reg = getTargetVShiftNode(X86ISD::VSRAI, DL, MVT::v4i32, Reg, ShAmt, - Subtarget, DAG); - } - - for (unsigned i = 0, e = Regs.size() / 2; i < e; i++) - Regs[i] = DAG.getNode(X86ISD::PACKSS, DL, MVT::v8i16, Regs[i * 2], - Regs[i * 2 + 1]); - - if (Regs.size() > 2) { - Regs.resize(Regs.size() / 2); - return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Regs); - } else - return Regs[0]; + In = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, InVT, In, + DAG.getValueType(OutVT)); + return truncateVectorWithPACK(X86ISD::PACKSS, OutVT, In, DL, DAG, Subtarget); } /// This function transforms truncation from vXi32/vXi64 to vXi8/vXi16 into @@ -36511,7 +36494,7 @@ static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG, if (Subtarget.hasSSE41() || OutSVT == MVT::i8) return combineVectorTruncationWithPACKUS(N, DAG, SubVec); if (InSVT == MVT::i32) - return combineVectorTruncationWithPACKSS(N, Subtarget, DAG, SubVec); + return combineVectorTruncationWithPACKSS(N, DL, Subtarget, DAG); return SDValue(); } diff --git a/llvm/test/CodeGen/X86/reduce-trunc-shl.ll b/llvm/test/CodeGen/X86/reduce-trunc-shl.ll index b44cadb26d0..5b3f5867155 100644 --- a/llvm/test/CodeGen/X86/reduce-trunc-shl.ll +++ b/llvm/test/CodeGen/X86/reduce-trunc-shl.ll @@ -69,13 +69,7 @@ define <8 x i16> @trunc_shl_16_v8i16_v8i32(<8 x i32> %a) { define <8 x i16> @trunc_shl_17_v8i16_v8i32(<8 x i32> %a) { ; SSE2-LABEL: trunc_shl_17_v8i16_v8i32: ; SSE2: # %bb.0: -; SSE2-NEXT: pslld $17, %xmm0 -; SSE2-NEXT: pslld $17, %xmm1 -; SSE2-NEXT: pslld $16, %xmm1 -; SSE2-NEXT: psrad $16, %xmm1 -; SSE2-NEXT: pslld $16, %xmm0 -; SSE2-NEXT: psrad $16, %xmm0 -; SSE2-NEXT: packssdw %xmm1, %xmm0 +; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: retq ; ; AVX2-LABEL: trunc_shl_17_v8i16_v8i32: diff --git a/llvm/test/CodeGen/X86/vector-trunc-math.ll b/llvm/test/CodeGen/X86/vector-trunc-math.ll index f0a5449585c..15f2805737b 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-math.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-math.ll @@ -3227,10 +3227,10 @@ define <8 x i16> @trunc_and_v8i64_v8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind { define <8 x i16> @trunc_and_v8i32_v8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind { ; SSE-LABEL: trunc_and_v8i32_v8i16: ; SSE: # %bb.0: +; SSE-NEXT: pand %xmm2, %xmm0 ; SSE-NEXT: pand %xmm3, %xmm1 ; SSE-NEXT: pslld $16, %xmm1 ; SSE-NEXT: psrad $16, %xmm1 -; SSE-NEXT: pand %xmm2, %xmm0 ; SSE-NEXT: pslld $16, %xmm0 ; SSE-NEXT: psrad $16, %xmm0 ; SSE-NEXT: packssdw %xmm1, %xmm0 @@ -4029,10 +4029,10 @@ define <8 x i16> @trunc_xor_v8i64_v8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind { define <8 x i16> @trunc_xor_v8i32_v8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind { ; SSE-LABEL: trunc_xor_v8i32_v8i16: ; SSE: # %bb.0: +; SSE-NEXT: pxor %xmm2, %xmm0 ; SSE-NEXT: pxor %xmm3, %xmm1 ; SSE-NEXT: pslld $16, %xmm1 ; SSE-NEXT: psrad $16, %xmm1 -; SSE-NEXT: pxor %xmm2, %xmm0 ; SSE-NEXT: pslld $16, %xmm0 ; SSE-NEXT: psrad $16, %xmm0 ; SSE-NEXT: packssdw %xmm1, %xmm0 @@ -4831,10 +4831,10 @@ define <8 x i16> @trunc_or_v8i64_v8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind { define <8 x i16> @trunc_or_v8i32_v8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind { ; SSE-LABEL: trunc_or_v8i32_v8i16: ; SSE: # %bb.0: +; SSE-NEXT: por %xmm2, %xmm0 ; SSE-NEXT: por %xmm3, %xmm1 ; SSE-NEXT: pslld $16, %xmm1 ; SSE-NEXT: psrad $16, %xmm1 -; SSE-NEXT: por %xmm2, %xmm0 ; SSE-NEXT: pslld $16, %xmm0 ; SSE-NEXT: psrad $16, %xmm0 ; SSE-NEXT: packssdw %xmm1, %xmm0 |