diff options
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 25 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/lower-vec-shift-2.ll | 12 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-rotate-128.ll | 16 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shift-ashr-128.ll | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shift-lshr-128.ll | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shift-shl-128.ll | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vshift-4.ll | 10 |
7 files changed, 51 insertions, 36 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7755113873a..63dd407a0bb 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -20451,30 +20451,43 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT, // | i64 | Yes, No | Use ShAmt as lowest elt | // | i32 | Yes | zero-extend in-reg | // | (i32 zext(i16)) | Yes | zero-extend in-reg | + // | (i32 zext(i16)) | No | byte-shift-in-reg | // | i16/i32 | No | v4i32 build_vector(ShAmt, 0, ud, ud)) | // +=================+============+=======================================+ if (SVT == MVT::i64) ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt); - else if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND && + else if (ShAmt.getOpcode() == ISD::ZERO_EXTEND && + ShAmt.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT && ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) { ShAmt = ShAmt.getOperand(0); - ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v8i16, ShAmt); - ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64); + MVT AmtTy = MVT::v8i16; + ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), AmtTy, ShAmt); + if (Subtarget.hasSSE41()) + ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64); + else { + SDValue ByteShift = DAG.getConstant( + (128 - AmtTy.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8); + ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt); + ShAmt = DAG.getNode(X86ISD::VSHLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt, + ByteShift); + ShAmt = DAG.getNode(X86ISD::VSRLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt, + ByteShift); + } } else if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt); ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64); } else { - SDValue ShOps[4] = {ShAmt, DAG.getConstant(0, dl, SVT), - DAG.getUNDEF(SVT), DAG.getUNDEF(SVT)}; + SDValue ShOps[4] = {ShAmt, DAG.getConstant(0, dl, SVT), DAG.getUNDEF(SVT), + DAG.getUNDEF(SVT)}; ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps); } // The return type has to be a 128-bit type with the same element // type as the input type. MVT EltVT = VT.getVectorElementType(); - MVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits()); + MVT ShVT = MVT::getVectorVT(EltVT, 128 / EltVT.getSizeInBits()); ShAmt = DAG.getBitcast(ShVT, ShAmt); return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); diff --git a/llvm/test/CodeGen/X86/lower-vec-shift-2.ll b/llvm/test/CodeGen/X86/lower-vec-shift-2.ll index aeaac0e0e9d..8ca9df88709 100644 --- a/llvm/test/CodeGen/X86/lower-vec-shift-2.ll +++ b/llvm/test/CodeGen/X86/lower-vec-shift-2.ll @@ -5,8 +5,8 @@ define <8 x i16> @test1(<8 x i16> %A, <8 x i16> %B) { ; SSE2-LABEL: test1: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: pextrw $0, %xmm1, %eax -; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] +; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; SSE2-NEXT: psllw %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -59,8 +59,8 @@ entry: define <8 x i16> @test4(<8 x i16> %A, <8 x i16> %B) { ; SSE2-LABEL: test4: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: pextrw $0, %xmm1, %eax -; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] +; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; SSE2-NEXT: psrlw %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -113,8 +113,8 @@ entry: define <8 x i16> @test7(<8 x i16> %A, <8 x i16> %B) { ; SSE2-LABEL: test7: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: pextrw $0, %xmm1, %eax -; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] +; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; SSE2-NEXT: psraw %xmm1, %xmm0 ; SSE2-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll index f43176496e6..c1fcaf06e79 100644 --- a/llvm/test/CodeGen/X86/vector-rotate-128.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll @@ -877,15 +877,15 @@ define <8 x i16> @splatvar_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { ; SSE2-LABEL: splatvar_rotate_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[0,0,2,3,4,5,6,7] +; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] +; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] -; SSE2-NEXT: pextrw $0, %xmm1, %eax -; SSE2-NEXT: movd %eax, %xmm1 ; SSE2-NEXT: movdqa %xmm0, %xmm3 ; SSE2-NEXT: psllw %xmm1, %xmm3 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16] ; SSE2-NEXT: psubw %xmm2, %xmm1 -; SSE2-NEXT: pextrw $0, %xmm1, %eax -; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] +; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; SSE2-NEXT: psrlw %xmm1, %xmm0 ; SSE2-NEXT: por %xmm3, %xmm0 ; SSE2-NEXT: retq @@ -993,15 +993,15 @@ define <8 x i16> @splatvar_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { ; X32-SSE-LABEL: splatvar_rotate_v8i16: ; X32-SSE: # %bb.0: ; X32-SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[0,0,2,3,4,5,6,7] +; X32-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] +; X32-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] -; X32-SSE-NEXT: pextrw $0, %xmm1, %eax -; X32-SSE-NEXT: movd %eax, %xmm1 ; X32-SSE-NEXT: movdqa %xmm0, %xmm3 ; X32-SSE-NEXT: psllw %xmm1, %xmm3 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16] ; X32-SSE-NEXT: psubw %xmm2, %xmm1 -; X32-SSE-NEXT: pextrw $0, %xmm1, %eax -; X32-SSE-NEXT: movd %eax, %xmm1 +; X32-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] +; X32-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X32-SSE-NEXT: psrlw %xmm1, %xmm0 ; X32-SSE-NEXT: por %xmm3, %xmm0 ; X32-SSE-NEXT: retl diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll index 757efa67c94..bcc64070b01 100644 --- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll @@ -736,8 +736,8 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind { define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { ; SSE2-LABEL: splatvar_shift_v8i16: ; SSE2: # %bb.0: -; SSE2-NEXT: pextrw $0, %xmm1, %eax -; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] +; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; SSE2-NEXT: psraw %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -773,8 +773,8 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { ; ; X32-SSE-LABEL: splatvar_shift_v8i16: ; X32-SSE: # %bb.0: -; X32-SSE-NEXT: pextrw $0, %xmm1, %eax -; X32-SSE-NEXT: movd %eax, %xmm1 +; X32-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] +; X32-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X32-SSE-NEXT: psraw %xmm1, %xmm0 ; X32-SSE-NEXT: retl %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll index c04add9b5a9..85c2524069a 100644 --- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll @@ -603,8 +603,8 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind { define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { ; SSE2-LABEL: splatvar_shift_v8i16: ; SSE2: # %bb.0: -; SSE2-NEXT: pextrw $0, %xmm1, %eax -; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] +; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; SSE2-NEXT: psrlw %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -640,8 +640,8 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { ; ; X32-SSE-LABEL: splatvar_shift_v8i16: ; X32-SSE: # %bb.0: -; X32-SSE-NEXT: pextrw $0, %xmm1, %eax -; X32-SSE-NEXT: movd %eax, %xmm1 +; X32-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] +; X32-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X32-SSE-NEXT: psrlw %xmm1, %xmm0 ; X32-SSE-NEXT: retl %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll index 9bebe12a79d..70f5705f915 100644 --- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll @@ -520,8 +520,8 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind { define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { ; SSE2-LABEL: splatvar_shift_v8i16: ; SSE2: # %bb.0: -; SSE2-NEXT: pextrw $0, %xmm1, %eax -; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] +; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; SSE2-NEXT: psllw %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -557,8 +557,8 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { ; ; X32-SSE-LABEL: splatvar_shift_v8i16: ; X32-SSE: # %bb.0: -; X32-SSE-NEXT: pextrw $0, %xmm1, %eax -; X32-SSE-NEXT: movd %eax, %xmm1 +; X32-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] +; X32-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X32-SSE-NEXT: psllw %xmm1, %xmm0 ; X32-SSE-NEXT: retl %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/X86/vshift-4.ll b/llvm/test/CodeGen/X86/vshift-4.ll index a49d6f38449..d2c24fbcae9 100644 --- a/llvm/test/CodeGen/X86/vshift-4.ll +++ b/llvm/test/CodeGen/X86/vshift-4.ll @@ -136,16 +136,18 @@ define void @shift3a(<8 x i16> %val, <8 x i16>* %dst, <8 x i16> %amt) nounwind { ; X32-LABEL: shift3a: ; X32: # %bb.0: # %entry ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: pextrw $6, %xmm1, %ecx -; X32-NEXT: movd %ecx, %xmm1 +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,3] +; X32-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] +; X32-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X32-NEXT: psllw %xmm1, %xmm0 ; X32-NEXT: movdqa %xmm0, (%eax) ; X32-NEXT: retl ; ; X64-LABEL: shift3a: ; X64: # %bb.0: # %entry -; X64-NEXT: pextrw $6, %xmm1, %eax -; X64-NEXT: movd %eax, %xmm1 +; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,3] +; X64-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] +; X64-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X64-NEXT: psllw %xmm1, %xmm0 ; X64-NEXT: movdqa %xmm0, (%rdi) ; X64-NEXT: retq |

