-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 7
-rw-r--r--  llvm/test/CodeGen/X86/combine-sdiv.ll   | 3
-rw-r--r--  llvm/test/CodeGen/X86/combine-udiv.ll   | 3
3 files changed, 3 insertions, 10 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7298c208cbb..0035335a2a7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -23766,12 +23766,7 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
   RHi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, RHi, 8, DAG);
 
   // Bitcast back to VT and then pack all the even elements from Lo and Hi.
-  // Shuffle lowering should turn this into PACKUS.
-  RLo = DAG.getBitcast(VT, RLo);
-  RHi = DAG.getBitcast(VT, RHi);
-  return DAG.getVectorShuffle(VT, dl, RLo, RHi,
-                              { 0, 2, 4, 6, 8, 10, 12, 14,
-                               16, 18, 20, 22, 24, 26, 28, 30});
+  return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
 }
 
 SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const {
diff --git a/llvm/test/CodeGen/X86/combine-sdiv.ll b/llvm/test/CodeGen/X86/combine-sdiv.ll
index d77dab8a9fb..a10d9c57e41 100644
--- a/llvm/test/CodeGen/X86/combine-sdiv.ll
+++ b/llvm/test/CodeGen/X86/combine-sdiv.ll
@@ -3146,9 +3146,8 @@ define <16 x i8> @pr38658(<16 x i8> %x) {
 ; XOP-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; XOP-NEXT: vpmovsxbw %xmm1, %xmm1
 ; XOP-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm1
-; XOP-NEXT: vpsrlw $8, %xmm1, %xmm1
 ; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; XOP-NEXT: vpackuswb %xmm1, %xmm2, %xmm1
+; XOP-NEXT: vpperm {{.*#+}} xmm1 = xmm2[1,3,5,7,9,11,13,15],xmm1[1,3,5,7,9,11,13,15]
 ; XOP-NEXT: vpaddb %xmm0, %xmm1, %xmm0
 ; XOP-NEXT: vpshab {{.*}}(%rip), %xmm0, %xmm1
 ; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/combine-udiv.ll b/llvm/test/CodeGen/X86/combine-udiv.ll
index 4dea599532e..bfd3095ab96 100644
--- a/llvm/test/CodeGen/X86/combine-udiv.ll
+++ b/llvm/test/CodeGen/X86/combine-udiv.ll
@@ -719,9 +719,8 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
 ; XOP-NEXT: vmovd %eax, %xmm1
 ; XOP-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; XOP-NEXT: vpmullw %xmm1, %xmm2, %xmm1
-; XOP-NEXT: vpsrlw $8, %xmm1, %xmm1
 ; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; XOP-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
+; XOP-NEXT: vpperm {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15],xmm2[1,3,5,7,9,11,13,15]
 ; XOP-NEXT: movl $249, %eax
 ; XOP-NEXT: vmovd %eax, %xmm2
 ; XOP-NEXT: vpshlb %xmm2, %xmm1, %xmm1
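For context, here is a minimal scalar sketch of the pattern being simplified. This is not LLVM code; the helper name packus8 and the test values are purely illustrative. The v16i8 MULH path widens the 8-bit operands to 16 bits, multiplies, shifts logically right by 8, and then packs the low byte of every 16-bit lane. Because the shift leaves each lane in the range 0..255, the unsigned saturation in PACKUS never fires, so emitting X86ISD::PACKUS directly selects exactly the even-byte elements that the old getVectorShuffle mask { 0, 2, 4, ..., 30 } picked out of the bitcast results.

#include <cstdint>
#include <cstdio>

// Scalar model of PACKUSWB for inputs already clamped to 0..255: the low
// 8 output bytes come from Lo, the high 8 from Hi, each taking the low
// byte of a 16-bit lane.
static void packus8(const uint16_t Lo[8], const uint16_t Hi[8], uint8_t Out[16]) {
  for (int i = 0; i < 8; ++i) {
    Out[i]     = static_cast<uint8_t>(Lo[i]);
    Out[i + 8] = static_cast<uint8_t>(Hi[i]);
  }
}

int main() {
  uint8_t A[16], B[16], R[16];
  for (int i = 0; i < 16; ++i) {
    A[i] = static_cast<uint8_t>(17 * i + 3);
    B[i] = static_cast<uint8_t>(5 * i + 1);
  }

  // Widen, multiply, and shift right by 8, mirroring the VSRLI-by-8 that
  // feeds the new PACKUS node in LowerMULH.
  uint16_t RLo[8], RHi[8];
  for (int i = 0; i < 8; ++i) {
    RLo[i] = static_cast<uint16_t>(uint16_t(A[i]) * uint16_t(B[i])) >> 8;
    RHi[i] = static_cast<uint16_t>(uint16_t(A[i + 8]) * uint16_t(B[i + 8])) >> 8;
  }
  packus8(RLo, RHi, R);

  // Each result byte is the high byte of the 16-bit product, i.e. MULHU.
  for (int i = 0; i < 16; ++i)
    printf("mulhu(%3d, %3d) = %3d\n", A[i], B[i], R[i]);
  return 0;
}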