author    Simon Pilgrim <llvm-dev@redking.me.uk>  2016-03-26 15:44:55 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk>  2016-03-26 15:44:55 +0000
commit    e4dbeb40c6a08abb5486c25ab0b31926f10d6248
tree      fbba0aa7238ad81f16210166c046e34ab763383d
parent    3eef33a80656ffe30f5815708c400b17e8f3bae8
[X86][AVX] Enabled MULHS/MULHU v16i16 vectors on AVX1 targets
Correct splitting of v16i16 vectors into v8i16 vectors to prevent scalarization

Differential Revision: http://reviews.llvm.org/D18307

llvm-svn: 264512
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp          2
-rw-r--r--  llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll   359
-rw-r--r--  llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll   387
3 files changed, 54 insertions(+), 694 deletions(-)
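
For context: ISD::MULHS and ISD::MULHU return, per 16-bit lane, the upper 16 bits of the widened signed/unsigned product, which x86 implements with vpmulhw/vpmulhuw. A minimal per-lane model of the two operations (illustrative C++, not code from the patch):

#include <cstdint>

// High 16 bits of the widened product, per lane (vpmulhw / vpmulhuw).
static int16_t  mulhs16(int16_t a, int16_t b)   { return (int16_t)(((int32_t)a * b) >> 16); }
static uint16_t mulhu16(uint16_t a, uint16_t b) { return (uint16_t)(((uint32_t)a * b) >> 16); }
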
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f5e01bc05ac..7f1500a2c4e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1253,6 +1253,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v16i16, Custom);
setOperationAction(ISD::MUL, MVT::v32i8, Custom);
+ setOperationAction(ISD::MULHU, MVT::v16i16, Custom);
+ setOperationAction(ISD::MULHS, MVT::v16i16, Custom);
setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
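
Marking MULHU/MULHS as Custom for MVT::v16i16 lets the AVX1 lowering split the 256-bit operation into two 128-bit v8i16 halves (AVX1 has no 256-bit integer multiply), run vpmulhw/vpmulhuw on each half, and concatenate the results, instead of extracting and recombining all 16 scalar lanes as the old test output below shows. A rough value-level sketch of that split, using hypothetical helper names rather than the actual SelectionDAG code:

#include <array>
#include <cstdint>

using V8  = std::array<int16_t, 8>;
using V16 = std::array<int16_t, 16>;

// One 128-bit half: high 16 bits of the signed product per lane (vpmulhw).
static V8 mulhs_v8i16(const V8 &a, const V8 &b) {
  V8 r{};
  for (int i = 0; i < 8; ++i)
    r[i] = (int16_t)(((int32_t)a[i] * (int32_t)b[i]) >> 16);
  return r;
}

// v16i16 MULHS modeled as: split into halves, operate on each, concatenate.
static V16 mulhs_v16i16(const V16 &a, const V16 &b) {
  V8 alo, ahi, blo, bhi;
  for (int i = 0; i < 8; ++i) {
    alo[i] = a[i]; ahi[i] = a[i + 8];
    blo[i] = b[i]; bhi[i] = b[i + 8];
  }
  V8 lo = mulhs_v8i16(alo, blo);
  V8 hi = mulhs_v8i16(ahi, bhi);
  V16 r{};
  for (int i = 0; i < 8; ++i) { r[i] = lo[i]; r[i + 8] = hi[i]; }
  return r;
}
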
diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
index 78b29b0c9e6..3f81d2e7b8c 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
@@ -193,150 +193,15 @@ define <16 x i16> @test_div7_16i16(<16 x i16> %a) nounwind {
; AVX1-LABEL: test_div7_16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpextrw $1, %xmm1, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vmovd %xmm1, %ecx
-; AVX1-NEXT: movswl %cx, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $2, %xmm1, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $3, %xmm1, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $4, %xmm1, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $5, %xmm1, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $6, %xmm1, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $7, %xmm1, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1
-; AVX1-NEXT: vpextrw $1, %xmm0, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vmovd %xmm0, %ecx
-; AVX1-NEXT: movswl %cx, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $2, %xmm0, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $3, %xmm0, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $4, %xmm0, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $5, %xmm0, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $6, %xmm0, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $7, %xmm0, %eax
-; AVX1-NEXT: cwtl
-; AVX1-NEXT: imull $18725, %eax, %eax # imm = 0x4925
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: sarw %cx
-; AVX1-NEXT: shrl $31, %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [18725,18725,18725,18725,18725,18725,18725,18725]
+; AVX1-NEXT: vpmulhw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpsrlw $15, %xmm1, %xmm3
+; AVX1-NEXT: vpsraw $1, %xmm1, %xmm1
+; AVX1-NEXT: vpaddw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpmulhw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm2
+; AVX1-NEXT: vpsraw $1, %xmm0, %xmm0
+; AVX1-NEXT: vpaddw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -674,198 +539,20 @@ define <16 x i16> @test_rem7_16i16(<16 x i16> %a) nounwind {
; AVX1-LABEL: test_rem7_16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpextrw $1, %xmm1, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vmovd %xmm1, %ecx
-; AVX1-NEXT: movswl %cx, %edx
-; AVX1-NEXT: imull $18725, %edx, %edx # imm = 0x4925
-; AVX1-NEXT: movl %edx, %esi
-; AVX1-NEXT: shrl $16, %esi
-; AVX1-NEXT: sarw %si
-; AVX1-NEXT: shrl $31, %edx
-; AVX1-NEXT: addl %esi, %edx
-; AVX1-NEXT: leal (,%rdx,8), %esi
-; AVX1-NEXT: subl %edx, %esi
-; AVX1-NEXT: subl %esi, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $2, %xmm1, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $3, %xmm1, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $4, %xmm1, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $5, %xmm1, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $6, %xmm1, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $7, %xmm1, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1
-; AVX1-NEXT: vpextrw $1, %xmm0, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vmovd %xmm0, %ecx
-; AVX1-NEXT: movswl %cx, %edx
-; AVX1-NEXT: imull $18725, %edx, %edx # imm = 0x4925
-; AVX1-NEXT: movl %edx, %esi
-; AVX1-NEXT: shrl $16, %esi
-; AVX1-NEXT: sarw %si
-; AVX1-NEXT: shrl $31, %edx
-; AVX1-NEXT: addl %esi, %edx
-; AVX1-NEXT: leal (,%rdx,8), %esi
-; AVX1-NEXT: subl %edx, %esi
-; AVX1-NEXT: subl %esi, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $2, %xmm0, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $3, %xmm0, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $4, %xmm0, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $5, %xmm0, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $6, %xmm0, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $7, %xmm0, %eax
-; AVX1-NEXT: movswl %ax, %ecx
-; AVX1-NEXT: imull $18725, %ecx, %ecx # imm = 0x4925
-; AVX1-NEXT: movl %ecx, %edx
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: sarw %dx
-; AVX1-NEXT: shrl $31, %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: leal (,%rcx,8), %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: subl %edx, %eax
-; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [18725,18725,18725,18725,18725,18725,18725,18725]
+; AVX1-NEXT: vpmulhw %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vpsrlw $15, %xmm3, %xmm4
+; AVX1-NEXT: vpsraw $1, %xmm3, %xmm3
+; AVX1-NEXT: vpaddw %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [7,7,7,7,7,7,7,7]
+; AVX1-NEXT: vpmullw %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpsubw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpmulhw %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpsrlw $15, %xmm2, %xmm3
+; AVX1-NEXT: vpsraw $1, %xmm2, %xmm2
+; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpmullw %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpsubw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
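
In the updated AVX1 output for test_div7_16i16 and test_rem7_16i16 above, each 128-bit half computes signed division by 7 with the usual magic-number sequence: vpmulhw by 18725 (roughly 2^17/7) produces about 2*x/7 in the high half, vpsraw $1 halves that, and vpaddw adds the sign bit extracted by vpsrlw $15 so the quotient rounds toward zero; the remainder test then multiplies the quotient by 7 (vpmullw) and subtracts it from the input (vpsubw). A per-lane scalar equivalent (illustrative C++ only):

#include <cstdint>

// Per-lane model of the AVX1 sequence for signed division/remainder by 7.
static int16_t sdiv7(int16_t x) {
  int16_t t = (int16_t)(((int32_t)x * 18725) >> 16);  // vpmulhw with 18725
  return (int16_t)((t >> 1) + ((uint16_t)t >> 15));   // vpsraw $1, vpsrlw $15, vpaddw
}

static int16_t srem7(int16_t x) {
  return (int16_t)(x - sdiv7(x) * 7);                 // vpmullw by 7, vpsubw
}
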
diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
index 4f69098d0b9..992be948127 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
@@ -184,154 +184,19 @@ define <8 x i32> @test_div7_8i32(<8 x i32> %a) nounwind {
define <16 x i16> @test_div7_16i16(<16 x i16> %a) nounwind {
; AVX1-LABEL: test_div7_16i16:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpextrw $1, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vmovd %xmm1, %ecx
-; AVX1-NEXT: movzwl %cx, %edx
-; AVX1-NEXT: imull $9363, %edx, %edx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: andl $65534, %ecx # imm = 0xFFFE
-; AVX1-NEXT: shrl %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: shrl $2, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $2, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $3, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $4, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $5, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $6, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $7, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1
-; AVX1-NEXT: vpextrw $1, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vmovd %xmm0, %ecx
-; AVX1-NEXT: movzwl %cx, %edx
-; AVX1-NEXT: imull $9363, %edx, %edx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: andl $65534, %ecx # imm = 0xFFFE
-; AVX1-NEXT: shrl %ecx
-; AVX1-NEXT: addl %edx, %ecx
-; AVX1-NEXT: shrl $2, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $2, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $3, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $4, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $5, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $6, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $7, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: andl $65534, %eax # imm = 0xFFFE
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: addl %ecx, %eax
-; AVX1-NEXT: shrl $2, %eax
-; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9363,9363,9363,9363,9363,9363,9363,9363]
+; AVX1-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vpsubw %xmm2, %xmm0, %xmm3
+; AVX1-NEXT: vpsrlw $1, %xmm3, %xmm3
+; AVX1-NEXT: vpaddw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpsrlw $2, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpmulhuw %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm0
+; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_div7_16i16:
@@ -661,216 +526,22 @@ define <16 x i16> @test_rem7_16i16(<16 x i16> %a) nounwind {
; AVX1-LABEL: test_rem7_16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpextrw $1, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vmovd %xmm1, %ecx
-; AVX1-NEXT: movzwl %cx, %edx
-; AVX1-NEXT: imull $9363, %edx, %edx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: movl %ecx, %esi
-; AVX1-NEXT: subl %edx, %esi
-; AVX1-NEXT: andl $65534, %esi # imm = 0xFFFE
-; AVX1-NEXT: shrl %esi
-; AVX1-NEXT: addl %edx, %esi
-; AVX1-NEXT: shrl $2, %esi
-; AVX1-NEXT: leal (,%rsi,8), %edx
-; AVX1-NEXT: subl %esi, %edx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $2, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $3, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $4, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $5, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $6, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $7, %xmm1, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1
-; AVX1-NEXT: vpextrw $1, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vmovd %xmm0, %ecx
-; AVX1-NEXT: movzwl %cx, %edx
-; AVX1-NEXT: imull $9363, %edx, %edx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %edx
-; AVX1-NEXT: movl %ecx, %esi
-; AVX1-NEXT: subl %edx, %esi
-; AVX1-NEXT: andl $65534, %esi # imm = 0xFFFE
-; AVX1-NEXT: shrl %esi
-; AVX1-NEXT: addl %edx, %esi
-; AVX1-NEXT: shrl $2, %esi
-; AVX1-NEXT: leal (,%rsi,8), %edx
-; AVX1-NEXT: subl %esi, %edx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $2, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $3, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $4, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $5, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $6, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $7, %xmm0, %eax
-; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: movl %eax, %edx
-; AVX1-NEXT: subl %ecx, %edx
-; AVX1-NEXT: andl $65534, %edx # imm = 0xFFFE
-; AVX1-NEXT: shrl %edx
-; AVX1-NEXT: addl %ecx, %edx
-; AVX1-NEXT: shrl $2, %edx
-; AVX1-NEXT: leal (,%rdx,8), %ecx
-; AVX1-NEXT: subl %edx, %ecx
-; AVX1-NEXT: subl %ecx, %eax
-; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9363,9363,9363,9363,9363,9363,9363,9363]
+; AVX1-NEXT: vpmulhuw %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vpsubw %xmm3, %xmm1, %xmm4
+; AVX1-NEXT: vpsrlw $1, %xmm4, %xmm4
+; AVX1-NEXT: vpaddw %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $2, %xmm3, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [7,7,7,7,7,7,7,7]
+; AVX1-NEXT: vpmullw %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpsubw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpmulhuw %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpsubw %xmm2, %xmm0, %xmm3
+; AVX1-NEXT: vpsrlw $1, %xmm3, %xmm3
+; AVX1-NEXT: vpaddw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpsrlw $2, %xmm2, %xmm2
+; AVX1-NEXT: vpmullw %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpsubw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
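
The unsigned tests use the analogous AVX1 sequence on each 128-bit half: vpmulhuw by 9363 (roughly 2^16/7) produces a first approximation t, the quotient is then (((x - t) >> 1) + t) >> 2, which stays within 16 bits, and the remainder test again multiplies the quotient by 7 and subtracts it from the input. A per-lane scalar equivalent (illustrative C++ only):

#include <cstdint>

// Per-lane model of the AVX1 sequence for unsigned division/remainder by 7.
static uint16_t udiv7(uint16_t x) {
  uint16_t t = (uint16_t)(((uint32_t)x * 9363) >> 16);      // vpmulhuw with 9363
  return (uint16_t)(((uint16_t)((x - t) >> 1) + t) >> 2);   // vpsubw, vpsrlw $1, vpaddw, vpsrlw $2
}

static uint16_t urem7(uint16_t x) {
  return (uint16_t)(x - udiv7(x) * 7);                      // vpmullw by 7, vpsubw
}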