summary refs log tree commit diff stats
path: root/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
diff options
context:
space:
mode:
author Craig Topper <craig.topper@intel.com> 2019-10-01 22:40:03 +0000
committer Craig Topper <craig.topper@intel.com> 2019-10-01 22:40:03 +0000
commit 0da163a2cf2e3d90a8f01a3dc748875906d896b9 (patch)
tree 792cd9821ed1385cdb16b60150f3a034a6181627 /llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
parent 5269091dc7ce424c575cf57374a0902ef6df40cc (diff)
downloadbcm5719-llvm-0da163a2cf2e3d90a8f01a3dc748875906d896b9.tar.gz
bcm5719-llvm-0da163a2cf2e3d90a8f01a3dc748875906d896b9.zip
Revert r373172 "[X86] Add custom isel logic to match VPTERNLOG from 2 logic ops."
This seems to be causing some performance regressions that I'm trying to investigate. One thing that stands out is that this transform can increase the live range of the operands of the earlier logic op. This can be bad for register allocation. If there are two logic op inputs we should really combine the one that is closest, but SelectionDAG doesn't have a good way to do that. Maybe we need to do this as a basic block transform in Machine IR.
llvm-svn: 373401
Diffstat (limited to 'llvm/test/CodeGen/X86/vector-fshr-rot-256.ll')
-rw-r--r-- llvm/test/CodeGen/X86/vector-fshr-rot-256.ll 20
1 files changed, 11 insertions, 9 deletions
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
index e617cc05a01..bf7c057965b 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
@@ -487,11 +487,12 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm2
-; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm3
-; AVX512VL-NEXT: vpternlogq $248, {{.*}}(%rip), %ymm2, %ymm3
+; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2
+; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm3
+; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
+; AVX512VL-NEXT: vpor %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
-; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: var_funnnel_v32i8:
@@ -911,11 +912,12 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512VL-NEXT: vpsubb %xmm1, %xmm3, %xmm1
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm3
-; AVX512VL-NEXT: vpsrlw %xmm1, %xmm4, %xmm0
-; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0
-; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512VL-NEXT: vpternlogq $236, %ymm3, %ymm2, %ymm0
+; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsrlw %xmm1, %xmm4, %xmm1
+; AVX512VL-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX512VL-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512VL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v32i8:
OpenPOWER on IntegriCloud