From 3b04a4e322e442690bab05d4163ed40d7fd9f950 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 7 Oct 2018 11:45:46 +0000
Subject: [SelectionDAG] Respect multiple uses in SimplifyDemandedBits to
 SimplifyDemandedVectorElts simplification

rL343913 was using SimplifyDemandedBits's original demanded mask instead of the adjusted 'NewMask' that accounts for multiple uses of the op (those variable names really need improving....).

Annoyingly many of the test changes (back to pre-rL343913 state) are actually safe - but only because their multiple uses are all by PMULDQ/PMULUDQ.

Thanks to Jan Vesely (@jvesely) for bisecting the bug.

llvm-svn: 343935
---
 llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll')
diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
index f064e861a0f..e0c7a5e673b 100644
--- a/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
@@ -1826,6 +1826,12 @@ declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind rea
 define <4 x i64> @test_mm256_mul_epi32(<4 x i64> %a0, <4 x i64> %a1) {
 ; CHECK-LABEL: test_mm256_mul_epi32:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpsllq $32, %ymm0, %ymm2
+; CHECK-NEXT:    vpsrad $31, %ymm2, %ymm2
+; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7]
+; CHECK-NEXT:    vpsllq $32, %ymm1, %ymm2
+; CHECK-NEXT:    vpsrad $31, %ymm2, %ymm2
+; CHECK-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
 ; CHECK-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0
 ; CHECK-NEXT:    ret{{[l|q]}}
   %A = shl <4 x i64> %a0, <i64 32, i64 32, i64 32, i64 32>
@@ -1840,6 +1846,9 @@ declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
 define <4 x i64> @test_mm256_mul_epu32(<4 x i64> %a0, <4 x i64> %a1) {
 ; CHECK-LABEL: test_mm256_mul_epu32:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7]
+; CHECK-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
 ; CHECK-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
 ; CHECK-NEXT:    ret{{[l|q]}}
   %A = and <4 x i64> %a0, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
-- 
cgit v1.2.3