From 3b04a4e322e442690bab05d4163ed40d7fd9f950 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 7 Oct 2018 11:45:46 +0000 Subject: [SelectionDAG] Respect multiple uses in SimplifyDemandedBits to SimplifyDemandedVectorElts simplification rL343913 was using SimplifyDemandedBits's original demanded mask instead of the adjusted 'NewMask' that accounts for multiple uses of the op (those variable names really need improving....). Annoyingly many of the test changes (back to pre-rL343913 state) are actually safe - but only because their multiple uses are all by PMULDQ/PMULUDQ. Thanks to Jan Vesely (@jvesely) for bisecting the bug. llvm-svn: 343935 --- llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll') diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll index f064e861a0f..e0c7a5e673b 100644 --- a/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll @@ -1826,6 +1826,12 @@ declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind rea define <4 x i64> @test_mm256_mul_epi32(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-LABEL: test_mm256_mul_epi32: ; CHECK: # %bb.0: +; CHECK-NEXT: vpsllq $32, %ymm0, %ymm2 +; CHECK-NEXT: vpsrad $31, %ymm2, %ymm2 +; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7] +; CHECK-NEXT: vpsllq $32, %ymm1, %ymm2 +; CHECK-NEXT: vpsrad $31, %ymm2, %ymm2 +; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7] ; CHECK-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: ret{{[l|q]}} %A = shl <4 x i64> %a0, @@ -1840,6 +1846,9 @@ declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone define <4 x i64> @test_mm256_mul_epu32(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-LABEL: test_mm256_mul_epu32: ; CHECK: # %bb.0: +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7] +; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7] ; CHECK-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: ret{{[l|q]}} %A = and <4 x i64> %a0, -- cgit v1.2.3