| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-06-26 09:50:11 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-06-26 09:50:11 +0000 |
| commit | 3845a4f849309eab5dabecf1b0b6320af60b3dea (patch) | |
| tree | 104fff75e6e3fa81aecda8775d186a07afc6a785 /llvm | |
| parent | 1a0810407edcff9791a5354d762ef8b4e7c07287 (diff) | |
| download | bcm5719-llvm-3845a4f849309eab5dabecf1b0b6320af60b3dea.tar.gz bcm5719-llvm-3845a4f849309eab5dabecf1b0b6320af60b3dea.zip | |
[X86][AVX] truncateVectorWithPACK - avoid bitcasted shuffles
truncateVectorWithPACK is often used in conjunction with ComputeNumSignBits, which struggles when peeking through bitcasts.
This fix avoids bitcast(shuffle(bitcast())) patterns in the 256-bit, 64-bit-sublane shuffles, so we can still see through them at least until lowering, when the shuffles will need to be bitcasted anyway to widen the shuffle type.
llvm-svn: 364401
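
To make the mask scaling concrete, here is a minimal standalone sketch of what the new code computes. This is hypothetical illustration code, not LLVM's `scaleShuffleMask` itself (the helper name `scaleMask` and the `main` driver are invented), and it assumes the usual widening semantics in which coarse index M expands to the Scale consecutive fine indices M*Scale .. M*Scale+Scale-1; sentinel/undef indices are ignored.

```cpp
#include <cassert>
#include <cstdio>
#include <vector>

// Hypothetical model of the mask scaling -- not LLVM's scaleShuffleMask.
// Each coarse index M of a 64-bit-granularity mask expands to Scale
// consecutive fine indices, so the same permutation can be expressed
// directly in the narrower output element type without bitcasts.
static std::vector<int> scaleMask(int Scale, const std::vector<int> &Mask) {
  assert(Scale > 0 && "scale must be positive");
  std::vector<int> Scaled;
  Scaled.reserve(Mask.size() * static_cast<size_t>(Scale));
  for (int M : Mask)
    for (int S = 0; S != Scale; ++S)
      Scaled.push_back(M * Scale + S);
  return Scaled;
}

int main() {
  // After a 256-bit vpackssdw the result is v16i16, so
  // Scale = 64 / 16 = 4 and the v4i64 mask {0, 2, 1, 3} becomes a
  // v16i16 mask that selects the same 64-bit sublanes.
  for (int Idx : scaleMask(4, {0, 2, 1, 3}))
    std::printf("%d ", Idx);
  std::printf("\n"); // prints: 0 1 2 3 8 9 10 11 4 5 6 7 12 13 14 15
  return 0;
}
```

Because each group of Scale indices is contiguous and 64-bit aligned, the scaled mask still lowers to the single 64-bit lane permute seen in the tests (vpermq ymm0[0,2,1,3]), but the DAG now carries one shuffle in the output type with no surrounding bitcasts, which ComputeNumSignBits can look through directly.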
Diffstat (limited to 'llvm')
| mode | file | changes |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 7 |
| -rw-r--r-- | llvm/test/CodeGen/X86/bitcast-setcc-512.ll | 3 |
| -rw-r--r-- | llvm/test/CodeGen/X86/bitcast-vector-bool.ll | 3 |
| -rw-r--r-- | llvm/test/CodeGen/X86/movmsk-cmp.ll | 18 |
4 files changed, 5 insertions, 26 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 31c795c47d0..0639940bf02 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18766,8 +18766,11 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
 
   // 256-bit PACK(ARG0, ARG1) leaves us with ((LO0,LO1),(HI0,HI1)),
   // so we need to shuffle to get ((LO0,HI0),(LO1,HI1)).
-  Res = DAG.getBitcast(MVT::v4i64, Res);
-  Res = DAG.getVectorShuffle(MVT::v4i64, DL, Res, Res, {0, 2, 1, 3});
+  // Scale shuffle mask to avoid bitcasts and help ComputeNumSignBits.
+  SmallVector<int, 64> Mask;
+  int Scale = 64 / OutVT.getScalarSizeInBits();
+  scaleShuffleMask<int>(Scale, makeArrayRef<int>({ 0, 2, 1, 3 }), Mask);
+  Res = DAG.getVectorShuffle(OutVT, DL, Res, Res, Mask);
 
   if (DstVT.is256BitVector())
     return DAG.getBitcast(DstVT, Res);
diff --git a/llvm/test/CodeGen/X86/bitcast-setcc-512.ll b/llvm/test/CodeGen/X86/bitcast-setcc-512.ll
index 177be1fd6a6..9f5097bab3c 100644
--- a/llvm/test/CodeGen/X86/bitcast-setcc-512.ll
+++ b/llvm/test/CodeGen/X86/bitcast-setcc-512.ll
@@ -624,9 +624,6 @@ define void @bitcast_8i64_store(i8* %p, <8 x i64> %a0) {
 ;
 ; AVX2-LABEL: bitcast_8i64_store:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm1
-; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm0
 ; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
 ; AVX2-NEXT:    vmovmskps %ymm0, %eax
diff --git a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
index adcee2abe33..1e402ae0e71 100644
--- a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
+++ b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
@@ -539,9 +539,6 @@ define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
 ;
 ; AVX2-LABEL: bitcast_v8i64_to_v2i4:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm1
-; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm0
 ; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
 ; AVX2-NEXT:    vmovmskps %ymm0, %eax
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index 6f7ec7d3a20..c144ca6183d 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -1079,9 +1079,6 @@ define i1 @allones_v8i64_sign(<8 x i64> %arg) {
 ;
 ; AVX2-LABEL: allones_v8i64_sign:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm1
-; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm0
 ; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
 ; AVX2-NEXT:    vmovmskps %ymm0, %eax
@@ -1180,9 +1177,6 @@ define i1 @allzeros_v8i64_sign(<8 x i64> %arg) {
 ;
 ; AVX2-LABEL: allzeros_v8i64_sign:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm1
-; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm0
 ; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
 ; AVX2-NEXT:    vmovmskps %ymm0, %eax
@@ -2515,11 +2509,8 @@ define i1 @allones_v8i64_and1(<8 x i64> %arg) {
 ;
 ; AVX2-LABEL: allones_v8i64_and1:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpsllq $63, %ymm1, %ymm1
-; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpsllq $63, %ymm0, %ymm0
-; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm0
 ; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
 ; AVX2-NEXT:    vmovmskps %ymm0, %eax
@@ -2589,11 +2580,8 @@ define i1 @allzeros_v8i64_and1(<8 x i64> %arg) {
 ;
 ; AVX2-LABEL: allzeros_v8i64_and1:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpsllq $63, %ymm1, %ymm1
-; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpsllq $63, %ymm0, %ymm0
-; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm0
 ; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
 ; AVX2-NEXT:    vmovmskps %ymm0, %eax
@@ -3926,11 +3914,8 @@ define i1 @allones_v8i64_and4(<8 x i64> %arg) {
 ;
 ; AVX2-LABEL: allones_v8i64_and4:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpsllq $61, %ymm1, %ymm1
-; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpsllq $61, %ymm0, %ymm0
-; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm0
 ; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
 ; AVX2-NEXT:    vmovmskps %ymm0, %eax
@@ -4000,11 +3985,8 @@ define i1 @allzeros_v8i64_and4(<8 x i64> %arg) {
 ;
 ; AVX2-LABEL: allzeros_v8i64_and4:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpsllq $61, %ymm1, %ymm1
-; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpsllq $61, %ymm0, %ymm0
-; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm0
 ; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
 ; AVX2-NEXT:    vmovmskps %ymm0, %eax
```

