diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2020-01-14 11:20:09 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2020-01-14 11:41:47 +0000 |
commit | a43b0065c5c78eba3fb83881fb628f5b8182db64 (patch) | |
tree | 983d795963a172cda345f34ac75d1b66d6a29190 | |
parent | e73b20c57dc7a8c847ebadeb7e19c08ec84f5bd7 (diff) | |
download | bcm5719-llvm-a43b0065c5c78eba3fb83881fb628f5b8182db64.tar.gz bcm5719-llvm-a43b0065c5c78eba3fb83881fb628f5b8182db64.zip |
[SelectionDAG] ComputeKnownBits - merge getValidMinimumShiftAmountConstant() and generic ISD::SRL handling.
As mentioned by @nikic on rGef5debac4302 (although that was just about SHL), we can merge the guaranteed top zero bits from the shifted value, and then, if a min shift amount is known, zero out the top bits as well.
SHL tests / handling will be added in a follow up patch.
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 22 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll | 2 |
3 files changed, 12 insertions, 14 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index a0f621ab50a..64d439f5f78 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2885,23 +2885,25 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } break; case ISD::SRL: + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) { - Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); unsigned Shift = ShAmt->getZExtValue(); Known.Zero.lshrInPlace(Shift); Known.One.lshrInPlace(Shift); // High bits are known zero. Known.Zero.setHighBits(Shift); - } else if (const APInt *ShMinAmt = - getValidMinimumShiftAmountConstant(Op, DemandedElts)) { - // Minimum shift high bits are known zero. - Known.Zero.setHighBits(ShMinAmt->getZExtValue()); - } else { - // No matter the shift amount, the leading zeros will stay zero. - Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - Known.Zero = APInt::getHighBitsSet(BitWidth, Known.countMinLeadingZeros()); - Known.One.clearAllBits(); + break; } + + // No matter the shift amount, the leading zeros will stay zero. + Known.Zero = APInt::getHighBitsSet(BitWidth, Known.countMinLeadingZeros()); + Known.One.clearAllBits(); + + // Minimum shift high bits are known zero. + if (const APInt *ShMinAmt = + getValidMinimumShiftAmountConstant(Op, DemandedElts)) + Known.Zero.setHighBits(ShMinAmt->getZExtValue()); break; case ISD::SRA: if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) { diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll index 88dd1da8940..47b7d157dda 100644 --- a/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll +++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll @@ -405,7 +405,6 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind { ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0] ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm2, %zmm1 -; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 ; AVX512BW-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1 ; AVX512BW-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 @@ -923,7 +922,6 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind { ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0] ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm2, %zmm1 -; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 ; AVX512BW-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1 ; AVX512BW-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll index 198c6de8b0e..155990c3df4 100644 --- a/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll +++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll @@ -345,7 +345,6 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind { ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero ; AVX512BW-NEXT: vpsrlvw {{.*}}(%rip), %zmm1, %zmm1 -; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 ; AVX512BW-NEXT: vpmullw {{.*}}(%rip), %zmm1, %zmm1 ; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1 ; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 @@ -793,7 +792,6 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind { ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero ; AVX512BW-NEXT: vpsrlvw {{.*}}(%rip), %zmm1, %zmm1 -; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 ; AVX512BW-NEXT: vpmullw {{.*}}(%rip), %zmm1, %zmm1 ; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1 ; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 |