-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp |  8 ++++----
-rw-r--r--  llvm/test/CodeGen/X86/x86-interleaved-access.ll        | 22 ++++------------------
2 files changed, 8 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index eaa827309b0..31e87692cbd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -990,11 +990,11 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
   // than the width of NewLHS/NewRH, we can avoid inserting real truncate
   // instruction, which is redudant eventually.
   unsigned OpLEffectiveBits =
-      OpL.getValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1;
+      OpL.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1;
   unsigned OpREffectiveBits =
-      OpR.getValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1;
-  if (OpLEffectiveBits <= NewLHS.getValueSizeInBits() &&
-      OpREffectiveBits <= NewRHS.getValueSizeInBits()) {
+      OpR.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1;
+  if (OpLEffectiveBits <= NewLHS.getScalarValueSizeInBits() &&
+      OpREffectiveBits <= NewRHS.getScalarValueSizeInBits()) {
     NewLHS = OpL;
     NewRHS = OpR;
   } else {
diff --git a/llvm/test/CodeGen/X86/x86-interleaved-access.ll b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
index c62f5df086b..946c9b8afa2 100644
--- a/llvm/test/CodeGen/X86/x86-interleaved-access.ll
+++ b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
@@ -543,10 +543,7 @@ define <16 x i1> @interleaved_load_vf16_i8_stride4(<64 x i8>* %ptr) {
 ; AVX1-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
 ; AVX1-NEXT:    vpcmpeqb %xmm0, %xmm5, %xmm0
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX1-NEXT:    vpand %xmm1, %xmm3, %xmm2
-; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpeqb %xmm0, %xmm2, %xmm0
+; AVX1-NEXT:    vpcmpeqb %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -594,9 +591,6 @@ define <16 x i1> @interleaved_load_vf16_i8_stride4(<64 x i8>* %ptr) {
 ; AVX2-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
 ; AVX2-NEXT:    vpcmpeqb %xmm0, %xmm4, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX2-NEXT:    vpand %xmm1, %xmm2, %xmm2
-; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpcmpeqb %xmm0, %xmm2, %xmm0
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -751,17 +745,12 @@ define <32 x i1> @interleaved_load_vf32_i8_stride4(<128 x i8>* %ptr) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm8, %xmm4
 ; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm4
+; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm2, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [72340172838076673,72340172838076673]
-; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm1, %xmm3, %xmm3
 ; AVX1-NEXT:    vpcmpeqb %xmm0, %xmm3, %xmm0
-; AVX1-NEXT:    vpand %xmm1, %xmm4, %xmm3
-; AVX1-NEXT:    vpand %xmm1, %xmm2, %xmm1
-; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: interleaved_load_vf32_i8_stride4:
@@ -856,10 +845,7 @@ define <32 x i1> @interleaved_load_vf32_i8_stride4(<128 x i8>* %ptr) {
 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3,4,5],ymm2[6,7]
 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX2-NEXT:    vpand %ymm1, %ymm8, %ymm2
-; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpcmpeqb %ymm0, %ymm2, %ymm0
+; AVX2-NEXT:    vpcmpeqb %ymm0, %ymm8, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: interleaved_load_vf32_i8_stride4:
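
For context (not part of the commit itself): DAG.ComputeNumSignBits is a per-element query on vector values, while getValueSizeInBits returns the width of the whole vector, so mixing the two made the reuse check essentially never pass for promoted vector setcc operands, and a redundant mask (the vpand sequences deleted in the tests above) was emitted instead. Below is a minimal standalone sketch of the arithmetic, with a hypothetical canReusePromotedOperand helper over plain integer widths rather than real SelectionDAG nodes:

#include <cstdio>

// Standalone model of the effective-bits check in PromoteSetCCOperands.
// OpBits models the width of the already-promoted operand, SignBits models
// DAG.ComputeNumSignBits, and NewBits the width the setcc actually needs.
// Because ComputeNumSignBits counts sign bits per vector element, all three
// widths must be element (scalar) widths for the comparison to be meaningful.
static bool canReusePromotedOperand(unsigned OpBits, unsigned SignBits,
                                    unsigned NewBits) {
  // A value with S duplicated sign bits carries only (width - S + 1)
  // significant bits; if those fit in NewBits, no truncate/mask is needed.
  unsigned EffectiveBits = OpBits - SignBits + 1;
  return EffectiveBits <= NewBits;
}

int main() {
  // v16i1 setcc operands promoted to v16i8 boolean vectors whose elements
  // are all-zeros or all-ones (8 sign bits per element):
  // scalar widths, as after this patch: 8 - 8 + 1 = 1 <= 1, reuse directly.
  printf("%d\n", canReusePromotedOperand(8, 8, 1));
  // full vector widths, as before the patch: 128 - 8 + 1 = 121 <= 16 fails,
  // which is why the tests previously masked with vpand before comparing.
  printf("%d\n", canReusePromotedOperand(128, 8, 16));
  return 0;
}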

