diff options
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 9 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-compare-all_of.ll | 20 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-compare-any_of.ll | 25 |
3 files changed, 24 insertions, 30 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0ae7b5b254b..9db9d11904c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -34316,6 +34316,14 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract, if (DAG.ComputeNumSignBits(Match) != BitWidth) return SDValue(); + SDLoc DL(Extract); + if (MatchSizeInBits == 256 && BitWidth < 32 && !Subtarget.hasInt256()) { + SDValue Lo, Hi; + std::tie(Lo, Hi) = DAG.SplitVector(Match, DL); + Match = DAG.getNode(BinOp, DL, Lo.getValueType(), Lo, Hi); + MatchSizeInBits = Match.getValueSizeInBits(); + } + // For 32/64 bit comparisons use MOVMSKPS/MOVMSKPD, else PMOVMSKB. MVT MaskSrcVT; if (64 == BitWidth || 32 == BitWidth) @@ -34324,7 +34332,6 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract, else MaskSrcVT = MVT::getVectorVT(MVT::i8, MatchSizeInBits / 8); - SDLoc DL(Extract); SDValue CmpC; ISD::CondCode CondCode; if (BinOp == ISD::OR) { diff --git a/llvm/test/CodeGen/X86/vector-compare-all_of.ll b/llvm/test/CodeGen/X86/vector-compare-all_of.ll index 8c450945b4d..a2f7e9dda7a 100644 --- a/llvm/test/CodeGen/X86/vector-compare-all_of.ll +++ b/llvm/test/CodeGen/X86/vector-compare-all_of.ll @@ -673,12 +673,10 @@ define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) { ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: vpmovmskb %xmm2, %ecx -; AVX1-NEXT: shll $16, %ecx -; AVX1-NEXT: orl %eax, %ecx +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpmovmskb %xmm0, %ecx ; AVX1-NEXT: xorl %eax, %eax -; AVX1-NEXT: cmpl $-1, %ecx +; AVX1-NEXT: cmpl $65535, %ecx # imm = 0xFFFF ; AVX1-NEXT: sete %al ; AVX1-NEXT: negl %eax ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax @@ -867,11 +865,9 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) { ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: vpmovmskb %xmm2, %ecx -; AVX1-NEXT: shll $16, %ecx -; AVX1-NEXT: orl %eax, %ecx -; AVX1-NEXT: cmpl $-1, %ecx +; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF ; AVX1-NEXT: sete %al ; AVX1-NEXT: negb %al ; AVX1-NEXT: vzeroupper @@ -1555,11 +1551,9 @@ define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) { ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: vpmovmskb %xmm2, %ecx -; AVX1-NEXT: shll $16, %ecx -; AVX1-NEXT: orl %eax, %ecx -; AVX1-NEXT: cmpl $-1, %ecx +; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF ; AVX1-NEXT: sete %al ; AVX1-NEXT: negb %al ; AVX1-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll index 2236e953e07..73e476341c0 100644 --- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll +++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll @@ -611,13 +611,10 @@ define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) { ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpmovmskb %xmm0, %ecx -; AVX1-NEXT: vpmovmskb %xmm2, %edx -; AVX1-NEXT: shll $16, %edx -; AVX1-NEXT: xorl %eax, %eax -; AVX1-NEXT: orl %ecx, %edx -; AVX1-NEXT: setne %al +; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpmovmskb %xmm0, %eax ; AVX1-NEXT: negl %eax +; AVX1-NEXT: sbbl %eax, %eax ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -790,12 +787,10 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) { ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: vpmovmskb %xmm2, %ecx -; AVX1-NEXT: shll $16, %ecx -; AVX1-NEXT: orl %eax, %ecx -; AVX1-NEXT: setne %al -; AVX1-NEXT: negb %al +; AVX1-NEXT: negl %eax +; AVX1-NEXT: sbbb %al, %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; @@ -1488,12 +1483,10 @@ define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) { ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: vpmovmskb %xmm2, %ecx -; AVX1-NEXT: shll $16, %ecx -; AVX1-NEXT: orl %eax, %ecx -; AVX1-NEXT: setne %al -; AVX1-NEXT: negb %al +; AVX1-NEXT: negl %eax +; AVX1-NEXT: sbbb %al, %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; |