 llvm/lib/Target/X86/X86ISelLowering.cpp | 28 ++++++++++++++++++++++++++++
 llvm/test/CodeGen/X86/widen_load-2.ll   | 12 ++++++++----
 2 files changed, 36 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 21e60994eac..d6ee4a3e302 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -33372,6 +33372,34 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
     }
     break;
   }
+  case X86ISD::PEXTRB:
+  case X86ISD::PEXTRW: {
+    SDValue Vec = Op.getOperand(0);
+    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+    MVT VecVT = Vec.getSimpleValueType();
+    unsigned NumVecElts = VecVT.getVectorNumElements();
+
+    if (CIdx && CIdx->getAPIntValue().ult(NumVecElts)) {
+      unsigned Idx = CIdx->getZExtValue();
+      unsigned VecBitWidth = VecVT.getScalarSizeInBits();
+
+      // If we demand no bits from the vector then we must have demanded
+      // bits from the implicit zext - simplify to zero.
+      APInt DemandedVecBits = OriginalDemandedBits.trunc(VecBitWidth);
+      if (DemandedVecBits == 0)
+        return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+
+      KnownBits KnownVec;
+      APInt DemandedVecElts = APInt::getOneBitSet(NumVecElts, Idx);
+      if (SimplifyDemandedBits(Vec, DemandedVecBits, DemandedVecElts,
+                               KnownVec, TLO, Depth + 1))
+        return true;
+
+      Known = KnownVec.zext(BitWidth, true);
+      return false;
+    }
+    break;
+  }
   case X86ISD::PINSRB:
   case X86ISD::PINSRW: {
     SDValue Vec = Op.getOperand(0);
diff --git a/llvm/test/CodeGen/X86/widen_load-2.ll b/llvm/test/CodeGen/X86/widen_load-2.ll
index 23b68b26980..1fc7aee74cc 100644
--- a/llvm/test/CodeGen/X86/widen_load-2.ll
+++ b/llvm/test/CodeGen/X86/widen_load-2.ll
@@ -368,10 +368,12 @@ define void @rot(%i8vec3pack* nocapture sret %result, %i8vec3pack* %X, %i8vec3pa
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movdqa {{.*#+}} xmm0 = [10395294,10395294,10395294,10395294]
+; X86-NEXT:    pextrw $0, %xmm0, (%edx)
 ; X86-NEXT:    movb $-98, 2(%edx)
-; X86-NEXT:    movw $-24930, (%edx) # imm = 0x9E9E
+; X86-NEXT:    movdqa {{.*#+}} xmm0 = [65793,65793,65793,65793]
+; X86-NEXT:    pextrw $0, %xmm0, (%ecx)
 ; X86-NEXT:    movb $1, 2(%ecx)
-; X86-NEXT:    movw $257, (%ecx) # imm = 0x101
 ; X86-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X86-NEXT:    movdqa %xmm0, %xmm1
 ; X86-NEXT:    psrld $1, %xmm1
@@ -385,10 +387,12 @@ define void @rot(%i8vec3pack* nocapture sret %result, %i8vec3pack* %X, %i8vec3pa
 ; X64-LABEL: rot:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    movdqa {{.*#+}} xmm0 = [10395294,10395294,10395294,10395294]
+; X64-NEXT:    pextrw $0, %xmm0, (%rsi)
 ; X64-NEXT:    movb $-98, 2(%rsi)
-; X64-NEXT:    movw $-24930, (%rsi) # imm = 0x9E9E
+; X64-NEXT:    movdqa {{.*#+}} xmm0 = [65793,65793,65793,65793]
+; X64-NEXT:    pextrw $0, %xmm0, (%rdx)
 ; X64-NEXT:    movb $1, 2(%rdx)
-; X64-NEXT:    movw $257, (%rdx) # imm = 0x101
 ; X64-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X64-NEXT:    movdqa %xmm0, %xmm1
 ; X64-NEXT:    psrld $1, %xmm1

