summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp28
-rw-r--r--llvm/test/CodeGen/X86/widen_load-2.ll12
2 files changed, 36 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 21e60994eac..d6ee4a3e302 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -33372,6 +33372,34 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
}
break;
}
+ case X86ISD::PEXTRB:
+ case X86ISD::PEXTRW: {
+ SDValue Vec = Op.getOperand(0);
+ auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ MVT VecVT = Vec.getSimpleValueType();
+ unsigned NumVecElts = VecVT.getVectorNumElements();
+
+ if (CIdx && CIdx->getAPIntValue().ult(NumVecElts)) {
+ unsigned Idx = CIdx->getZExtValue();
+ unsigned VecBitWidth = VecVT.getScalarSizeInBits();
+
+ // If we demand no bits from the vector then we must have demanded
+ // bits from the implicit zext - simplify to zero.
+ APInt DemandedVecBits = OriginalDemandedBits.trunc(VecBitWidth);
+ if (DemandedVecBits == 0)
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+
+ KnownBits KnownVec;
+ APInt DemandedVecElts = APInt::getOneBitSet(NumVecElts, Idx);
+ if (SimplifyDemandedBits(Vec, DemandedVecBits, DemandedVecElts,
+ KnownVec, TLO, Depth + 1))
+ return true;
+
+ Known = KnownVec.zext(BitWidth, true);
+ return false;
+ }
+ break;
+ }
case X86ISD::PINSRB:
case X86ISD::PINSRW: {
SDValue Vec = Op.getOperand(0);
diff --git a/llvm/test/CodeGen/X86/widen_load-2.ll b/llvm/test/CodeGen/X86/widen_load-2.ll
index 23b68b26980..1fc7aee74cc 100644
--- a/llvm/test/CodeGen/X86/widen_load-2.ll
+++ b/llvm/test/CodeGen/X86/widen_load-2.ll
@@ -368,10 +368,12 @@ define void @rot(%i8vec3pack* nocapture sret %result, %i8vec3pack* %X, %i8vec3pa
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movdqa {{.*#+}} xmm0 = [10395294,10395294,10395294,10395294]
+; X86-NEXT: pextrw $0, %xmm0, (%edx)
; X86-NEXT: movb $-98, 2(%edx)
-; X86-NEXT: movw $-24930, (%edx) # imm = 0x9E9E
+; X86-NEXT: movdqa {{.*#+}} xmm0 = [65793,65793,65793,65793]
+; X86-NEXT: pextrw $0, %xmm0, (%ecx)
; X86-NEXT: movb $1, 2(%ecx)
-; X86-NEXT: movw $257, (%ecx) # imm = 0x101
; X86-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: psrld $1, %xmm1
@@ -385,10 +387,12 @@ define void @rot(%i8vec3pack* nocapture sret %result, %i8vec3pack* %X, %i8vec3pa
; X64-LABEL: rot:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: movdqa {{.*#+}} xmm0 = [10395294,10395294,10395294,10395294]
+; X64-NEXT: pextrw $0, %xmm0, (%rsi)
; X64-NEXT: movb $-98, 2(%rsi)
-; X64-NEXT: movw $-24930, (%rsi) # imm = 0x9E9E
+; X64-NEXT: movdqa {{.*#+}} xmm0 = [65793,65793,65793,65793]
+; X64-NEXT: pextrw $0, %xmm0, (%rdx)
; X64-NEXT: movb $1, 2(%rdx)
-; X64-NEXT: movw $257, (%rdx) # imm = 0x101
; X64-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrld $1, %xmm1
OpenPOWER on IntegriCloud