summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp25
-rw-r--r--llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll4
2 files changed, 26 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 72ef6bc6cf8..4fb7aa00223 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34273,6 +34273,31 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
// TODO convert SrcUndef to KnownUndef.
break;
}
+ case X86ISD::KSHIFTL:
+ case X86ISD::KSHIFTR: {
+ SDValue Src = Op.getOperand(0);
+ auto *Amt = cast<ConstantSDNode>(Op.getOperand(1));
+ assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount");
+ unsigned ShiftAmt = Amt->getZExtValue();
+ bool ShiftLeft = (X86ISD::KSHIFTL == Opc);
+
+ APInt DemandedSrc =
+ ShiftLeft ? DemandedElts.lshr(ShiftAmt) : DemandedElts.shl(ShiftAmt);
+ if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO,
+ Depth + 1))
+ return true;
+
+ if (ShiftLeft) {
+ KnownUndef = KnownUndef.shl(ShiftAmt);
+ KnownZero = KnownZero.shl(ShiftAmt);
+ KnownZero.setLowBits(ShiftAmt);
+ } else {
+ KnownUndef = KnownUndef.lshr(ShiftAmt);
+ KnownZero = KnownZero.lshr(ShiftAmt);
+ KnownZero.setHighBits(ShiftAmt);
+ }
+ break;
+ }
case X86ISD::CVTSI2P:
case X86ISD::CVTUI2P: {
SDValue Src = Op.getOperand(0);
diff --git a/llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll b/llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
index 7abb3a043a6..bf5ba184fc0 100644
--- a/llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
+++ b/llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
@@ -153,9 +153,7 @@ define <32 x i1> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0
; AVX256VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,12,13,u,u,8,9,6,7,14,15,14,15,0,1,22,23,28,29,18,19,26,27,22,23,u,u,30,31,16,17]
; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm2 {%k1} {z}
; AVX256VL-NEXT: vpmovdw %ymm2, %xmm2
-; AVX256VL-NEXT: kshiftrw $8, %k1, %k1
-; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm3 {%k1} {z}
-; AVX256VL-NEXT: vpmovdw %ymm3, %xmm3
+; AVX256VL-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX256VL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX256VL-NEXT: vpermq {{.*#+}} ymm2 = ymm2[1,1,2,1]
; AVX256VL-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255]
OpenPOWER on IntegriCloud