-rw-r--r-- llvm/include/llvm/CodeGen/SelectionDAGNodes.h    |   3
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 132
-rw-r--r-- llvm/lib/Target/ARM/ARMInstrNEON.td              |   6
-rw-r--r-- llvm/test/CodeGen/ARM/vshll.ll                   |   2
-rw-r--r-- llvm/test/CodeGen/X86/combine-shl.ll             |  11
-rw-r--r-- llvm/test/CodeGen/X86/not-and-simplify.ll        |   4
-rw-r--r-- llvm/test/CodeGen/X86/sse2-vector-shifts.ll      |   1
-rw-r--r-- llvm/test/CodeGen/X86/vector-blend.ll            |   4
-rw-r--r-- llvm/test/CodeGen/X86/vector-rotate-128.ll       |   8
-rw-r--r-- llvm/test/CodeGen/X86/vector-rotate-256.ll       |   6
-rw-r--r-- llvm/test/CodeGen/X86/widen_cast-4.ll            |   2
11 files changed, 91 insertions(+), 88 deletions(-)
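
The patch below updates TargetLowering::SimplifyDemandedBits so that the ISD::SHL, ISD::SRL and ISD::SRA cases accept splatted vector shift amounts via isConstOrConstSplat instead of requiring a scalar ConstantSDNode, letting the existing scalar shift folds also fire for uniform vector shifts. As a minimal, hypothetical LLVM IR sketch of the kind of pattern this enables (the function name and the shift amounts 2 and 17 are chosen for illustration only; the fold itself runs on the SelectionDAG during demanded-bits analysis, not on this IR directly):

; With a splatted shift amount, demanded-bits analysis can now prove that the
; bits cleared by the zero-extension are shifted out anyway, so no masking
; instruction survives and the code lowers to a single vector shift.
define <4 x i32> @zext_shl_example(<4 x i16> %x) {
  %a = shl <4 x i16> %x, <i16 2, i16 2, i16 2, i16 2>
  %b = zext <4 x i16> %a to <4 x i32>
  %c = shl <4 x i32> %b, <i32 17, i32 17, i32 17, i32 17>
  ret <4 x i32> %c
}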
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 32d3b787e11..02ea4eacfe8 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1434,6 +1434,9 @@ public:
const APInt &getAPIntValue() const { return Value->getValue(); }
uint64_t getZExtValue() const { return Value->getZExtValue(); }
int64_t getSExtValue() const { return Value->getSExtValue(); }
+ uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX) {
+ return Value->getLimitedValue(Limit);
+ }
bool isOne() const { return Value->isOne(); }
bool isNullValue() const { return Value->isZero(); }
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 4004d69c580..f6d14a8546c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -779,33 +779,38 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
break;
}
case ISD::SHL:
- if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- unsigned ShAmt = SA->getZExtValue();
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
SDValue InOp = Op.getOperand(0);
// If the shift count is an invalid immediate, don't do anything.
- if (ShAmt >= BitWidth)
+ if (SA->getAPIntValue().uge(BitWidth))
break;
+ unsigned ShAmt = SA->getZExtValue();
+
// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
// single shift. We can do this if the bottom bits (which are shifted
// out) are never demanded.
- if (InOp.getOpcode() == ISD::SRL &&
- isa<ConstantSDNode>(InOp.getOperand(1))) {
- if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
- unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
- unsigned Opc = ISD::SHL;
- int Diff = ShAmt-C1;
- if (Diff < 0) {
- Diff = -Diff;
- Opc = ISD::SRL;
- }
+ if (InOp.getOpcode() == ISD::SRL) {
+ if (ConstantSDNode *SA2 = isConstOrConstSplat(InOp.getOperand(1))) {
+ if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+ if (SA2->getAPIntValue().ult(BitWidth)) {
+ unsigned C1 = SA2->getZExtValue();
+ unsigned Opc = ISD::SHL;
+ int Diff = ShAmt-C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SRL;
+ }
- SDValue NewSA =
- TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
- EVT VT = Op.getValueType();
- return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
- InOp.getOperand(0), NewSA));
+ SDValue NewSA =
+ TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
+ EVT VT = Op.getValueType();
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+ InOp.getOperand(0),
+ NewSA));
+ }
+ }
}
}
@@ -817,7 +822,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
SDValue InnerOp = InOp.getOperand(0);
EVT InnerVT = InnerOp.getValueType();
- unsigned InnerBits = InnerVT.getSizeInBits();
+ unsigned InnerBits = InnerVT.getScalarSizeInBits();
if (ShAmt < InnerBits && NewMask.getActiveBits() <= InnerBits &&
isTypeDesirableForOp(ISD::SHL, InnerVT)) {
EVT ShTy = getShiftAmountTy(InnerVT, DL);
@@ -836,45 +841,42 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// (shl (anyext x), c2-c1). This requires that the bottom c1 bits
// aren't demanded (as above) and that the shifted upper c1 bits of
// x aren't demanded.
- if (InOp.hasOneUse() &&
- InnerOp.getOpcode() == ISD::SRL &&
- InnerOp.hasOneUse() &&
- isa<ConstantSDNode>(InnerOp.getOperand(1))) {
- unsigned InnerShAmt = cast<ConstantSDNode>(InnerOp.getOperand(1))
- ->getZExtValue();
- if (InnerShAmt < ShAmt &&
- InnerShAmt < InnerBits &&
- NewMask.getActiveBits() <= (InnerBits - InnerShAmt + ShAmt) &&
- NewMask.countTrailingZeros() >= ShAmt) {
- SDValue NewSA =
- TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
- Op.getOperand(1).getValueType());
- EVT VT = Op.getValueType();
- SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
- InnerOp.getOperand(0));
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT,
- NewExt, NewSA));
+ if (InOp.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
+ InnerOp.hasOneUse()) {
+ if (ConstantSDNode *SA2 = isConstOrConstSplat(InnerOp.getOperand(1))) {
+ unsigned InnerShAmt = SA2->getLimitedValue(InnerBits);
+ if (InnerShAmt < ShAmt &&
+ InnerShAmt < InnerBits &&
+ NewMask.getActiveBits() <= (InnerBits - InnerShAmt + ShAmt) &&
+ NewMask.countTrailingZeros() >= ShAmt) {
+ SDValue NewSA =
+ TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
+ Op.getOperand(1).getValueType());
+ EVT VT = Op.getValueType();
+ SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
+ InnerOp.getOperand(0));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT,
+ NewExt, NewSA));
+ }
}
}
}
- Known.Zero <<= SA->getZExtValue();
- Known.One <<= SA->getZExtValue();
+ Known.Zero <<= ShAmt;
+ Known.One <<= ShAmt;
// low bits known zero.
- Known.Zero.setLowBits(SA->getZExtValue());
+ Known.Zero.setLowBits(ShAmt);
}
break;
case ISD::SRL:
- if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- EVT VT = Op.getValueType();
- unsigned ShAmt = SA->getZExtValue();
- unsigned VTSize = VT.getSizeInBits();
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
SDValue InOp = Op.getOperand(0);
// If the shift count is an invalid immediate, don't do anything.
- if (ShAmt >= BitWidth)
+ if (SA->getAPIntValue().uge(BitWidth))
break;
+ unsigned ShAmt = SA->getZExtValue();
APInt InDemandedMask = (NewMask << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
@@ -885,21 +887,27 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
// single shift. We can do this if the top bits (which are shifted out)
// are never demanded.
- if (InOp.getOpcode() == ISD::SHL &&
- isa<ConstantSDNode>(InOp.getOperand(1))) {
- if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) {
- unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
- unsigned Opc = ISD::SRL;
- int Diff = ShAmt-C1;
- if (Diff < 0) {
- Diff = -Diff;
- Opc = ISD::SHL;
- }
+ if (InOp.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SA2 = isConstOrConstSplat(InOp.getOperand(1))) {
+ if (ShAmt &&
+ (NewMask & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
+ if (SA2->getAPIntValue().ult(BitWidth)) {
+ unsigned C1 = SA2->getZExtValue();
+ unsigned Opc = ISD::SRL;
+ int Diff = ShAmt-C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SHL;
+ }
- SDValue NewSA =
- TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
- return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
- InOp.getOperand(0), NewSA));
+ SDValue NewSA =
+ TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
+ EVT VT = Op.getValueType();
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+ InOp.getOperand(0),
+ NewSA));
+ }
+ }
}
}
@@ -923,14 +931,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
Op.getOperand(0), Op.getOperand(1)));
- if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
EVT VT = Op.getValueType();
- unsigned ShAmt = SA->getZExtValue();
// If the shift count is an invalid immediate, don't do anything.
- if (ShAmt >= BitWidth)
+ if (SA->getAPIntValue().uge(BitWidth))
break;
+ unsigned ShAmt = SA->getZExtValue();
APInt InDemandedMask = (NewMask << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index 86c90efce2c..495d44f96b8 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -5529,6 +5529,12 @@ def : Pat<(v4i32 (NEONvshl (sext (v4i16 DPR:$Rn)), (i32 16))),
(VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (NEONvshl (sext (v2i32 DPR:$Rn)), (i32 32))),
(VSHLLi32 DPR:$Rn, 32)>;
+def : Pat<(v8i16 (NEONvshl (anyext (v8i8 DPR:$Rn)), (i32 8))),
+ (VSHLLi8 DPR:$Rn, 8)>;
+def : Pat<(v4i32 (NEONvshl (anyext (v4i16 DPR:$Rn)), (i32 16))),
+ (VSHLLi16 DPR:$Rn, 16)>;
+def : Pat<(v2i64 (NEONvshl (anyext (v2i32 DPR:$Rn)), (i32 32))),
+ (VSHLLi32 DPR:$Rn, 32)>;
// VSHRN : Vector Shift Right and Narrow
defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
diff --git a/llvm/test/CodeGen/ARM/vshll.ll b/llvm/test/CodeGen/ARM/vshll.ll
index a8230134d91..61de4fa9db8 100644
--- a/llvm/test/CodeGen/ARM/vshll.ll
+++ b/llvm/test/CodeGen/ARM/vshll.ll
@@ -97,7 +97,7 @@ define <8 x i16> @vshllu8_bad(<8 x i8>* %A) nounwind {
define <4 x i32> @vshlls16_bad(<4 x i16>* %A) nounwind {
; CHECK-LABEL: vshlls16_bad:
-; CHECK: vmovl.s16
+; CHECK: vmovl.u16
; CHECK: vshl.i32
%tmp1 = load <4 x i16>, <4 x i16>* %A
%sext = sext <4 x i16> %tmp1 to <4 x i32>
diff --git a/llvm/test/CodeGen/X86/combine-shl.ll b/llvm/test/CodeGen/X86/combine-shl.ll
index fbddd3c7532..0d130dc0ee8 100644
--- a/llvm/test/CodeGen/X86/combine-shl.ll
+++ b/llvm/test/CodeGen/X86/combine-shl.ll
@@ -193,17 +193,16 @@ define <4 x i32> @combine_vec_shl_shl_zero1(<4 x i32> %x) {
define <8 x i32> @combine_vec_shl_ext_shl0(<8 x i16> %x) {
; SSE-LABEL: combine_vec_shl_ext_shl0:
; SSE: # BB#0:
-; SSE-NEXT: pmovsxwd %xmm0, %xmm2
-; SSE-NEXT: pslld $20, %xmm2
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE-NEXT: pmovsxwd %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT: pslld $20, %xmm1
-; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: pslld $20, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_shl_ext_shl0:
; AVX: # BB#0:
-; AVX-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: vpslld $20, %ymm0, %ymm0
; AVX-NEXT: retq
%1 = shl <8 x i16> %x, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
diff --git a/llvm/test/CodeGen/X86/not-and-simplify.ll b/llvm/test/CodeGen/X86/not-and-simplify.ll
index 87aa10a6e29..8ecc859bead 100644
--- a/llvm/test/CodeGen/X86/not-and-simplify.ll
+++ b/llvm/test/CodeGen/X86/not-and-simplify.ll
@@ -47,9 +47,7 @@ define i8 @shrink_xor_constant2(i8 %x) {
define <16 x i8> @shrink_xor_constant2_splat(<16 x i8> %x) {
; ALL-LABEL: shrink_xor_constant2_splat:
; ALL: # BB#0:
-; ALL-NEXT: psllw $5, %xmm0
-; ALL-NEXT: pand {{.*}}(%rip), %xmm0
-; ALL-NEXT: pandn {{.*}}(%rip), %xmm0
+; ALL-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; ALL-NEXT: retq
%sh = shl <16 x i8> %x, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
%not = xor <16 x i8> %sh, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
diff --git a/llvm/test/CodeGen/X86/sse2-vector-shifts.ll b/llvm/test/CodeGen/X86/sse2-vector-shifts.ll
index d1c7adb6263..c2bb239639a 100644
--- a/llvm/test/CodeGen/X86/sse2-vector-shifts.ll
+++ b/llvm/test/CodeGen/X86/sse2-vector-shifts.ll
@@ -336,7 +336,6 @@ define <4 x i16> @sra_trunc_srl_v4i32(<4 x i32> %x) nounwind {
define <4 x i32> @shl_zext_shl_v4i32(<4 x i16> %x) nounwind {
; CHECK-LABEL: shl_zext_shl_v4i32:
; CHECK: # BB#0:
-; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
; CHECK-NEXT: pslld $19, %xmm0
; CHECK-NEXT: retq
%shl0 = shl <4 x i16> %x, <i16 2, i16 2, i16 2, i16 2>
diff --git a/llvm/test/CodeGen/X86/vector-blend.ll b/llvm/test/CodeGen/X86/vector-blend.ll
index 3eff8813726..03aebed9274 100644
--- a/llvm/test/CodeGen/X86/vector-blend.ll
+++ b/llvm/test/CodeGen/X86/vector-blend.ll
@@ -985,17 +985,15 @@ define <4 x i32> @blend_neg_logic_v4i32_2(<4 x i32> %v, <4 x i32> %c) {
; SSE41-LABEL: blend_neg_logic_v4i32_2:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrad $31, %xmm1
; SSE41-NEXT: pxor %xmm3, %xmm3
; SSE41-NEXT: psubd %xmm2, %xmm3
-; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm3
; SSE41-NEXT: movaps %xmm3, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: blend_neg_logic_v4i32_2:
; AVX: # BB#0: # %entry
-; AVX-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT: vpsubd %xmm0, %xmm2, %xmm2
; AVX-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll
index 5fafecae23d..2d407290acc 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-128.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll
@@ -1559,13 +1559,8 @@ define <2 x i64> @splatconstant_rotate_mask_v2i64(<2 x i64> %a) nounwind {
;
; X32-SSE-LABEL: splatconstant_rotate_mask_v2i64:
; X32-SSE: # BB#0:
-; X32-SSE-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE-NEXT: psllq $15, %xmm1
; X32-SSE-NEXT: psrlq $49, %xmm0
; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm1
-; X32-SSE-NEXT: por %xmm0, %xmm1
-; X32-SSE-NEXT: movdqa %xmm1, %xmm0
; X32-SSE-NEXT: retl
%shl = shl <2 x i64> %a, <i64 15, i64 15>
%lshr = lshr <2 x i64> %a, <i64 49, i64 49>
@@ -1581,7 +1576,6 @@ define <4 x i32> @splatconstant_rotate_mask_v4i32(<4 x i32> %a) nounwind {
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: pslld $4, %xmm1
; SSE-NEXT: psrld $28, %xmm0
-; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: pand {{.*}}(%rip), %xmm1
; SSE-NEXT: por %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
@@ -1591,7 +1585,6 @@ define <4 x i32> @splatconstant_rotate_mask_v4i32(<4 x i32> %a) nounwind {
; AVX: # BB#0:
; AVX-NEXT: vpslld $4, %xmm0, %xmm1
; AVX-NEXT: vpsrld $28, %xmm0, %xmm0
-; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
@@ -1621,7 +1614,6 @@ define <4 x i32> @splatconstant_rotate_mask_v4i32(<4 x i32> %a) nounwind {
; X32-SSE-NEXT: movdqa %xmm0, %xmm1
; X32-SSE-NEXT: pslld $4, %xmm1
; X32-SSE-NEXT: psrld $28, %xmm0
-; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm1
; X32-SSE-NEXT: por %xmm0, %xmm1
; X32-SSE-NEXT: movdqa %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll
index 104c77084d9..3f67ea65b00 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-256.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll
@@ -997,10 +997,10 @@ define <32 x i8> @splatconstant_rotate_v32i8(<32 x i8> %a) nounwind {
define <4 x i64> @splatconstant_rotate_mask_v4i64(<4 x i64> %a) nounwind {
; AVX1-LABEL: splatconstant_rotate_mask_v4i64:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpsrlq $49, %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsrlq $49, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlq $49, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/widen_cast-4.ll b/llvm/test/CodeGen/X86/widen_cast-4.ll
index cc6fb27a629..5c352124725 100644
--- a/llvm/test/CodeGen/X86/widen_cast-4.ll
+++ b/llvm/test/CodeGen/X86/widen_cast-4.ll
@@ -26,7 +26,7 @@ define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind {
; NARROW-NEXT: psubw %xmm0, %xmm2
; NARROW-NEXT: psllw $8, %xmm2
; NARROW-NEXT: psraw $8, %xmm2
-; NARROW-NEXT: psraw $2, %xmm2
+; NARROW-NEXT: psrlw $2, %xmm2
; NARROW-NEXT: pshufb %xmm1, %xmm2
; NARROW-NEXT: movq %xmm2, (%edx,%eax,8)
; NARROW-NEXT: incl (%esp)