 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp |  11
 llvm/lib/Target/X86/X86ISelLowering.cpp | 115
 llvm/lib/Target/X86/X86InstrMMX.td      |  29
 llvm/lib/Target/X86/X86InstrSSE.td      |  35
 llvm/test/CodeGen/X86/vec_zero_cse.ll   |  35
5 files changed, 128 insertions, 97 deletions
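
The heart of the patch is the pair of helpers added to X86ISelLowering.cpp below: every all-zeros (and now all-ones) BUILD_VECTOR is materialized in one canonical integer type per register class, v4i32 for SSE and v2i32 for MMX, and then bitcast to the requested type. Because SelectionDAG folds identical nodes, every zero vector in a function collapses to a single node no matter which element type asked for it. A condensed sketch of the idea, using the same 2007-era API the patch uses (illustrative, not compilable on its own):

    // Before the patch, a v16i8 zero and a v2i64 zero were two different
    // BUILD_VECTOR nodes and could never be CSE'd.  After it, both requests
    // build the identical canonical node, so the second getNode() call just
    // returns the node made by the first; only the cheap BIT_CONVERTs differ.
    SDOperand Cst  = DAG.getTargetConstant(0, MVT::i32);
    SDOperand Zero = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                 Cst, Cst, Cst, Cst);   // shared by all users
    SDOperand AsV16i8 = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, Zero);
    SDOperand AsV2i64 = DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Zero);
    // 64-bit (MMX) types get the same treatment through MVT::v2i32.
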
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index d43ac19e3f3..73cecdb967e 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -842,20 +842,15 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDOperand Op, SDOperand Pred,
   // Also handle the case where we explicitly require zeros in the top
   // elements.  This is a vector shuffle from the zero vector.
   if (N.getOpcode() == ISD::VECTOR_SHUFFLE && N.Val->hasOneUse() &&
-      N.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
+      // Check to see if the top elements are all zeros (or bitcast of zeros).
+      ISD::isBuildVectorAllZeros(N.getOperand(0).Val) &&
       N.getOperand(1).getOpcode() == ISD::SCALAR_TO_VECTOR &&
       N.getOperand(1).Val->hasOneUse() &&
       ISD::isNON_EXTLoad(N.getOperand(1).getOperand(0).Val) &&
       N.getOperand(1).getOperand(0).hasOneUse()) {
-    // Check to see if the BUILD_VECTOR is building a zero vector.
-    SDOperand BV = N.getOperand(0);
-    for (unsigned i = 0, e = BV.getNumOperands(); i != e; ++i)
-      if (!isZeroNode(BV.getOperand(i)) &&
-          BV.getOperand(i).getOpcode() != ISD::UNDEF)
-        return false;  // Not a zero/undef vector.
     // Check to see if the shuffle mask is 4/L/L/L or 2/L, where L is something
     // from the LHS.
-    unsigned VecWidth = BV.getNumOperands();
+    unsigned VecWidth=MVT::getVectorNumElements(N.getOperand(0).getValueType());
     SDOperand ShufMask = N.getOperand(2);
     assert(ShufMask.getOpcode() == ISD::BUILD_VECTOR && "Invalid shuf mask!");
     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(ShufMask.getOperand(0))) {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d3c89f6de96..3fcae95e459 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2728,7 +2728,7 @@ static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
   return true;
 }
 
-/// CommuteVectorShuffle - Swap vector_shuffle operandsas well as
+/// CommuteVectorShuffle - Swap vector_shuffle operands as well as
 /// values in ther permute mask.
 static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
                                       SDOperand &V2, SDOperand &Mask,
@@ -2867,23 +2867,24 @@ static bool isZeroShuffle(SDNode *N) {
   unsigned NumElems = Mask.getNumOperands();
   for (unsigned i = 0; i != NumElems; ++i) {
     SDOperand Arg = Mask.getOperand(i);
-    if (Arg.getOpcode() != ISD::UNDEF) {
-      unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
-      if (Idx < NumElems) {
-        unsigned Opc = V1.Val->getOpcode();
-        if (Opc == ISD::UNDEF)
-          continue;
-        if (Opc != ISD::BUILD_VECTOR ||
-            !isZeroNode(V1.Val->getOperand(Idx)))
-          return false;
-      } else if (Idx >= NumElems) {
-        unsigned Opc = V2.Val->getOpcode();
-        if (Opc == ISD::UNDEF)
-          continue;
-        if (Opc != ISD::BUILD_VECTOR ||
-            !isZeroNode(V2.Val->getOperand(Idx - NumElems)))
-          return false;
-      }
+    if (Arg.getOpcode() == ISD::UNDEF)
+      continue;
+
+    unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
+    if (Idx < NumElems) {
+      unsigned Opc = V1.Val->getOpcode();
+      if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.Val))
+        continue;
+      if (Opc != ISD::BUILD_VECTOR ||
+          !isZeroNode(V1.Val->getOperand(Idx)))
+        return false;
+    } else if (Idx >= NumElems) {
+      unsigned Opc = V2.Val->getOpcode();
+      if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.Val))
+        continue;
+      if (Opc != ISD::BUILD_VECTOR ||
+          !isZeroNode(V2.Val->getOperand(Idx - NumElems)))
+        return false;
     }
   }
   return true;
@@ -2893,14 +2894,35 @@ static bool isZeroShuffle(SDNode *N) {
 ///
 static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
   assert(MVT::isVector(VT) && "Expected a vector type");
-  unsigned NumElems = MVT::getVectorNumElements(VT);
-  MVT::ValueType EVT = MVT::getVectorElementType(VT);
-  bool isFP = MVT::isFloatingPoint(EVT);
-  SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
-  SmallVector<SDOperand, 8> ZeroVec(NumElems, Zero);
-  return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size());
+
+  // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted to their dest
+  // type.  This ensures they get CSE'd.
+  SDOperand Cst = DAG.getTargetConstant(0, MVT::i32);
+  SDOperand Vec;
+  if (MVT::getSizeInBits(VT) == 64)  // MMX
+    Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
+  else                               // SSE
+    Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
+  return DAG.getNode(ISD::BIT_CONVERT, VT, Vec);
+}
+
+/// getOnesVector - Returns a vector of specified type with all bits set.
+///
+static SDOperand getOnesVector(MVT::ValueType VT, SelectionDAG &DAG) {
+  assert(MVT::isVector(VT) && "Expected a vector type");
+
+  // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest
+  // type.  This ensures they get CSE'd.
+  SDOperand Cst = DAG.getTargetConstant(~0U, MVT::i32);
+  SDOperand Vec;
+  if (MVT::getSizeInBits(VT) == 64)  // MMX
+    Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
+  else                               // SSE
+    Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
+  return DAG.getNode(ISD::BIT_CONVERT, VT, Vec);
 }
 
+
 /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
 /// that point to V2 points to its first element.
 static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
@@ -2981,24 +3003,28 @@ static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
   }
 
   V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
-  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
-  Mask = getZeroVector(MaskVT, DAG);
+  Mask = getZeroVector(MVT::v4i32, DAG);
   SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
                                   DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
   return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
 }
 
 /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
-/// vector of zero or undef vector.
+/// vector of zero or undef vector.  This produces a shuffle where the low
+/// element of V2 is swizzled into the zero/undef vector, landing at element
+/// Idx.  This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
 static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
                                              unsigned NumElems, unsigned Idx,
                                              bool isZero, SelectionDAG &DAG) {
   SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
   MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
   MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
-  SDOperand Zero = DAG.getConstant(0, EVT);
-  SmallVector<SDOperand, 8> MaskVec(NumElems, Zero);
-  MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
+  SmallVector<SDOperand, 16> MaskVec;
+  for (unsigned i = 0; i != NumElems; ++i)
+    if (i == Idx)  // If this is the insertion idx, put the low elt of V2 here.
+      MaskVec.push_back(DAG.getConstant(NumElems, EVT));
+    else
+      MaskVec.push_back(DAG.getConstant(i, EVT));
   SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                &MaskVec[0], MaskVec.size());
   return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
@@ -3078,13 +3104,18 @@ static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
 
 SDOperand
 X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
-  // All zero's are handled with pxor.
-  if (ISD::isBuildVectorAllZeros(Op.Val))
-    return Op;
+  // All zero's are handled with pxor, all one's are handled with pcmpeqd.
+  if (ISD::isBuildVectorAllZeros(Op.Val) || ISD::isBuildVectorAllOnes(Op.Val)) {
+    // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to
+    // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
+    // eliminated on x86-32 hosts.
+    if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32)
+      return Op;
 
-  // All one's are handled with pcmpeqd.
-  if (ISD::isBuildVectorAllOnes(Op.Val))
-    return Op;
+    if (ISD::isBuildVectorAllOnes(Op.Val))
+      return getOnesVector(Op.getValueType(), DAG);
+    return getZeroVector(Op.getValueType(), DAG);
+  }
 
   MVT::ValueType VT = Op.getValueType();
   MVT::ValueType EVT = MVT::getVectorElementType(VT);
@@ -3113,12 +3144,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
   }
 
   if (NumNonZero == 0) {
-    if (NumZero == 0)
-      // All undef vector. Return an UNDEF.
-      return DAG.getNode(ISD::UNDEF, VT);
-    else
-      // A mix of zero and undef. Return a zero vector.
-      return getZeroVector(VT, DAG);
+    // All undef vector. Return an UNDEF.  All zero vectors were handled above.
+    return DAG.getNode(ISD::UNDEF, VT);
   }
 
   // Splat is obviously ok. Let legalizer expand it to a shuffle.
@@ -3299,8 +3326,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
     return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
 
   bool Commuted = false;
+  // FIXME: This should also accept a bitcast of a splat?  Be careful, not
+  // 1,1,1,1 -> v8i16 though.
   V1IsSplat = isSplatVector(V1.Val);
   V2IsSplat = isSplatVector(V2.Val);
+
+  // Canonicalize the splat or undef, if present, to be on the RHS.
   if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
     Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
     std::swap(V1IsSplat, V2IsSplat);
diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td
index b7024bc8766..c892c342334 100644
--- a/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/llvm/lib/Target/X86/X86InstrMMX.td
@@ -486,14 +486,13 @@ def MMX_MASKMOVQ : MMXI<0xF7, MRMDestMem, (outs), (ins VR64:$src, VR64:$mask),
 //===----------------------------------------------------------------------===//
 
 // Alias instructions that map zero vector to pxor.
-// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
 let isReMaterializable = 1 in {
   def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (outs VR64:$dst), (ins),
                         "pxor\t$dst, $dst",
-                        [(set VR64:$dst, (v1i64 immAllZerosV))]>;
+                        [(set VR64:$dst, (v2i32 immAllZerosV))]>;
   def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (outs VR64:$dst), (ins),
                               "pcmpeqd\t$dst, $dst",
-                              [(set VR64:$dst, (v1i64 immAllOnesV))]>;
+                              [(set VR64:$dst, (v2i32 immAllOnesV))]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -510,18 +509,6 @@ def : Pat<(store (v2i32 VR64:$src), addr:$dst),
 def : Pat<(store (v1i64 VR64:$src), addr:$dst),
           (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
 
-// 64-bit vector all zero's.
-def : Pat<(v8i8 immAllZerosV), (MMX_V_SET0)>;
-def : Pat<(v4i16 immAllZerosV), (MMX_V_SET0)>;
-def : Pat<(v2i32 immAllZerosV), (MMX_V_SET0)>;
-def : Pat<(v1i64 immAllZerosV), (MMX_V_SET0)>;
-
-// 64-bit vector all one's.
-def : Pat<(v8i8 immAllOnesV), (MMX_V_SETALLONES)>;
-def : Pat<(v4i16 immAllOnesV), (MMX_V_SETALLONES)>;
-def : Pat<(v2i32 immAllOnesV), (MMX_V_SETALLONES)>;
-def : Pat<(v1i64 immAllOnesV), (MMX_V_SETALLONES)>;
-
 // Bit convert.
 def : Pat<(v8i8 (bitconvert (v1i64 VR64:$src))), (v8i8 VR64:$src)>;
 def : Pat<(v8i8 (bitconvert (v2i32 VR64:$src))), (v8i8 VR64:$src)>;
@@ -551,10 +538,10 @@ def MMX_X86s2vec : SDNode<"X86ISD::S2VEC", SDTypeProfile<1, 1, []>, []>;
 // Move scalar to XMM zero-extended
 // movd to XMM register zero-extends
 let AddedComplexity = 15 in {
-  def : Pat<(v8i8 (vector_shuffle immAllZerosV,
+  def : Pat<(v8i8 (vector_shuffle immAllZerosV_bc,
                    (v8i8 (MMX_X86s2vec GR32:$src)), MMX_MOVL_shuffle_mask)),
             (MMX_MOVZDI2PDIrr GR32:$src)>;
-  def : Pat<(v4i16 (vector_shuffle immAllZerosV,
+  def : Pat<(v4i16 (vector_shuffle immAllZerosV_bc,
                     (v4i16 (MMX_X86s2vec GR32:$src)), MMX_MOVL_shuffle_mask)),
             (MMX_MOVZDI2PDIrr GR32:$src)>;
   def : Pat<(v2i32 (vector_shuffle immAllZerosV,
@@ -606,19 +593,19 @@ let AddedComplexity = 20 in {
 def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
                       VR64:$src2)),
           (MMX_PANDNrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v4i16 immAllOnesV))),
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v4i16 immAllOnesV_bc))),
                       VR64:$src2)),
          (MMX_PANDNrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v8i8 immAllOnesV))),
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v8i8 immAllOnesV_bc))),
                       VR64:$src2)),
          (MMX_PANDNrr VR64:$src1, VR64:$src2)>;
 
 def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
                       (load addr:$src2))),
          (MMX_PANDNrm VR64:$src1, addr:$src2)>;
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v4i16 immAllOnesV))),
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v4i16 immAllOnesV_bc))),
                       (load addr:$src2))),
          (MMX_PANDNrm VR64:$src1, addr:$src2)>;
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v8i8 immAllOnesV))),
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v8i8 immAllOnesV_bc))),
                       (load addr:$src2))),
          (MMX_PANDNrm VR64:$src1, addr:$src2)>;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 2c86e8d1c33..da23ccbaa09 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -939,11 +939,10 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                   "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
 
 // Alias instructions that map zero vector to pxor / xorp* for sse.
-// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
 let isReMaterializable = 1 in
 def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins),
                  "xorps\t$dst, $dst",
-                 [(set VR128:$dst, (v4f32 immAllZerosV))]>;
+                 [(set VR128:$dst, (v4i32 immAllZerosV))]>;
 
 // FR32 to 128-bit vector conversion.
 def MOVSS2PSrr : SSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR32:$src),
@@ -991,7 +990,7 @@ let isTwoAddress = 1 in {
 let AddedComplexity = 20 in
 def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
                       "movss\t{$src, $dst|$dst, $src}",
-                      [(set VR128:$dst, (v4f32 (vector_shuffle immAllZerosV,
+                      [(set VR128:$dst, (v4f32 (vector_shuffle immAllZerosV_bc,
                                  (v4f32 (scalar_to_vector (loadf32 addr:$src))),
                                  MOVL_shuffle_mask)))]>;
 
@@ -2119,11 +2118,10 @@ def MFENCE : I<0xAE, MRM6m, (outs), (ins),
 
 // Alias instructions that map zero vector to pxor / xorp* for sse.
-// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
 let isReMaterializable = 1 in
 def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins),
                        "pcmpeqd\t$dst, $dst",
-                       [(set VR128:$dst, (v2f64 immAllOnesV))]>;
+                       [(set VR128:$dst, (v4i32 immAllOnesV))]>;
 
 // FR64 to 128-bit vector conversion.
 def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR64:$src),
@@ -2220,7 +2218,7 @@ let AddedComplexity = 20 in
 def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                       "movsd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
-                        (v2f64 (vector_shuffle immAllZerosV,
+                        (v2f64 (vector_shuffle immAllZerosV_bc,
                                 (v2f64 (scalar_to_vector (loadf64 addr:$src))),
                                 MOVL_shuffle_mask)))]>;
 
@@ -2692,21 +2690,6 @@ def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
 def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
 def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
 
-// 128-bit vector all zero's.
-def : Pat<(v16i8 immAllZerosV), (V_SET0)>, Requires<[HasSSE2]>;
-def : Pat<(v8i16 immAllZerosV), (V_SET0)>, Requires<[HasSSE2]>;
-def : Pat<(v4i32 immAllZerosV), (V_SET0)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 immAllZerosV), (V_SET0)>, Requires<[HasSSE2]>;
-def : Pat<(v2f64 immAllZerosV), (V_SET0)>, Requires<[HasSSE2]>;
-
-// 128-bit vector all one's.
-def : Pat<(v16i8 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
-def : Pat<(v8i16 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
-def : Pat<(v4i32 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
-def : Pat<(v4f32 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE1]>;
-
-
 // Scalar to v8i16 / v16i8. The source may be a GR32, but only the lower 8 or
 // 16-bits matter.
 def : Pat<(v8i16 (X86s2vec GR32:$src)), (MOVDI2PDIrr GR32:$src)>,
@@ -2751,17 +2734,17 @@ let Predicates = [HasSSE2] in {
 // Move scalar to XMM zero-extended
 // movd to XMM register zero-extends
 let AddedComplexity = 15 in {
-def : Pat<(v8i16 (vector_shuffle immAllZerosV,
+def : Pat<(v8i16 (vector_shuffle immAllZerosV_bc,
                   (v8i16 (X86s2vec GR32:$src)), MOVL_shuffle_mask)),
          (MOVZDI2PDIrr GR32:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v16i8 (vector_shuffle immAllZerosV,
+def : Pat<(v16i8 (vector_shuffle immAllZerosV_bc,
                   (v16i8 (X86s2vec GR32:$src)), MOVL_shuffle_mask)),
         (MOVZDI2PDIrr GR32:$src)>, Requires<[HasSSE2]>;
 
 // Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
-def : Pat<(v2f64 (vector_shuffle immAllZerosV,
+def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc,
                   (v2f64 (scalar_to_vector FR64:$src)), MOVL_shuffle_mask)),
         (MOVLSD2PDrr (V_SET0), FR64:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v4f32 (vector_shuffle immAllZerosV,
+def : Pat<(v4f32 (vector_shuffle immAllZerosV_bc,
                   (v4f32 (scalar_to_vector FR32:$src)), MOVL_shuffle_mask)),
         (MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE2]>;
 }
@@ -2911,7 +2894,7 @@ def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
 
 // Set lowest element and zero upper elements.
 let AddedComplexity = 20 in
-def : Pat<(bc_v2i64 (vector_shuffle immAllZerosV,
+def : Pat<(bc_v2i64 (vector_shuffle immAllZerosV_bc,
                      (v2f64 (scalar_to_vector (loadf64 addr:$src))),
                      MOVL_shuffle_mask)),
           (MOVZQI2PQIrm addr:$src)>, Requires<[HasSSE2]>;
diff --git a/llvm/test/CodeGen/X86/vec_zero_cse.ll b/llvm/test/CodeGen/X86/vec_zero_cse.ll
new file mode 100644
index 00000000000..b882bad1aff
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vec_zero_cse.ll
@@ -0,0 +1,35 @@
+; RUN: llvm-as < %s | llc -relocation-model=static -mcpu=yonah | grep pxor | count 1
+; RUN: llvm-as < %s | llc -relocation-model=static -mcpu=yonah | grep xorps | count 1
+; RUN: llvm-as < %s | llc -relocation-model=static -mcpu=yonah | grep pcmpeqd | count 2
+
+@M1 = external global <1 x i64>
+@M2 = external global <2 x i32>
+
+@S1 = external global <2 x i64>
+@S2 = external global <4 x i32>
+
+define void @test() {
+  store <1 x i64> zeroinitializer, <1 x i64>* @M1
+  store <2 x i32> zeroinitializer, <2 x i32>* @M2
+  ret void
+}
+
+define void @test2() {
+  store <1 x i64> < i64 -1 >, <1 x i64>* @M1
+  store <2 x i32> < i32 -1, i32 -1 >, <2 x i32>* @M2
+  ret void
+}
+
+define void @test3() {
+  store <2 x i64> zeroinitializer, <2 x i64>* @S1
+  store <4 x i32> zeroinitializer, <4 x i32>* @S2
+  ret void
+}
+
+define void @test4() {
+  store <2 x i64> < i64 -1, i64 -1>, <2 x i64>* @S1
+  store <4 x i32> < i32 -1, i32 -1, i32 -1, i32 -1 >, <4 x i32>* @S2
+  ret void
+}
+
+
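
The new test pins down exactly the CSE behavior the patch is after: test() and test3() each store zero to two globals of different vector types, yet the RUN lines accept only one pxor (the MMX_V_SET0 both MMX stores now share) and one xorps (the V_SET0 both SSE stores share). The all-ones stores in test2() and test4() must produce exactly two pcmpeqd, one MMX_V_SETALLONES and one V_SETALLONES, since VR64 and VR128 are separate register files and cannot share a register. At the DAG level the sharing falls out of getZeroVector() directly; a hedged sketch for the two stores in test() (the variable names here are hypothetical):

    // Both stores in test() request a 64-bit zero vector.  getZeroVector()
    // builds the same (v2i32 BUILD_VECTOR 0, 0) node for each request, so
    // the DAG folds them and a single pxor feeds both movq stores.
    SDOperand ZeroM1 = getZeroVector(MVT::v1i64, DAG);  // store to @M1
    SDOperand ZeroM2 = getZeroVector(MVT::v2i32, DAG);  // store to @M2
    // ZeroM1 and ZeroM2 share one BUILD_VECTOR node; ZeroM1 merely wraps it
    // in a BIT_CONVERT to v1i64.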