-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp  |  26
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp     |  36
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td             |  29
-rw-r--r--  llvm/lib/Target/X86/X86InstrFoldTables.cpp        |   6
-rw-r--r--  llvm/lib/Target/X86/X86InstrSSE.td                |  57
-rw-r--r--  llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll     |   4
-rw-r--r--  llvm/test/CodeGen/X86/bitcast-setcc-256.ll        |  16
-rw-r--r--  llvm/test/CodeGen/X86/bitcast-setcc-512.ll        |   8
-rw-r--r--  llvm/test/CodeGen/X86/bitcast-vector-bool.ll      |  97
-rw-r--r--  llvm/test/CodeGen/X86/dagcombine-cse.ll           |  13
-rwxr-xr-x  llvm/test/CodeGen/X86/evex-to-vex-compress.mir    |  16
-rw-r--r--  llvm/test/CodeGen/X86/fast-isel-fneg.ll           |  13
-rw-r--r--  llvm/test/CodeGen/X86/masked_store.ll             |  82
-rw-r--r--  llvm/test/CodeGen/X86/movmsk-cmp.ll               | 112
-rw-r--r--  llvm/test/CodeGen/X86/peephole.mir                |  40
-rw-r--r--  llvm/test/CodeGen/X86/pr41619.ll                  |  27
16 files changed, 374 insertions, 208 deletions
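
For reference, the X86 tablegen hunks below drop the COPY_TO_REGCLASS-based Pat<(f32 (bitconvert GR32:$src))> / Pat<(f64 (bitconvert GR64:$src))> patterns and restore dedicated isCodeGenOnly instructions (MOVDI2SSrr, MOV64toSDrr and their VEX/EVEX variants) whose patterns select GPR-to-FP-register bitcasts directly. A minimal IR sketch of the kind of code that exercises those patterns is shown next; the function names and the llc invocation are illustrative and not part of this commit:

; Hypothetical standalone example; build with, e.g.:
;   llc -mtriple=x86_64-- -mattr=+avx %s -o -
define float @cast_i32_to_f32(i32 %x) {
  %f = bitcast i32 %x to float   ; lowers to a single (v)movd %edi, %xmm0
  ret float %f
}

define double @cast_i64_to_f64(i64 %x) {
  %d = bitcast i64 %x to double  ; lowers to a single (v)movq %rdi, %xmm0
  ret double %d
}
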
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index a961685ff71..093778add49 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1571,36 +1571,12 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, KnownSrcBits, TLO, Depth + 1)) return true; - } else if ((NumSrcEltBits % BitWidth) == 0 && - TLO.DAG.getDataLayout().isLittleEndian()) { - unsigned Scale = NumSrcEltBits / BitWidth; - unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; - APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); - APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); - for (unsigned i = 0; i != NumElts; ++i) - if (DemandedElts[i]) { - unsigned Offset = (i % Scale) * BitWidth; - DemandedSrcBits.insertBits(DemandedBits, Offset); - DemandedSrcElts.setBit(i / Scale); - } - - if (SrcVT.isVector()) { - APInt KnownSrcUndef, KnownSrcZero; - if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef, - KnownSrcZero, TLO, Depth + 1)) - return true; - } - - KnownBits KnownSrcBits; - if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, - KnownSrcBits, TLO, Depth + 1)) - return true; } // If this is a bitcast, let computeKnownBits handle it. Only do this on a // recursive call where Known may be useful to the caller. if (Depth > 0) { - Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); + Known = TLO.DAG.computeKnownBits(Op, Depth); return false; } break; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 409fbfa22f3..994e912ac9c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -3202,44 +3202,30 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N, SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const { - auto *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1)); - if (!RHS) + if (N->getValueType(0) != MVT::i64) return SDValue(); - EVT VT = N->getValueType(0); - SDValue LHS = N->getOperand(0); - unsigned ShiftAmt = RHS->getZExtValue(); - SelectionDAG &DAG = DCI.DAG; - SDLoc SL(N); - - // fold (srl (and x, c1 << c2), c2) -> (and (srl(x, c2), c1) - // this improves the ability to match BFE patterns in isel. 
- if (LHS.getOpcode() == ISD::AND) { - if (auto *Mask = dyn_cast<ConstantSDNode>(LHS.getOperand(1))) { - if (Mask->getAPIntValue().isShiftedMask() && - Mask->getAPIntValue().countTrailingZeros() == ShiftAmt) { - return DAG.getNode( - ISD::AND, SL, VT, - DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(0), N->getOperand(1)), - DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(1), N->getOperand(1))); - } - } - } - - if (VT != MVT::i64) + const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!RHS) return SDValue(); + unsigned ShiftAmt = RHS->getZExtValue(); if (ShiftAmt < 32) return SDValue(); // srl i64:x, C for C >= 32 // => // build_pair (srl hi_32(x), C - 32), 0 + + SelectionDAG &DAG = DCI.DAG; + SDLoc SL(N); + SDValue One = DAG.getConstant(1, SL, MVT::i32); SDValue Zero = DAG.getConstant(0, SL, MVT::i32); - SDValue VecOp = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, LHS); - SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecOp, One); + SDValue VecOp = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, N->getOperand(0)); + SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, + VecOp, One); SDValue NewConst = DAG.getConstant(ShiftAmt - 32, SL, MVT::i32); SDValue NewShift = DAG.getNode(ISD::SRL, SL, MVT::i32, Hi, NewConst); diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index d0d255b6a7f..0165949a7e3 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -3832,6 +3832,14 @@ def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), "vmovq\t{$src, $dst|$dst, $src}", []>, EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>; let isCodeGenOnly = 1 in { +def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src), + "vmovq\t{$src, $dst|$dst, $src}", + [(set FR64X:$dst, (bitconvert GR64:$src))]>, + EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; +def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src), + "vmovq\t{$src, $dst|$dst, $src}", + [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>, + EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>; def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64X:$src))]>, @@ -3844,6 +3852,20 @@ def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$ } } // ExeDomain = SSEPackedInt +// Move Int Doubleword to Single Scalar +// +let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { +def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src), + "vmovd\t{$src, $dst|$dst, $src}", + [(set FR32X:$dst, (bitconvert GR32:$src))]>, + EVEX, Sched<[WriteVecMoveFromGpr]>; + +def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src), + "vmovd\t{$src, $dst|$dst, $src}", + [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))]>, + EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>; +} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 + // Move doubleword from xmm register to r/m32 // let ExeDomain = SSEPackedInt in { @@ -3860,13 +3882,6 @@ def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs), EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>; } // ExeDomain = SSEPackedInt -let Predicates = [HasAVX512] in { - def : Pat<(f64 (bitconvert GR64:$src)), - (COPY_TO_REGCLASS (VMOV64toPQIZrr GR64:$src), FR64X)>; - def : Pat<(f32 (bitconvert GR32:$src)), - (COPY_TO_REGCLASS (VMOVDI2PDIZrr GR32:$src), FR32X)>; -} - // Move quadword from xmm1 register 
to r/m64 // let ExeDomain = SSEPackedInt in { diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/llvm/lib/Target/X86/X86InstrFoldTables.cpp index 82adcd8e147..59e62da55f2 100644 --- a/llvm/lib/Target/X86/X86InstrFoldTables.cpp +++ b/llvm/lib/Target/X86/X86InstrFoldTables.cpp @@ -531,11 +531,13 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = { { X86::MOV32rr, X86::MOV32rm, 0 }, { X86::MOV64rr, X86::MOV64rm, 0 }, { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 }, + { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 }, { X86::MOV8rr, X86::MOV8rm, 0 }, { X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16 }, { X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16 }, { X86::MOVDDUPrr, X86::MOVDDUPrm, TB_NO_REVERSE }, { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 }, + { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 }, { X86::MOVDQArr, X86::MOVDQArm, TB_ALIGN_16 }, { X86::MOVDQUrr, X86::MOVDQUrm, 0 }, { X86::MOVSHDUPrr, X86::MOVSHDUPrm, TB_ALIGN_16 }, @@ -816,6 +818,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = { { X86::VGETMANTPSZrri, X86::VGETMANTPSZrmi, 0 }, { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 }, { X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 }, + { X86::VMOV64toSDZrr, X86::VMOV64toSDZrm, 0 }, + { X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 }, { X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 }, { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 }, { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 }, @@ -833,6 +837,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = { { X86::VMOVDDUPrr, X86::VMOVDDUPrm, TB_NO_REVERSE }, { X86::VMOVDI2PDIZrr, X86::VMOVDI2PDIZrm, 0 }, { X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 }, + { X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 }, + { X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 }, { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 }, { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 }, { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zrm, TB_ALIGN_64 }, diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index feee34b7644..6a2e5cf1aa2 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -4109,6 +4109,11 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in def VMOV64toPQIrm : VRS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", []>, VEX, Sched<[WriteVecLoad]>; +let isCodeGenOnly = 1 in +def VMOV64toSDrr : VRS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), + "movq\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (bitconvert GR64:$src))]>, + VEX, Sched<[WriteVecMoveFromGpr]>; def MOVDI2PDIrr : S2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", @@ -4129,9 +4134,38 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in def MOV64toPQIrm : RS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", []>, Sched<[WriteVecLoad]>; +let isCodeGenOnly = 1 in +def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), + "movq\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (bitconvert GR64:$src))]>, + Sched<[WriteVecMoveFromGpr]>; } // ExeDomain = SSEPackedInt //===---------------------------------------------------------------------===// +// Move Int Doubleword to Single Scalar +// +let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { + def VMOVDI2SSrr : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (bitconvert GR32:$src))]>, + VEX, Sched<[WriteVecMoveFromGpr]>; + + 
def VMOVDI2SSrm : VS2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>, + VEX, Sched<[WriteVecLoad]>; + def MOVDI2SSrr : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (bitconvert GR32:$src))]>, + Sched<[WriteVecMoveFromGpr]>; + + def MOVDI2SSrm : S2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>, + Sched<[WriteVecLoad]>; +} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 + +//===---------------------------------------------------------------------===// // Move Packed Doubleword Int to Packed Double Int // let ExeDomain = SSEPackedInt in { @@ -4158,21 +4192,6 @@ def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), Sched<[WriteVecStore]>; } // ExeDomain = SSEPackedInt -let Predicates = [UseAVX] in { - def : Pat<(f64 (bitconvert GR64:$src)), - (COPY_TO_REGCLASS (VMOV64toPQIrr GR64:$src), FR64)>; - def : Pat<(f32 (bitconvert GR32:$src)), - (COPY_TO_REGCLASS (VMOVDI2PDIrr GR32:$src), FR32)>; -} - -let Predicates = [UseSSE2] in -def : Pat<(f64 (bitconvert GR64:$src)), - (COPY_TO_REGCLASS (MOV64toPQIrr GR64:$src), FR64)>; - -let Predicates = [UseSSE1] in -def : Pat<(f32 (bitconvert GR32:$src)), - (COPY_TO_REGCLASS (MOVDI2PDIrr GR32:$src), FR32)>; - //===---------------------------------------------------------------------===// // Move Packed Doubleword Int first element to Doubleword Int // @@ -4206,6 +4225,10 @@ def MOVPQIto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), // let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { let Predicates = [UseAVX] in + def VMOV64toSDrm : VS2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), + "movq\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>, + VEX, Sched<[WriteVecLoad]>; def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), "movq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64:$src))]>, @@ -4215,6 +4238,10 @@ let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>, VEX, Sched<[WriteVecStore]>; + def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), + "movq\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>, + Sched<[WriteVecLoad]>; def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), "movq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64:$src))]>, diff --git a/llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll b/llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll index f5857a330fa..13380e03e32 100644 --- a/llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll +++ b/llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll @@ -86,8 +86,8 @@ define amdgpu_kernel void @local_store_i55(i55 addrspace(3)* %ptr, i55 %arg) #0 ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: ds_write_b16 v1, v2 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_bfe_u32 v0, v0, 16, 7 -; GFX9-NEXT: ds_write_b8 v1, v0 offset:6 +; GFX9-NEXT: v_and_b32_e32 v0, 0x7f0000, v0 +; GFX9-NEXT: ds_write_b8_d16_hi v1, v0 offset:6 ; GFX9-NEXT: ds_write_b32 v1, v3 ; GFX9-NEXT: s_endpgm store i55 %arg, i55 addrspace(3)* %ptr, align 8 diff --git a/llvm/test/CodeGen/X86/bitcast-setcc-256.ll b/llvm/test/CodeGen/X86/bitcast-setcc-256.ll index f9a233a583b..41635f37528 100644 --- 
a/llvm/test/CodeGen/X86/bitcast-setcc-256.ll +++ b/llvm/test/CodeGen/X86/bitcast-setcc-256.ll @@ -448,6 +448,22 @@ define void @bitcast_8i32_store(i8* %p, <8 x i32> %a0) { define void @bitcast_4i64_store(i4* %p, <4 x i64> %a0) { ; SSE2-SSSE3-LABEL: bitcast_4i64_store: ; SSE2-SSSE3: # %bb.0: +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] +; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm1 +; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm3 +; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm3 +; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm4 +; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm4 +; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2] +; SSE2-SSSE3-NEXT: pand %xmm3, %xmm1 +; SSE2-SSSE3-NEXT: por %xmm4, %xmm1 +; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm3 +; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm3 +; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] +; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0 +; SSE2-SSSE3-NEXT: por %xmm2, %xmm0 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0 ; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax ; SSE2-SSSE3-NEXT: movb %al, (%rdi) diff --git a/llvm/test/CodeGen/X86/bitcast-setcc-512.ll b/llvm/test/CodeGen/X86/bitcast-setcc-512.ll index 177be1fd6a6..3c294345dd5 100644 --- a/llvm/test/CodeGen/X86/bitcast-setcc-512.ll +++ b/llvm/test/CodeGen/X86/bitcast-setcc-512.ll @@ -609,13 +609,15 @@ define void @bitcast_8i64_store(i8* %p, <8 x i64> %a0) { ; ; AVX1-LABEL: bitcast_8i64_store: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1 +; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: vmovmskps %ymm0, %eax ; AVX1-NEXT: movb %al, (%rdi) diff --git a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll index adcee2abe33..ed487ef8266 100644 --- a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll +++ b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll @@ -63,12 +63,12 @@ define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind { ; AVX12: # %bb.0: ; AVX12-NEXT: vmovmskps %xmm0, %eax ; AVX12-NEXT: movl %eax, %ecx -; AVX12-NEXT: shrl $2, %ecx -; AVX12-NEXT: vmovd %ecx, %xmm0 -; AVX12-NEXT: andl $3, %eax -; AVX12-NEXT: vmovd %eax, %xmm1 -; AVX12-NEXT: vpextrb $0, %xmm1, %ecx -; AVX12-NEXT: vpextrb $0, %xmm0, %eax +; AVX12-NEXT: andl $3, %ecx +; AVX12-NEXT: vmovq %rcx, %xmm0 +; AVX12-NEXT: shrl $2, %eax +; AVX12-NEXT: vmovq %rax, %xmm1 +; AVX12-NEXT: vpextrb $0, %xmm0, %ecx +; AVX12-NEXT: vpextrb $0, %xmm1, %eax ; AVX12-NEXT: addb %cl, %al ; AVX12-NEXT: # kill: def $al killed $al killed $eax ; AVX12-NEXT: retq @@ -81,9 +81,10 @@ define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind { ; AVX512-NEXT: movzbl %al, %ecx ; AVX512-NEXT: shrl $2, %ecx ; AVX512-NEXT: andl $3, %ecx -; AVX512-NEXT: vmovd %ecx, %xmm0 +; AVX512-NEXT: vmovq %rcx, %xmm0 +; AVX512-NEXT: movzwl %ax, %eax ; AVX512-NEXT: andl $3, %eax -; AVX512-NEXT: vmovd %eax, %xmm1 +; AVX512-NEXT: vmovq %rax, %xmm1 ; AVX512-NEXT: vpextrb $0, %xmm1, %ecx ; AVX512-NEXT: vpextrb $0, %xmm0, %eax ; AVX512-NEXT: addb %cl, %al @@ -119,9 +120,9 @@ define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) 
nounwind { ; AVX12-NEXT: vpmovmskb %xmm0, %eax ; AVX12-NEXT: movzbl %al, %ecx ; AVX12-NEXT: shrl $4, %ecx -; AVX12-NEXT: vmovd %ecx, %xmm0 +; AVX12-NEXT: vmovq %rcx, %xmm0 ; AVX12-NEXT: andl $15, %eax -; AVX12-NEXT: vmovd %eax, %xmm1 +; AVX12-NEXT: vmovq %rax, %xmm1 ; AVX12-NEXT: vpextrb $0, %xmm1, %ecx ; AVX12-NEXT: vpextrb $0, %xmm0, %eax ; AVX12-NEXT: addb %cl, %al @@ -134,9 +135,10 @@ define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind { ; AVX512-NEXT: kmovd %k0, %eax ; AVX512-NEXT: movzbl %al, %ecx ; AVX512-NEXT: shrl $4, %ecx -; AVX512-NEXT: vmovd %ecx, %xmm0 +; AVX512-NEXT: vmovq %rcx, %xmm0 +; AVX512-NEXT: movzwl %ax, %eax ; AVX512-NEXT: andl $15, %eax -; AVX512-NEXT: vmovd %eax, %xmm1 +; AVX512-NEXT: vmovq %rax, %xmm1 ; AVX512-NEXT: vpextrb $0, %xmm1, %ecx ; AVX512-NEXT: vpextrb $0, %xmm0, %eax ; AVX512-NEXT: addb %cl, %al @@ -208,6 +210,22 @@ define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind { define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind { ; SSE2-SSSE3-LABEL: bitcast_v4i64_to_v2i2: ; SSE2-SSSE3: # %bb.0: +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] +; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm1 +; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm3 +; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm3 +; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm4 +; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm4 +; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2] +; SSE2-SSSE3-NEXT: pand %xmm3, %xmm1 +; SSE2-SSSE3-NEXT: por %xmm4, %xmm1 +; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm3 +; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm3 +; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] +; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0 +; SSE2-SSSE3-NEXT: por %xmm2, %xmm0 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0 ; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax ; SSE2-SSSE3-NEXT: movl %eax, %ecx @@ -225,12 +243,12 @@ define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind { ; AVX12: # %bb.0: ; AVX12-NEXT: vmovmskpd %ymm0, %eax ; AVX12-NEXT: movl %eax, %ecx -; AVX12-NEXT: shrl $2, %ecx -; AVX12-NEXT: vmovd %ecx, %xmm0 -; AVX12-NEXT: andl $3, %eax -; AVX12-NEXT: vmovd %eax, %xmm1 -; AVX12-NEXT: vpextrb $0, %xmm1, %ecx -; AVX12-NEXT: vpextrb $0, %xmm0, %eax +; AVX12-NEXT: andl $3, %ecx +; AVX12-NEXT: vmovq %rcx, %xmm0 +; AVX12-NEXT: shrl $2, %eax +; AVX12-NEXT: vmovq %rax, %xmm1 +; AVX12-NEXT: vpextrb $0, %xmm0, %ecx +; AVX12-NEXT: vpextrb $0, %xmm1, %eax ; AVX12-NEXT: addb %cl, %al ; AVX12-NEXT: # kill: def $al killed $al killed $eax ; AVX12-NEXT: vzeroupper @@ -244,9 +262,10 @@ define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind { ; AVX512-NEXT: movzbl %al, %ecx ; AVX512-NEXT: shrl $2, %ecx ; AVX512-NEXT: andl $3, %ecx -; AVX512-NEXT: vmovd %ecx, %xmm0 +; AVX512-NEXT: vmovq %rcx, %xmm0 +; AVX512-NEXT: movzwl %ax, %eax ; AVX512-NEXT: andl $3, %eax -; AVX512-NEXT: vmovd %eax, %xmm1 +; AVX512-NEXT: vmovq %rax, %xmm1 ; AVX512-NEXT: vpextrb $0, %xmm1, %ecx ; AVX512-NEXT: vpextrb $0, %xmm0, %eax ; AVX512-NEXT: addb %cl, %al @@ -282,12 +301,12 @@ define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind { ; AVX12: # %bb.0: ; AVX12-NEXT: vmovmskps %ymm0, %eax ; AVX12-NEXT: movl %eax, %ecx -; AVX12-NEXT: shrl $4, %ecx -; AVX12-NEXT: vmovd %ecx, %xmm0 -; AVX12-NEXT: andl $15, %eax -; AVX12-NEXT: vmovd %eax, %xmm1 -; AVX12-NEXT: vpextrb $0, %xmm1, %ecx -; AVX12-NEXT: vpextrb $0, %xmm0, %eax +; AVX12-NEXT: andl $15, %ecx +; AVX12-NEXT: vmovq %rcx, %xmm0 +; AVX12-NEXT: shrl $4, %eax +; AVX12-NEXT: vmovq %rax, %xmm1 +; AVX12-NEXT: vpextrb $0, %xmm0, %ecx +; 
AVX12-NEXT: vpextrb $0, %xmm1, %eax ; AVX12-NEXT: addb %cl, %al ; AVX12-NEXT: # kill: def $al killed $al killed $eax ; AVX12-NEXT: vzeroupper @@ -300,9 +319,10 @@ define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind { ; AVX512-NEXT: kmovd %k0, %eax ; AVX512-NEXT: movzbl %al, %ecx ; AVX512-NEXT: shrl $4, %ecx -; AVX512-NEXT: vmovd %ecx, %xmm0 +; AVX512-NEXT: vmovq %rcx, %xmm0 +; AVX512-NEXT: movzwl %ax, %eax ; AVX512-NEXT: andl $15, %eax -; AVX512-NEXT: vmovd %eax, %xmm1 +; AVX512-NEXT: vmovq %rax, %xmm1 ; AVX512-NEXT: vpextrb $0, %xmm1, %ecx ; AVX512-NEXT: vpextrb $0, %xmm0, %eax ; AVX512-NEXT: addb %cl, %al @@ -516,20 +536,22 @@ define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind { ; ; AVX1-LABEL: bitcast_v8i64_to_v2i4: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1 +; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: vmovmskps %ymm0, %eax ; AVX1-NEXT: movl %eax, %ecx ; AVX1-NEXT: shrl $4, %ecx -; AVX1-NEXT: vmovd %ecx, %xmm0 +; AVX1-NEXT: vmovq %rcx, %xmm0 ; AVX1-NEXT: andl $15, %eax -; AVX1-NEXT: vmovd %eax, %xmm1 +; AVX1-NEXT: vmovq %rax, %xmm1 ; AVX1-NEXT: vpextrb $0, %xmm1, %ecx ; AVX1-NEXT: vpextrb $0, %xmm0, %eax ; AVX1-NEXT: addb %cl, %al @@ -547,9 +569,9 @@ define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind { ; AVX2-NEXT: vmovmskps %ymm0, %eax ; AVX2-NEXT: movl %eax, %ecx ; AVX2-NEXT: shrl $4, %ecx -; AVX2-NEXT: vmovd %ecx, %xmm0 +; AVX2-NEXT: vmovq %rcx, %xmm0 ; AVX2-NEXT: andl $15, %eax -; AVX2-NEXT: vmovd %eax, %xmm1 +; AVX2-NEXT: vmovq %rax, %xmm1 ; AVX2-NEXT: vpextrb $0, %xmm1, %ecx ; AVX2-NEXT: vpextrb $0, %xmm0, %eax ; AVX2-NEXT: addb %cl, %al @@ -564,9 +586,10 @@ define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind { ; AVX512-NEXT: kmovd %k0, %eax ; AVX512-NEXT: movzbl %al, %ecx ; AVX512-NEXT: shrl $4, %ecx -; AVX512-NEXT: vmovd %ecx, %xmm0 +; AVX512-NEXT: vmovq %rcx, %xmm0 +; AVX512-NEXT: movzwl %ax, %eax ; AVX512-NEXT: andl $15, %eax -; AVX512-NEXT: vmovd %eax, %xmm1 +; AVX512-NEXT: vmovq %rax, %xmm1 ; AVX512-NEXT: vpextrb $0, %xmm1, %ecx ; AVX512-NEXT: vpextrb $0, %xmm0, %eax ; AVX512-NEXT: addb %cl, %al diff --git a/llvm/test/CodeGen/X86/dagcombine-cse.ll b/llvm/test/CodeGen/X86/dagcombine-cse.ll index a532d87170d..bf1dab35875 100644 --- a/llvm/test/CodeGen/X86/dagcombine-cse.ll +++ b/llvm/test/CodeGen/X86/dagcombine-cse.ll @@ -14,11 +14,18 @@ define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) n ; ; X64-LABEL: t: ; X64: ## %bb.0: ## %entry +; X64-NEXT: ## kill: def $edx killed $edx def $rdx +; X64-NEXT: ## kill: def $esi killed $esi def $rsi ; X64-NEXT: imull %ecx, %esi -; X64-NEXT: addl %edx, %esi -; X64-NEXT: movslq %esi, %rax +; X64-NEXT: leal (%rsi,%rdx), %eax +; X64-NEXT: cltq ; X64-NEXT: movl (%rdi,%rax), %eax -; X64-NEXT: movq %rax, %xmm0 +; X64-NEXT: leal 4(%rsi,%rdx), %ecx +; X64-NEXT: movslq %ecx, %rcx +; X64-NEXT: movzwl (%rdi,%rcx), %ecx +; X64-NEXT: shlq $32, %rcx +; X64-NEXT: orq %rax, %rcx +; X64-NEXT: movq %rcx, %xmm0 ; X64-NEXT: movd %xmm0, %eax ; X64-NEXT: retq entry: diff --git 
a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir index f1397383e89..9937ca08aaf 100755 --- a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir +++ b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir @@ -2216,6 +2216,12 @@ body: | $edi = VCVTTSS2SIZrr $xmm0 ; CHECK: $edi = VCVTTSS2SIrr_Int $xmm0 $edi = VCVTTSS2SIZrr_Int $xmm0 + ; CHECK: $xmm0 = VMOV64toSDrr $rdi + $xmm0 = VMOV64toSDZrr $rdi + ; CHECK: $xmm0 = VMOVDI2SSrm $rip, $noreg, $noreg, $noreg, $noreg + $xmm0 = VMOVDI2SSZrm $rip, $noreg, $noreg, $noreg, $noreg + ; CHECK: $xmm0 = VMOVDI2SSrr $eax + $xmm0 = VMOVDI2SSZrr $eax ; CHECK: VMOVSDmr $rdi, $xmm0, $noreg, $noreg, $noreg, $noreg VMOVSDZmr $rdi, $xmm0, $noreg, $noreg, $noreg, $noreg ; CHECK: $xmm0 = VMOVSDrm $rip, $noreg, $noreg, $noreg, $noreg @@ -2244,6 +2250,8 @@ body: | $xmm0 = VMOV64toPQIZrr $rdi ; CHECK: $xmm0 = VMOV64toPQIrm $rdi, $noreg, $noreg, $noreg, $noreg $xmm0 = VMOV64toPQIZrm $rdi, $noreg, $noreg, $noreg, $noreg + ; CHECK: $xmm0 = VMOV64toSDrr $rdi + $xmm0 = VMOV64toSDZrr $rdi ; CHECK: $xmm0 = VMOVDI2PDIrm $rip, $noreg, $noreg, $noreg, $noreg $xmm0 = VMOVDI2PDIZrm $rip, $noreg, $noreg, $noreg, $noreg ; CHECK: $xmm0 = VMOVDI2PDIrr $edi @@ -4528,6 +4536,12 @@ body: | $edi = VCVTTSS2SIZrr $xmm16 ; CHECK: $edi = VCVTTSS2SIZrr_Int $xmm16 $edi = VCVTTSS2SIZrr_Int $xmm16 + ; CHECK: $xmm16 = VMOV64toSDZrr $rdi + $xmm16 = VMOV64toSDZrr $rdi + ; CHECK: $xmm16 = VMOVDI2SSZrm $rip, $noreg, $noreg, $noreg, $noreg + $xmm16 = VMOVDI2SSZrm $rip, $noreg, $noreg, $noreg, $noreg + ; CHECK: $xmm16 = VMOVDI2SSZrr $eax + $xmm16 = VMOVDI2SSZrr $eax ; CHECK: VMOVSDZmr $rdi, $xmm16, $noreg, $noreg, $noreg, $noreg VMOVSDZmr $rdi, $xmm16, $noreg, $noreg, $noreg, $noreg ; CHECK: $xmm16 = VMOVSDZrm $rip, $noreg, $noreg, $noreg, $noreg @@ -4556,6 +4570,8 @@ body: | $xmm16 = VMOV64toPQIZrr $rdi ; CHECK: $xmm16 = VMOV64toPQIZrm $rdi, $noreg, $noreg, $noreg, $noreg $xmm16 = VMOV64toPQIZrm $rdi, $noreg, $noreg, $noreg, $noreg + ; CHECK: $xmm16 = VMOV64toSDZrr $rdi + $xmm16 = VMOV64toSDZrr $rdi ; CHECK: $xmm16 = VMOVDI2PDIZrm $rip, $noreg, $noreg, $noreg, $noreg $xmm16 = VMOVDI2PDIZrm $rip, $noreg, $noreg, $noreg, $noreg ; CHECK: $xmm16 = VMOVDI2PDIZrr $edi diff --git a/llvm/test/CodeGen/X86/fast-isel-fneg.ll b/llvm/test/CodeGen/X86/fast-isel-fneg.ll index 302c6b5411f..0c2ce6df0a4 100644 --- a/llvm/test/CodeGen/X86/fast-isel-fneg.ll +++ b/llvm/test/CodeGen/X86/fast-isel-fneg.ll @@ -5,9 +5,10 @@ define double @doo(double %x) nounwind { ; CHECK-LABEL: doo: ; CHECK: ## %bb.0: -; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: subsd %xmm0, %xmm1 -; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: movq %xmm0, %rax +; CHECK-NEXT: movabsq $-9223372036854775808, %rcx ## imm = 0x8000000000000000 +; CHECK-NEXT: xorq %rax, %rcx +; CHECK-NEXT: movq %rcx, %xmm0 ; CHECK-NEXT: retq ; ; SSE2-LABEL: doo: @@ -30,9 +31,9 @@ define double @doo(double %x) nounwind { define float @foo(float %x) nounwind { ; CHECK-LABEL: foo: ; CHECK: ## %bb.0: -; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: subss %xmm0, %xmm1 -; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: movd %xmm0, %eax +; CHECK-NEXT: xorl $2147483648, %eax ## imm = 0x80000000 +; CHECK-NEXT: movd %eax, %xmm0 ; CHECK-NEXT: retq ; ; SSE2-LABEL: foo: diff --git a/llvm/test/CodeGen/X86/masked_store.ll b/llvm/test/CodeGen/X86/masked_store.ll index efbb1ef8cc6..fa70edbf121 100644 --- a/llvm/test/CodeGen/X86/masked_store.ll +++ b/llvm/test/CodeGen/X86/masked_store.ll @@ -37,21 +37,25 @@ define 
void @store_v1f64_v1i64(<1 x i64> %trigger, <1 x double>* %addr, <1 x dou define void @store_v2f64_v2i64(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %val) { ; SSE2-LABEL: store_v2f64_v2i64: ; SSE2: ## %bb.0: -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] -; SSE2-NEXT: pxor %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm2, %xmm3 -; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 -; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] -; SSE2-NEXT: pand %xmm3, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3] -; SSE2-NEXT: por %xmm2, %xmm0 -; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] +; SSE2-NEXT: pxor %xmm3, %xmm0 +; SSE2-NEXT: movdqa %xmm3, %xmm2 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE2-NEXT: pcmpeqd %xmm3, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE2-NEXT: movdqa %xmm0, %xmm4 +; SSE2-NEXT: pand %xmm2, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] +; SSE2-NEXT: por %xmm3, %xmm4 +; SSE2-NEXT: movd %xmm4, %eax ; SSE2-NEXT: testb $1, %al ; SSE2-NEXT: je LBB1_2 ; SSE2-NEXT: ## %bb.1: ## %cond.store ; SSE2-NEXT: movlpd %xmm1, (%rdi) ; SSE2-NEXT: LBB1_2: ## %else +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,2,2] +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: por %xmm3, %xmm0 ; SSE2-NEXT: pextrw $4, %xmm0, %eax ; SSE2-NEXT: testb $1, %al ; SSE2-NEXT: je LBB1_4 @@ -120,16 +124,20 @@ define void @store_v4f64_v4i64(<4 x i64> %trigger, <4 x double>* %addr, <4 x dou ; SSE2-NEXT: movdqa %xmm4, %xmm5 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm5 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3] -; SSE2-NEXT: pand %xmm5, %xmm6 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3] -; SSE2-NEXT: por %xmm6, %xmm0 -; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE2-NEXT: movdqa %xmm0, %xmm7 +; SSE2-NEXT: pand %xmm5, %xmm7 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3] +; SSE2-NEXT: por %xmm6, %xmm7 +; SSE2-NEXT: movd %xmm7, %eax ; SSE2-NEXT: testb $1, %al ; SSE2-NEXT: je LBB2_2 ; SSE2-NEXT: ## %bb.1: ## %cond.store ; SSE2-NEXT: movlpd %xmm2, (%rdi) ; SSE2-NEXT: LBB2_2: ## %else +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,0,2,2] +; SSE2-NEXT: pand %xmm5, %xmm0 +; SSE2-NEXT: por %xmm6, %xmm0 ; SSE2-NEXT: pextrw $4, %xmm0, %eax ; SSE2-NEXT: testb $1, %al ; SSE2-NEXT: je LBB2_4 @@ -139,9 +147,10 @@ define void @store_v4f64_v4i64(<4 x i64> %trigger, <4 x double>* %addr, <4 x dou ; SSE2-NEXT: pxor %xmm4, %xmm1 ; SSE2-NEXT: movdqa %xmm4, %xmm0 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2] ; SSE2-NEXT: pcmpeqd %xmm4, %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; SSE2-NEXT: pand %xmm0, %xmm1 +; SSE2-NEXT: pand %xmm2, %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: pextrw $0, %xmm0, %eax @@ -893,21 +902,25 @@ define void @store_v16f32_v16i32(<16 x float> %x, <16 x float>* %ptr, <16 x floa define void @store_v2i64_v2i64(<2 x i64> %trigger, <2 x i64>* %addr, <2 x i64> %val) { ; SSE2-LABEL: store_v2i64_v2i64: ; SSE2: ## %bb.0: -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] -; SSE2-NEXT: pxor %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm2, %xmm3 -; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 -; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] -; SSE2-NEXT: pand %xmm3, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3] -; SSE2-NEXT: por %xmm2, %xmm0 -; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: movdqa {{.*#+}} 
xmm3 = [2147483648,2147483648] +; SSE2-NEXT: pxor %xmm3, %xmm0 +; SSE2-NEXT: movdqa %xmm3, %xmm2 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE2-NEXT: pcmpeqd %xmm3, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE2-NEXT: movdqa %xmm0, %xmm4 +; SSE2-NEXT: pand %xmm2, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] +; SSE2-NEXT: por %xmm3, %xmm4 +; SSE2-NEXT: movd %xmm4, %eax ; SSE2-NEXT: testb $1, %al ; SSE2-NEXT: je LBB7_2 ; SSE2-NEXT: ## %bb.1: ## %cond.store ; SSE2-NEXT: movq %xmm1, (%rdi) ; SSE2-NEXT: LBB7_2: ## %else +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,2,2] +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: por %xmm3, %xmm0 ; SSE2-NEXT: pextrw $4, %xmm0, %eax ; SSE2-NEXT: testb $1, %al ; SSE2-NEXT: je LBB7_4 @@ -982,16 +995,20 @@ define void @store_v4i64_v4i64(<4 x i64> %trigger, <4 x i64>* %addr, <4 x i64> % ; SSE2-NEXT: movdqa %xmm4, %xmm5 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm5 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3] -; SSE2-NEXT: pand %xmm5, %xmm6 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3] -; SSE2-NEXT: por %xmm6, %xmm0 -; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE2-NEXT: movdqa %xmm0, %xmm7 +; SSE2-NEXT: pand %xmm5, %xmm7 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3] +; SSE2-NEXT: por %xmm6, %xmm7 +; SSE2-NEXT: movd %xmm7, %eax ; SSE2-NEXT: testb $1, %al ; SSE2-NEXT: je LBB8_2 ; SSE2-NEXT: ## %bb.1: ## %cond.store ; SSE2-NEXT: movq %xmm2, (%rdi) ; SSE2-NEXT: LBB8_2: ## %else +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,0,2,2] +; SSE2-NEXT: pand %xmm5, %xmm0 +; SSE2-NEXT: por %xmm6, %xmm0 ; SSE2-NEXT: pextrw $4, %xmm0, %eax ; SSE2-NEXT: testb $1, %al ; SSE2-NEXT: je LBB8_4 @@ -1002,9 +1019,10 @@ define void @store_v4i64_v4i64(<4 x i64> %trigger, <4 x i64>* %addr, <4 x i64> % ; SSE2-NEXT: pxor %xmm4, %xmm1 ; SSE2-NEXT: movdqa %xmm4, %xmm0 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2] ; SSE2-NEXT: pcmpeqd %xmm4, %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; SSE2-NEXT: pand %xmm0, %xmm1 +; SSE2-NEXT: pand %xmm2, %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: pextrw $0, %xmm0, %eax diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll index c8b3488af16..2e6123ff014 100644 --- a/llvm/test/CodeGen/X86/movmsk-cmp.ll +++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll @@ -929,6 +929,22 @@ define i1 @allzeros_v16i32_sign(<16 x i32> %arg) { define i1 @allones_v4i64_sign(<4 x i64> %arg) { ; SSE2-LABEL: allones_v4i64_sign: ; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm3 +; SSE2-NEXT: movdqa %xmm2, %xmm4 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2] +; SSE2-NEXT: pand %xmm3, %xmm1 +; SSE2-NEXT: por %xmm4, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm3 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] +; SSE2-NEXT: pand %xmm3, %xmm0 +; SSE2-NEXT: por %xmm2, %xmm0 ; SSE2-NEXT: packssdw %xmm1, %xmm0 ; SSE2-NEXT: movmskps %xmm0, %eax ; SSE2-NEXT: cmpb $15, %al @@ -973,6 +989,22 @@ define i1 @allones_v4i64_sign(<4 x i64> %arg) { define i1 @allzeros_v4i64_sign(<4 x i64> %arg) { ; SSE2-LABEL: allzeros_v4i64_sign: ; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] +; 
SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm3 +; SSE2-NEXT: movdqa %xmm2, %xmm4 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2] +; SSE2-NEXT: pand %xmm3, %xmm1 +; SSE2-NEXT: por %xmm4, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm3 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] +; SSE2-NEXT: pand %xmm3, %xmm0 +; SSE2-NEXT: por %xmm2, %xmm0 ; SSE2-NEXT: packssdw %xmm1, %xmm0 ; SSE2-NEXT: movmskps %xmm0, %eax ; SSE2-NEXT: testb %al, %al @@ -1063,13 +1095,15 @@ define i1 @allones_v8i64_sign(<8 x i64> %arg) { ; ; AVX1-LABEL: allones_v8i64_sign: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1 +; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: vmovmskps %ymm0, %eax ; AVX1-NEXT: cmpb $-1, %al @@ -1164,13 +1198,15 @@ define i1 @allzeros_v8i64_sign(<8 x i64> %arg) { ; ; AVX1-LABEL: allzeros_v8i64_sign: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1 +; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: vmovmskps %ymm0, %eax ; AVX1-NEXT: testb %al, %al @@ -2503,17 +2539,19 @@ define i1 @allones_v8i64_and1(<8 x i64> %arg) { ; ; AVX1-LABEL: allones_v8i64_and1: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1 +; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2 +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2 -; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1 -; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: vmovmskps %ymm0, %eax ; AVX1-NEXT: cmpb $-1, %al @@ -2577,17 +2615,19 @@ define i1 @allzeros_v8i64_and1(<8 x i64> %arg) { ; ; AVX1-LABEL: allzeros_v8i64_and1: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1 +; AVX1-NEXT: 
vpackssdw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2 +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2 -; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1 -; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: vmovmskps %ymm0, %eax ; AVX1-NEXT: testb %al, %al @@ -3922,17 +3962,19 @@ define i1 @allones_v8i64_and4(<8 x i64> %arg) { ; ; AVX1-LABEL: allones_v8i64_and4: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1 +; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2 +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2 -; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1 -; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: vmovmskps %ymm0, %eax ; AVX1-NEXT: cmpb $-1, %al @@ -3996,17 +4038,19 @@ define i1 @allzeros_v8i64_and4(<8 x i64> %arg) { ; ; AVX1-LABEL: allzeros_v8i64_and4: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1 +; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2 +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2 -; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1 -; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: vmovmskps %ymm0, %eax ; AVX1-NEXT: testb %al, %al @@ -4126,6 +4170,22 @@ define i32 @movmskps(<4 x float> %x) { define i32 @movmskpd256(<4 x double> %x) { ; SSE2-LABEL: movmskpd256: ; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm3 +; SSE2-NEXT: movdqa %xmm2, %xmm4 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2] +; SSE2-NEXT: pand %xmm3, %xmm1 +; SSE2-NEXT: por %xmm4, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm3 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] +; SSE2-NEXT: pand %xmm3, %xmm0 +; SSE2-NEXT: por %xmm2, %xmm0 ; SSE2-NEXT: packssdw %xmm1, %xmm0 ; SSE2-NEXT: movmskps %xmm0, %eax ; SSE2-NEXT: retq diff --git a/llvm/test/CodeGen/X86/peephole.mir b/llvm/test/CodeGen/X86/peephole.mir new file mode 100644 index 00000000000..28ce9f1f0e8 --- /dev/null +++ b/llvm/test/CodeGen/X86/peephole.mir @@ 
-0,0 +1,40 @@ +# RUN: llc -mtriple=x86_64-- -run-pass=peephole-opt %s -o - | FileCheck %s +--- | + define void @func() { ret void } +... +--- +# Check that instructions with MI.isBitcast() are only replaced by COPY if there +# are no SUBREG_TO_REG users. +# CHECK-LABEL: name: func +name: func +registers: + - { id: 0, class: gr32 } + - { id: 1, class: fr32 } + - { id: 2, class: gr32 } + + - { id: 3, class: gr32 } + - { id: 4, class: fr32 } + - { id: 5, class: gr32 } + - { id: 6, class: gr64 } + +body: | + bb.0: + ; CHECK: %1:fr32 = VMOVDI2SSrr %0 + ; CHECK: %7:gr32 = COPY %0 + ; CHECK: NOOP implicit %7 + %0 = MOV32ri 42 + %1 = VMOVDI2SSrr %0 + %2 = MOVSS2DIrr %1 + NOOP implicit %2 + + ; CHECK: %4:fr32 = VMOVDI2SSrr %3 + ; CHECK-NOT: COPY + ; CHECK: %5:gr32 = MOVSS2DIrr %4 + ; CHECK: %6:gr64 = SUBREG_TO_REG %5, 0 + ; CHECK: NOOP implicit %6 + %3 = MOV32ri 42 + %4 = VMOVDI2SSrr %3 + %5 = MOVSS2DIrr %4 + %6 = SUBREG_TO_REG %5, 0, %subreg.sub_32bit + NOOP implicit %6 +... diff --git a/llvm/test/CodeGen/X86/pr41619.ll b/llvm/test/CodeGen/X86/pr41619.ll deleted file mode 100644 index 7c71f2c1c29..00000000000 --- a/llvm/test/CodeGen/X86/pr41619.ll +++ /dev/null @@ -1,27 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.14.0 -mattr=avx2 | FileCheck %s - -define void @foo(double %arg) { -; CHECK-LABEL: foo: -; CHECK: ## %bb.0: ## %bb -; CHECK-NEXT: vmovq %xmm0, %rax -; CHECK-NEXT: vmovd %eax, %xmm0 -; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vmovq %xmm0, %rax -; CHECK-NEXT: movl %eax, (%rax) -; CHECK-NEXT: vmovlps %xmm1, (%rax) -; CHECK-NEXT: retq -bb: - %tmp = bitcast double %arg to i64 - %tmp1 = trunc i64 %tmp to i32 - %tmp2 = bitcast i32 %tmp1 to float - %tmp3 = insertelement <4 x float> zeroinitializer, float %tmp2, i32 2 - %tmp4 = bitcast <4 x float> %tmp3 to <2 x double> - %tmp5 = extractelement <2 x double> %tmp4, i32 0 - %tmp6 = extractelement <2 x double> %tmp4, i32 1 - %tmp7 = bitcast double %tmp6 to i64 - %tmp8 = trunc i64 %tmp7 to i32 - store i32 %tmp8, i32* undef, align 4 - store double %tmp5, double* undef, align 16 - ret void -} |