Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 26
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 36
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 29
-rw-r--r-- | llvm/lib/Target/X86/X86InstrFoldTables.cpp | 6
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 57
5 files changed, 82 insertions, 72 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a961685ff71..093778add49 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1571,36 +1571,12 @@ bool TargetLowering::SimplifyDemandedBits(
       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
                                KnownSrcBits, TLO, Depth + 1))
         return true;
-    } else if ((NumSrcEltBits % BitWidth) == 0 &&
-               TLO.DAG.getDataLayout().isLittleEndian()) {
-      unsigned Scale = NumSrcEltBits / BitWidth;
-      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
-      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
-      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
-      for (unsigned i = 0; i != NumElts; ++i)
-        if (DemandedElts[i]) {
-          unsigned Offset = (i % Scale) * BitWidth;
-          DemandedSrcBits.insertBits(DemandedBits, Offset);
-          DemandedSrcElts.setBit(i / Scale);
-        }
-
-      if (SrcVT.isVector()) {
-        APInt KnownSrcUndef, KnownSrcZero;
-        if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
-                                       KnownSrcZero, TLO, Depth + 1))
-          return true;
-      }
-
-      KnownBits KnownSrcBits;
-      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
-                               KnownSrcBits, TLO, Depth + 1))
-        return true;
     }
 
     // If this is a bitcast, let computeKnownBits handle it. Only do this on a
     // recursive call where Known may be useful to the caller.
     if (Depth > 0) {
-      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+      Known = TLO.DAG.computeKnownBits(Op, Depth);
       return false;
     }
     break;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 409fbfa22f3..994e912ac9c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3202,44 +3202,30 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
 
 SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
-  auto *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
-  if (!RHS)
+  if (N->getValueType(0) != MVT::i64)
     return SDValue();
 
-  EVT VT = N->getValueType(0);
-  SDValue LHS = N->getOperand(0);
-  unsigned ShiftAmt = RHS->getZExtValue();
-  SelectionDAG &DAG = DCI.DAG;
-  SDLoc SL(N);
-
-  // fold (srl (and x, c1 << c2), c2) -> (and (srl(x, c2), c1)
-  // this improves the ability to match BFE patterns in isel.
-  if (LHS.getOpcode() == ISD::AND) {
-    if (auto *Mask = dyn_cast<ConstantSDNode>(LHS.getOperand(1))) {
-      if (Mask->getAPIntValue().isShiftedMask() &&
-          Mask->getAPIntValue().countTrailingZeros() == ShiftAmt) {
-        return DAG.getNode(
-            ISD::AND, SL, VT,
-            DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(0), N->getOperand(1)),
-            DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(1), N->getOperand(1)));
-      }
-    }
-  }
-
-  if (VT != MVT::i64)
+  const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (!RHS)
     return SDValue();
 
+  unsigned ShiftAmt = RHS->getZExtValue();
   if (ShiftAmt < 32)
     return SDValue();
 
   // srl i64:x, C for C >= 32
   // =>
   //   build_pair (srl hi_32(x), C - 32), 0
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc SL(N);
+
   SDValue One = DAG.getConstant(1, SL, MVT::i32);
   SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
 
-  SDValue VecOp = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, LHS);
-  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecOp, One);
+  SDValue VecOp = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, N->getOperand(0));
+  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32,
+                           VecOp, One);
 
   SDValue NewConst = DAG.getConstant(ShiftAmt - 32, SL, MVT::i32);
   SDValue NewShift = DAG.getNode(ISD::SRL, SL, MVT::i32, Hi, NewConst);
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index d0d255b6a7f..0165949a7e3 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -3832,6 +3832,14 @@ def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
                       "vmovq\t{$src, $dst|$dst, $src}", []>,
                       EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
 let isCodeGenOnly = 1 in {
+def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
+                      "vmovq\t{$src, $dst|$dst, $src}",
+                      [(set FR64X:$dst, (bitconvert GR64:$src))]>,
+                      EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
+def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
+                      "vmovq\t{$src, $dst|$dst, $src}",
+                      [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
+                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set GR64:$dst, (bitconvert FR64X:$src))]>,
@@ -3844,6 +3852,20 @@ def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$
 }
 } // ExeDomain = SSEPackedInt
 
+// Move Int Doubleword to Single Scalar
+//
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
+def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
+                      "vmovd\t{$src, $dst|$dst, $src}",
+                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
+                      EVEX, Sched<[WriteVecMoveFromGpr]>;
+
+def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
+                      "vmovd\t{$src, $dst|$dst, $src}",
+                      [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))]>,
+                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
+} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
+
 // Move doubleword from xmm register to r/m32
 //
 let ExeDomain = SSEPackedInt in {
@@ -3860,13 +3882,6 @@ def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
 } // ExeDomain = SSEPackedInt
 
-let Predicates = [HasAVX512] in {
-  def : Pat<(f64 (bitconvert GR64:$src)),
-            (COPY_TO_REGCLASS (VMOV64toPQIZrr GR64:$src), FR64X)>;
-  def : Pat<(f32 (bitconvert GR32:$src)),
-            (COPY_TO_REGCLASS (VMOVDI2PDIZrr GR32:$src), FR32X)>;
-}
-
 // Move quadword from xmm1 register to r/m64
 //
 let ExeDomain = SSEPackedInt in {
diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
index 82adcd8e147..59e62da55f2 100644
--- a/llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -531,11 +531,13 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
   { X86::MOV32rr, X86::MOV32rm, 0 },
   { X86::MOV64rr, X86::MOV64rm, 0 },
   { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 },
+  { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 },
   { X86::MOV8rr, X86::MOV8rm, 0 },
   { X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16 },
   { X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16 },
   { X86::MOVDDUPrr, X86::MOVDDUPrm, TB_NO_REVERSE },
   { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
+  { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
   { X86::MOVDQArr, X86::MOVDQArm, TB_ALIGN_16 },
   { X86::MOVDQUrr, X86::MOVDQUrm, 0 },
   { X86::MOVSHDUPrr, X86::MOVSHDUPrm, TB_ALIGN_16 },
@@ -816,6 +818,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
   { X86::VGETMANTPSZrri, X86::VGETMANTPSZrmi, 0 },
   { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 },
   { X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
+  { X86::VMOV64toSDZrr, X86::VMOV64toSDZrm, 0 },
+  { X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 },
   { X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
   { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 },
   { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 },
@@ -833,6 +837,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
   { X86::VMOVDDUPrr, X86::VMOVDDUPrm, TB_NO_REVERSE },
   { X86::VMOVDI2PDIZrr, X86::VMOVDI2PDIZrm, 0 },
   { X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 },
+  { X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 },
+  { X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 },
   { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 },
   { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 },
   { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zrm, TB_ALIGN_64 },
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index feee34b7644..6a2e5cf1aa2 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -4109,6 +4109,11 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
 def VMOV64toPQIrm : VRS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                       "movq\t{$src, $dst|$dst, $src}", []>,
                       VEX, Sched<[WriteVecLoad]>;
+let isCodeGenOnly = 1 in
+def VMOV64toSDrr : VRS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+                      "movq\t{$src, $dst|$dst, $src}",
+                      [(set FR64:$dst, (bitconvert GR64:$src))]>,
+                      VEX, Sched<[WriteVecMoveFromGpr]>;
 
 def MOVDI2PDIrr : S2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
@@ -4129,9 +4134,38 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
 def MOV64toPQIrm : RS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                       "movq\t{$src, $dst|$dst, $src}", []>,
                       Sched<[WriteVecLoad]>;
+let isCodeGenOnly = 1 in
+def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+                      "movq\t{$src, $dst|$dst, $src}",
+                      [(set FR64:$dst, (bitconvert GR64:$src))]>,
+                      Sched<[WriteVecMoveFromGpr]>;
 } // ExeDomain = SSEPackedInt
 
 //===---------------------------------------------------------------------===//
+// Move Int Doubleword to Single Scalar
+//
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
+  def VMOVDI2SSrr : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+                      "movd\t{$src, $dst|$dst, $src}",
+                      [(set FR32:$dst, (bitconvert GR32:$src))]>,
+                      VEX, Sched<[WriteVecMoveFromGpr]>;
+
+  def VMOVDI2SSrm : VS2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+                      "movd\t{$src, $dst|$dst, $src}",
+                      [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
+                      VEX, Sched<[WriteVecLoad]>;
+  def MOVDI2SSrr : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+                      "movd\t{$src, $dst|$dst, $src}",
+                      [(set FR32:$dst, (bitconvert GR32:$src))]>,
+                      Sched<[WriteVecMoveFromGpr]>;
+
+  def MOVDI2SSrm : S2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+                      "movd\t{$src, $dst|$dst, $src}",
+                      [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
+                      Sched<[WriteVecLoad]>;
+} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
+
+//===---------------------------------------------------------------------===//
 // Move Packed Doubleword Int to Packed Double Int
 //
 let ExeDomain = SSEPackedInt in {
@@ -4158,21 +4192,6 @@ def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
                       Sched<[WriteVecStore]>;
 } // ExeDomain = SSEPackedInt
 
-let Predicates = [UseAVX] in {
-  def : Pat<(f64 (bitconvert GR64:$src)),
-            (COPY_TO_REGCLASS (VMOV64toPQIrr GR64:$src), FR64)>;
-  def : Pat<(f32 (bitconvert GR32:$src)),
-            (COPY_TO_REGCLASS (VMOVDI2PDIrr GR32:$src), FR32)>;
-}
-
-let Predicates = [UseSSE2] in
-def : Pat<(f64 (bitconvert GR64:$src)),
-          (COPY_TO_REGCLASS (MOV64toPQIrr GR64:$src), FR64)>;
-
-let Predicates = [UseSSE1] in
-def : Pat<(f32 (bitconvert GR32:$src)),
-          (COPY_TO_REGCLASS (MOVDI2PDIrr GR32:$src), FR32)>;
-
 //===---------------------------------------------------------------------===//
 // Move Packed Doubleword Int first element to Doubleword Int
 //
@@ -4206,6 +4225,10 @@ def MOVPQIto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
 //
 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
   let Predicates = [UseAVX] in
+  def VMOV64toSDrm : VS2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+                      "movq\t{$src, $dst|$dst, $src}",
+                      [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
+                      VEX, Sched<[WriteVecLoad]>;
   def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
                       "movq\t{$src, $dst|$dst, $src}",
                       [(set GR64:$dst, (bitconvert FR64:$src))]>,
@@ -4215,6 +4238,10 @@ let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
                       [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>,
                       VEX, Sched<[WriteVecStore]>;
 
+  def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+                      "movq\t{$src, $dst|$dst, $src}",
+                      [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
+                      Sched<[WriteVecLoad]>;
   def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
                       "movq\t{$src, $dst|$dst, $src}",
                       [(set GR64:$dst, (bitconvert FR64:$src))]>,