Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp  | 26
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp     | 36
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td             | 29
-rw-r--r--  llvm/lib/Target/X86/X86InstrFoldTables.cpp        |  6
-rw-r--r--  llvm/lib/Target/X86/X86InstrSSE.td                | 57
5 files changed, 82 insertions, 72 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a961685ff71..093778add49 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1571,36 +1571,12 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
KnownSrcBits, TLO, Depth + 1))
return true;
- } else if ((NumSrcEltBits % BitWidth) == 0 &&
- TLO.DAG.getDataLayout().isLittleEndian()) {
- unsigned Scale = NumSrcEltBits / BitWidth;
- unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
- APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
- APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
- for (unsigned i = 0; i != NumElts; ++i)
- if (DemandedElts[i]) {
- unsigned Offset = (i % Scale) * BitWidth;
- DemandedSrcBits.insertBits(DemandedBits, Offset);
- DemandedSrcElts.setBit(i / Scale);
- }
-
- if (SrcVT.isVector()) {
- APInt KnownSrcUndef, KnownSrcZero;
- if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
- KnownSrcZero, TLO, Depth + 1))
- return true;
- }
-
- KnownBits KnownSrcBits;
- if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
- KnownSrcBits, TLO, Depth + 1))
- return true;
}
// If this is a bitcast, let computeKnownBits handle it. Only do this on a
// recursive call where Known may be useful to the caller.
if (Depth > 0) {
- Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+ Known = TLO.DAG.computeKnownBits(Op, Depth);
return false;
}
break;
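
[Note] The block deleted above handled little-endian bitcasts from wider source elements: for each demanded narrow destination element i it mapped the demanded bits to offset (i % Scale) * BitWidth inside source element i / Scale, then re-queried the source. A standalone sketch of that index arithmetic, separate from the SelectionDAG code (plain C++; the v2i64 -> v4i32 widths are an illustrative assumption):

#include <cstdio>

// Sketch of the demanded-bits mapping the removed block performed for a
// little-endian bitcast from wide source elements to narrower destination
// elements, e.g. v2i64 -> v4i32 (widths below are illustrative assumptions).
int main() {
  const unsigned NumSrcEltBits = 64;                        // source element width
  const unsigned BitWidth      = 32;                        // destination element width
  const unsigned Scale         = NumSrcEltBits / BitWidth;  // == 2

  // For each demanded destination element i, the demanded bits live at
  // offset (i % Scale) * BitWidth inside source element i / Scale.
  for (unsigned i = 0; i != 4; ++i) {
    unsigned SrcElt = i / Scale;
    unsigned Offset = (i % Scale) * BitWidth;
    std::printf("dst elt %u -> src elt %u, bits [%u, %u)\n",
                i, SrcElt, Offset, Offset + BitWidth);
  }
  return 0;
}
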
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 409fbfa22f3..994e912ac9c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3202,44 +3202,30 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
- auto *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (!RHS)
+ if (N->getValueType(0) != MVT::i64)
return SDValue();
- EVT VT = N->getValueType(0);
- SDValue LHS = N->getOperand(0);
- unsigned ShiftAmt = RHS->getZExtValue();
- SelectionDAG &DAG = DCI.DAG;
- SDLoc SL(N);
-
- // fold (srl (and x, c1 << c2), c2) -> (and (srl(x, c2), c1)
- // this improves the ability to match BFE patterns in isel.
- if (LHS.getOpcode() == ISD::AND) {
- if (auto *Mask = dyn_cast<ConstantSDNode>(LHS.getOperand(1))) {
- if (Mask->getAPIntValue().isShiftedMask() &&
- Mask->getAPIntValue().countTrailingZeros() == ShiftAmt) {
- return DAG.getNode(
- ISD::AND, SL, VT,
- DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(0), N->getOperand(1)),
- DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(1), N->getOperand(1)));
- }
- }
- }
-
- if (VT != MVT::i64)
+ const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!RHS)
return SDValue();
+ unsigned ShiftAmt = RHS->getZExtValue();
if (ShiftAmt < 32)
return SDValue();
// srl i64:x, C for C >= 32
// =>
// build_pair (srl hi_32(x), C - 32), 0
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc SL(N);
+
SDValue One = DAG.getConstant(1, SL, MVT::i32);
SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
- SDValue VecOp = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, LHS);
- SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecOp, One);
+ SDValue VecOp = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, N->getOperand(0));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32,
+ VecOp, One);
SDValue NewConst = DAG.getConstant(ShiftAmt - 32, SL, MVT::i32);
SDValue NewShift = DAG.getNode(ISD::SRL, SL, MVT::i32, Hi, NewConst);
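
[Note] The comment in this combine states the transform: a 64-bit logical shift right by C >= 32 only needs the high 32 bits of the source, so it becomes a 32-bit shift of hi_32(x) paired with a zero high half. A small self-contained check of that identity, independent of the DAG combine itself (plain C++):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t X = 0x123456789abcdef0ULL;  // arbitrary test value
  for (unsigned C = 32; C < 64; ++C) {
    uint32_t Hi = uint32_t(X >> 32);             // hi_32(x)
    uint64_t Folded = uint64_t(Hi >> (C - 32));  // build_pair (srl hi, C - 32), 0
    assert((X >> C) == Folded);
  }
  return 0;
}
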
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index d0d255b6a7f..0165949a7e3 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -3832,6 +3832,14 @@ def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
"vmovq\t{$src, $dst|$dst, $src}", []>,
EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
let isCodeGenOnly = 1 in {
+def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set FR64X:$dst, (bitconvert GR64:$src))]>,
+ EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
+def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
+ EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64X:$src))]>,
@@ -3844,6 +3852,20 @@ def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$
}
} // ExeDomain = SSEPackedInt
+// Move Int Doubleword to Single Scalar
+//
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
+def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
+ "vmovd\t{$src, $dst|$dst, $src}",
+ [(set FR32X:$dst, (bitconvert GR32:$src))]>,
+ EVEX, Sched<[WriteVecMoveFromGpr]>;
+
+def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
+ "vmovd\t{$src, $dst|$dst, $src}",
+ [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))]>,
+ EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
+} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
+
// Move doubleword from xmm register to r/m32
//
let ExeDomain = SSEPackedInt in {
@@ -3860,13 +3882,6 @@ def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt
-let Predicates = [HasAVX512] in {
- def : Pat<(f64 (bitconvert GR64:$src)),
- (COPY_TO_REGCLASS (VMOV64toPQIZrr GR64:$src), FR64X)>;
- def : Pat<(f32 (bitconvert GR32:$src)),
- (COPY_TO_REGCLASS (VMOVDI2PDIZrr GR32:$src), FR32X)>;
-}
-
// Move quadword from xmm1 register to r/m64
//
let ExeDomain = SSEPackedInt in {
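
[Note] The patterns removed above selected an f64/f32 bitconvert of a GPR as a GPR-to-XMM move followed by a cross-class COPY_TO_REGCLASS; the new codegen-only VMOV64toSDZrr/VMOVDI2SSZrr defs (and their SSE/AVX counterparts in X86InstrSSE.td below) carry the (bitconvert GR64/GR32) pattern directly. The node these patterns match is a plain bit reinterpretation at the source level, roughly (plain C++20, illustrative only):

#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Bits64 = 0x3ff0000000000000ULL;   // IEEE-754 encoding of 1.0
  double D = std::bit_cast<double>(Bits64);  // i64 -> f64 bitconvert

  uint32_t Bits32 = 0x40490fdbu;             // ~pi as a float
  float F = std::bit_cast<float>(Bits32);    // i32 -> f32 bitconvert

  std::printf("%f %f\n", D, F);
  return 0;
}
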
diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
index 82adcd8e147..59e62da55f2 100644
--- a/llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -531,11 +531,13 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::MOV32rr, X86::MOV32rm, 0 },
{ X86::MOV64rr, X86::MOV64rm, 0 },
{ X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 },
+ { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 },
{ X86::MOV8rr, X86::MOV8rm, 0 },
{ X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16 },
{ X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16 },
{ X86::MOVDDUPrr, X86::MOVDDUPrm, TB_NO_REVERSE },
{ X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
+ { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
{ X86::MOVDQArr, X86::MOVDQArm, TB_ALIGN_16 },
{ X86::MOVDQUrr, X86::MOVDQUrm, 0 },
{ X86::MOVSHDUPrr, X86::MOVSHDUPrm, TB_ALIGN_16 },
@@ -816,6 +818,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::VGETMANTPSZrri, X86::VGETMANTPSZrmi, 0 },
{ X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 },
{ X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
+ { X86::VMOV64toSDZrr, X86::VMOV64toSDZrm, 0 },
+ { X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 },
{ X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
{ X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 },
{ X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 },
@@ -833,6 +837,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::VMOVDDUPrr, X86::VMOVDDUPrm, TB_NO_REVERSE },
{ X86::VMOVDI2PDIZrr, X86::VMOVDI2PDIZrm, 0 },
{ X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 },
+ { X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 },
+ { X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 },
{ X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 },
{ X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 },
{ X86::VMOVDQA32Zrr, X86::VMOVDQA32Zrm, TB_ALIGN_64 },
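
[Note] Each entry added above pairs a register-form opcode with its memory-form equivalent so the folding machinery can replace the register operand with a load when profitable. A minimal sketch of how such a sorted table might be queried; the struct, helper name, and opcode values are hypothetical stand-ins, not the actual X86 implementation:

#include <algorithm>
#include <cstdint>
#include <optional>

// Hypothetical mirror of a memory-fold table entry: register-form opcode,
// memory-form opcode, and flags such as alignment requirements.
struct FoldEntry {
  unsigned RegOp;
  unsigned MemOp;
  uint16_t Flags;
};

// Table must be sorted by RegOp so it can be binary-searched.
static const FoldEntry Table[] = {
    {100 /* stand-in for MOV64toSDrr */, 101 /* MOV64toSDrm */, 0},
    {200 /* stand-in for MOVDI2SSrr */, 201 /* MOVDI2SSrm */, 0},
};

// Return the memory-folded entry for a register-form opcode, if any.
std::optional<FoldEntry> lookupFold(unsigned RegOp) {
  const FoldEntry *I = std::lower_bound(
      std::begin(Table), std::end(Table), RegOp,
      [](const FoldEntry &E, unsigned Op) { return E.RegOp < Op; });
  if (I != std::end(Table) && I->RegOp == RegOp)
    return *I;
  return std::nullopt;
}

int main() { return lookupFold(100).has_value() ? 0 : 1; }
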
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index feee34b7644..6a2e5cf1aa2 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -4109,6 +4109,11 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIrm : VRS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}", []>,
VEX, Sched<[WriteVecLoad]>;
+let isCodeGenOnly = 1 in
+def VMOV64toSDrr : VRS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert GR64:$src))]>,
+ VEX, Sched<[WriteVecMoveFromGpr]>;
def MOVDI2PDIrr : S2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
@@ -4129,9 +4134,38 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def MOV64toPQIrm : RS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}", []>,
Sched<[WriteVecLoad]>;
+let isCodeGenOnly = 1 in
+def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert GR64:$src))]>,
+ Sched<[WriteVecMoveFromGpr]>;
} // ExeDomain = SSEPackedInt
//===---------------------------------------------------------------------===//
+// Move Int Doubleword to Single Scalar
+//
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
+ def VMOVDI2SSrr : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert GR32:$src))]>,
+ VEX, Sched<[WriteVecMoveFromGpr]>;
+
+ def VMOVDI2SSrm : VS2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
+ VEX, Sched<[WriteVecLoad]>;
+ def MOVDI2SSrr : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert GR32:$src))]>,
+ Sched<[WriteVecMoveFromGpr]>;
+
+ def MOVDI2SSrm : S2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
+ Sched<[WriteVecLoad]>;
+} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
+
+//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int to Packed Double Int
//
let ExeDomain = SSEPackedInt in {
@@ -4158,21 +4192,6 @@ def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt
-let Predicates = [UseAVX] in {
- def : Pat<(f64 (bitconvert GR64:$src)),
- (COPY_TO_REGCLASS (VMOV64toPQIrr GR64:$src), FR64)>;
- def : Pat<(f32 (bitconvert GR32:$src)),
- (COPY_TO_REGCLASS (VMOVDI2PDIrr GR32:$src), FR32)>;
-}
-
-let Predicates = [UseSSE2] in
-def : Pat<(f64 (bitconvert GR64:$src)),
- (COPY_TO_REGCLASS (MOV64toPQIrr GR64:$src), FR64)>;
-
-let Predicates = [UseSSE1] in
-def : Pat<(f32 (bitconvert GR32:$src)),
- (COPY_TO_REGCLASS (MOVDI2PDIrr GR32:$src), FR32)>;
-
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int first element to Doubleword Int
//
@@ -4206,6 +4225,10 @@ def MOVPQIto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
let Predicates = [UseAVX] in
+ def VMOV64toSDrm : VS2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
+ VEX, Sched<[WriteVecLoad]>;
def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64:$src))]>,
@@ -4215,6 +4238,10 @@ let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
[(store (i64 (bitconvert FR64:$src)), addr:$dst)]>,
VEX, Sched<[WriteVecStore]>;
+ def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
+ Sched<[WriteVecLoad]>;
def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64:$src))]>,