summary refs log tree commit diff stats
path: root/llvm/lib
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2019-07-01 19:01:37 +0000
committerCraig Topper <craig.topper@intel.com>2019-07-01 19:01:37 +0000
commit5e7815b695d4a21f021f23ed41750a44e9eb3849 (patch)
treec51623980e785bd3c68f74b8a8bcfe87cf414486 /llvm/lib
parentb101c39f587731f0b36f264158e2ad8c7bbf5860 (diff)
downloadbcm5719-llvm-5e7815b695d4a21f021f23ed41750a44e9eb3849.tar.gz
bcm5719-llvm-5e7815b695d4a21f021f23ed41750a44e9eb3849.zip
[X86] Correct v4f32->v2i64 cvt(t)ps2(u)qq memory isel patterns
These instructions only read 64 bits of memory, so we shouldn't allow a full-vector-width load to be pattern matched in case it is marked volatile. Instead allow vzload or scalar_to_vector+load. Also add a DAG combine to turn full vector loads into vzload when used by one of these instructions if the load isn't volatile. This fixes another case for PR42079.

llvm-svn: 364838
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp39
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td56
2 files changed, 93 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 71bab13c427..a47a6669849 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41144,6 +41144,41 @@ static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ EVT VT = N->getValueType(0);
+
+ // Convert a full vector load into vzload when not all bits are needed.
+ SDValue In = N->getOperand(0);
+ MVT InVT = In.getSimpleValueType();
+ if (VT.getVectorNumElements() < InVT.getVectorNumElements() &&
+ ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) {
+ assert(InVT.is128BitVector() && "Expected 128-bit input vector");
+ LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0));
+ // Unless the load is volatile.
+ if (!LN->isVolatile()) {
+ SDLoc dl(N);
+ unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements();
+ MVT MemVT = MVT::getFloatingPointVT(NumBits);
+ MVT LoadVT = MVT::getVectorVT(MemVT, 128 / NumBits);
+ SDVTList Tys = DAG.getVTList(LoadVT, MVT::Other);
+ SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue VZLoad =
+ DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, MemVT,
+ LN->getPointerInfo(),
+ LN->getAlignment(),
+ LN->getMemOperand()->getFlags());
+ SDValue Convert = DAG.getNode(N->getOpcode(), dl, VT,
+ DAG.getBitcast(InVT, VZLoad));
+ DCI.CombineTo(N, Convert);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
+ return SDValue(N, 0);
+ }
+ }
+
+ return SDValue();
+}
+
/// Do target-specific dag combines on X86ISD::ANDNP nodes.
static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -43940,6 +43975,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FMAXNUM: return combineFMinNumFMaxNum(N, DAG, Subtarget);
case X86ISD::CVTSI2P:
case X86ISD::CVTUI2P: return combineX86INT_TO_FP(N, DAG, DCI);
+ case X86ISD::CVTP2SI:
+ case X86ISD::CVTP2UI:
+ case X86ISD::CVTTP2SI:
+ case X86ISD::CVTTP2UI: return combineCVTP2I_CVTTP2I(N, DAG, DCI);
case X86ISD::BT: return combineBT(N, DAG, DCI);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: return combineZext(N, DAG, DCI, Subtarget);
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 2c9b6f127a5..0b3d0f59cc6 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7979,7 +7979,11 @@ multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
// Explicitly specified broadcast string, since we take only 2 elements
// from v4f32x_info source
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
- sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
+ sched.XMM, "{1to2}", "", f64mem, VK2WM,
+ (v2i64 (OpNode (bc_v4f32
+ (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))>,
+ EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
sched.YMM>, EVEX_V256;
}
@@ -7997,7 +8001,11 @@ multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
// Explicitly specified broadcast string, since we take only 2 elements
// from v4f32x_info source
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
- sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
+ sched.XMM, "{1to2}", "", f64mem, VK2WM,
+ (v2i64 (OpNode (bc_v4f32
+ (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))>,
+ EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
sched.YMM>, EVEX_V256;
}
@@ -8358,6 +8366,50 @@ let Predicates = [HasDQI] in {
}
let Predicates = [HasDQI, HasVLX] in {
+ def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload addr:$src))))),
+ (VCVTPS2QQZ128rm addr:$src)>;
+ def : Pat<(v2i64 (vselect VK2WM:$mask,
+ (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+ VR128X:$src0)),
+ (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(v2i64 (vselect VK2WM:$mask,
+ (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+ v2i64x_info.ImmAllZerosV)),
+ (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload addr:$src))))),
+ (VCVTPS2UQQZ128rm addr:$src)>;
+ def : Pat<(v2i64 (vselect VK2WM:$mask,
+ (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+ VR128X:$src0)),
+ (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(v2i64 (vselect VK2WM:$mask,
+ (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+ v2i64x_info.ImmAllZerosV)),
+ (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v2i64 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload addr:$src))))),
+ (VCVTTPS2QQZ128rm addr:$src)>;
+ def : Pat<(v2i64 (vselect VK2WM:$mask,
+ (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+ VR128X:$src0)),
+ (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(v2i64 (vselect VK2WM:$mask,
+ (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+ v2i64x_info.ImmAllZerosV)),
+ (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v2i64 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload addr:$src))))),
+ (VCVTTPS2UQQZ128rm addr:$src)>;
+ def : Pat<(v2i64 (vselect VK2WM:$mask,
+ (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+ VR128X:$src0)),
+ (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(v2i64 (vselect VK2WM:$mask,
+ (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+ v2i64x_info.ImmAllZerosV)),
+ (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
+
def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src))),
(VCVTTPS2QQZ256rr VR128X:$src)>;
def : Pat<(v4i64 (fp_to_sint (loadv4f32 addr:$src))),
OpenPOWER on IntegriCloud