Diffstat (limited to 'llvm')

-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp      | 19 ++++++++++++-------
-rw-r--r--  llvm/test/CodeGen/X86/avx2-masked-gather.ll  | 14 ++++++--------

2 files changed, 18 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 110af66b422..a21145f0755 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1127,6 +1127,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     }
 
     if (HasInt256) {
+      // Custom legalize 2x32 to get a little better code.
+      setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
+
       for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                        MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
         setOperationAction(ISD::MGATHER, VT, Custom);
@@ -1360,11 +1363,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
         setOperationAction(ISD::CTPOP, VT, Legal);
     }
 
-    // Custom legalize 2x32 to get a little better code.
-    if (Subtarget.hasVLX()) {
-      setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
-    }
-
     // Custom lower several nodes.
     for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                      MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
@@ -24863,7 +24861,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
   }
   case ISD::MGATHER: {
     EVT VT = N->getValueType(0);
-    if (VT == MVT::v2f32 && Subtarget.hasVLX()) {
+    if (VT == MVT::v2f32 && (Subtarget.hasVLX() || !Subtarget.hasAVX512())) {
       auto *Gather = cast<MaskedGatherSDNode>(N);
       SDValue Index = Gather->getIndex();
       if (Index.getValueType() != MVT::v2i64)
@@ -24873,10 +24871,17 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
       SDValue Src0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
                                  Gather->getValue(),
                                  DAG.getUNDEF(MVT::v2f32));
+      if (!Subtarget.hasVLX()) {
+        // We need to widen the mask, but the instruction will only use 2
+        // of its elements. So we can use undef.
+        Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask,
+                           DAG.getUNDEF(MVT::v2i1));
+        Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask);
+      }
       SDValue Ops[] = { Gather->getChain(), Src0, Mask, Gather->getBasePtr(),
                         Index };
       SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
-          DAG.getVTList(MVT::v4f32, MVT::v2i1, MVT::Other), Ops, dl,
+          DAG.getVTList(MVT::v4f32, Mask.getValueType(), MVT::Other), Ops, dl,
           Gather->getMemoryVT(), Gather->getMemOperand());
       Results.push_back(Res);
       Results.push_back(Res.getValue(2));
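
For reference, the IR that reaches this ReplaceNodeResults path is a v2f32 masked gather whose pointer vector is loaded from memory, as in the test changed below. The following is a minimal sketch reconstructed from the function signature visible in the test's hunk header; the intrinsic mangling, the i32 4 alignment, and the %passthru parameter name are assumptions, not copied from the test file:

  ; Sketch only: a <2 x float> masked gather with a <2 x i1> mask, the
  ; shape handled by the custom legalization above.
  declare <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*>, i32, <2 x i1>, <2 x float>)

  define <2 x float> @masked_gather_v2float(<2 x float*>* %ptr, <2 x i1> %masks, <2 x float> %passthru) {
  entry:
    ; Load the two pointers to gather from, then do the masked gather.
    %ld = load <2 x float*>, <2 x float*>* %ptr
    %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %ld, i32 4, <2 x i1> %masks, <2 x float> %passthru)
    ret <2 x float> %res
  }

On AVX2 without AVX512VL, the gather result is widened to v4f32 and the v2i1 mask is concatenated with undef and sign-extended to v4i32, because the AVX2 vgatherqps instruction takes its mask in an XMM register rather than a k-register; only the low two mask elements are actually consumed, which is why undef is safe for the upper half.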
diff --git a/llvm/test/CodeGen/X86/avx2-masked-gather.ll b/llvm/test/CodeGen/X86/avx2-masked-gather.ll
index bf5ab1657a5..2007b7cf76e 100644
--- a/llvm/test/CodeGen/X86/avx2-masked-gather.ll
+++ b/llvm/test/CodeGen/X86/avx2-masked-gather.ll
@@ -134,13 +134,12 @@ define <2 x float> @masked_gather_v2float(<2 x float*>* %ptr, <2 x i1> %masks, <
 ;
 ; X64-LABEL: masked_gather_v2float:
 ; X64:       # BB#0: # %entry
-; X64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; X64-NEXT:    vmovaps (%rdi), %xmm2
+; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X64-NEXT:    vpslld $31, %xmm0, %xmm0
 ; X64-NEXT:    vpsrad $31, %xmm0, %xmm0
-; X64-NEXT:    vmovaps (%rdi), %xmm2
-; X64-NEXT:    vgatherqps %xmm0, (,%ymm2), %xmm1
+; X64-NEXT:    vgatherqps %xmm0, (,%xmm2), %xmm1
 ; X64-NEXT:    vmovaps %xmm1, %xmm0
-; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
 ;
 ; NOGATHER-LABEL: masked_gather_v2float:
@@ -185,13 +184,12 @@ define <4 x float> @masked_gather_v2float_concat(<2 x float*>* %ptr, <2 x i1> %m
 ;
 ; X64-LABEL: masked_gather_v2float_concat:
 ; X64:       # BB#0: # %entry
-; X64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; X64-NEXT:    vmovaps (%rdi), %xmm2
+; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X64-NEXT:    vpslld $31, %xmm0, %xmm0
 ; X64-NEXT:    vpsrad $31, %xmm0, %xmm0
-; X64-NEXT:    vmovaps (%rdi), %xmm2
-; X64-NEXT:    vgatherqps %xmm0, (,%ymm2), %xmm1
+; X64-NEXT:    vgatherqps %xmm0, (,%xmm2), %xmm1
 ; X64-NEXT:    vmovaps %xmm1, %xmm0
-; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
 ;
 ; NOGATHER-LABEL: masked_gather_v2float_concat:
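
The concat variant exercises the same lowering but uses the widened gather result as a <4 x float> directly. A sketch of that test's probable shape, under the same assumed intrinsic declaration as the earlier example (the shufflevector simply widens the 2-element result, leaving the high lanes undef):

  define <4 x float> @masked_gather_v2float_concat(<2 x float*>* %ptr, <2 x i1> %masks, <2 x float> %passthru) {
  entry:
    %ld = load <2 x float*>, <2 x float*>* %ptr
    %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %ld, i32 4, <2 x i1> %masks, <2 x float> %passthru)
    ; Widen <2 x float> to <4 x float>; lanes 2 and 3 come from undef.
    %concat = shufflevector <2 x float> %res, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    ret <4 x float> %concat
  }

In both tests the updated code keeps the v2i64 index in an XMM register ((,%xmm2) instead of (,%ymm2)), so no YMM register is written and the trailing vzeroupper is no longer needed.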