diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 19 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx2-masked-gather.ll | 14 |
2 files changed, 18 insertions, 15 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 110af66b422..a21145f0755 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1127,6 +1127,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } if (HasInt256) { + // Custom legalize 2x32 to get a little better code. + setOperationAction(ISD::MGATHER, MVT::v2f32, Custom); + for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) setOperationAction(ISD::MGATHER, VT, Custom); @@ -1360,11 +1363,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CTPOP, VT, Legal); } - // Custom legalize 2x32 to get a little better code. - if (Subtarget.hasVLX()) { - setOperationAction(ISD::MGATHER, MVT::v2f32, Custom); - } - // Custom lower several nodes. for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) @@ -24863,7 +24861,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, } case ISD::MGATHER: { EVT VT = N->getValueType(0); - if (VT == MVT::v2f32 && Subtarget.hasVLX()) { + if (VT == MVT::v2f32 && (Subtarget.hasVLX() || !Subtarget.hasAVX512())) { auto *Gather = cast<MaskedGatherSDNode>(N); SDValue Index = Gather->getIndex(); if (Index.getValueType() != MVT::v2i64) @@ -24873,10 +24871,17 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, SDValue Src0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Gather->getValue(), DAG.getUNDEF(MVT::v2f32)); + if (!Subtarget.hasVLX()) { + // We need to widen the mask, but the instruction will only use 2 + // of its elements. So we can use undef. + Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask, + DAG.getUNDEF(MVT::v2i1)); + Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask); + } SDValue Ops[] = { Gather->getChain(), Src0, Mask, Gather->getBasePtr(), Index }; SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>( - DAG.getVTList(MVT::v4f32, MVT::v2i1, MVT::Other), Ops, dl, + DAG.getVTList(MVT::v4f32, Mask.getValueType(), MVT::Other), Ops, dl, Gather->getMemoryVT(), Gather->getMemOperand()); Results.push_back(Res); Results.push_back(Res.getValue(2)); diff --git a/llvm/test/CodeGen/X86/avx2-masked-gather.ll b/llvm/test/CodeGen/X86/avx2-masked-gather.ll index bf5ab1657a5..2007b7cf76e 100644 --- a/llvm/test/CodeGen/X86/avx2-masked-gather.ll +++ b/llvm/test/CodeGen/X86/avx2-masked-gather.ll @@ -134,13 +134,12 @@ define <2 x float> @masked_gather_v2float(<2 x float*>* %ptr, <2 x i1> %masks, < ; ; X64-LABEL: masked_gather_v2float: ; X64: # BB#0: # %entry -; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero +; X64-NEXT: vmovaps (%rdi), %xmm2 +; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; X64-NEXT: vpslld $31, %xmm0, %xmm0 ; X64-NEXT: vpsrad $31, %xmm0, %xmm0 -; X64-NEXT: vmovaps (%rdi), %xmm2 -; X64-NEXT: vgatherqps %xmm0, (,%ymm2), %xmm1 +; X64-NEXT: vgatherqps %xmm0, (,%xmm2), %xmm1 ; X64-NEXT: vmovaps %xmm1, %xmm0 -; X64-NEXT: vzeroupper ; X64-NEXT: retq ; ; NOGATHER-LABEL: masked_gather_v2float: @@ -185,13 +184,12 @@ define <4 x float> @masked_gather_v2float_concat(<2 x float*>* %ptr, <2 x i1> %m ; ; X64-LABEL: masked_gather_v2float_concat: ; X64: # BB#0: # %entry -; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero +; X64-NEXT: vmovaps (%rdi), %xmm2 +; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; X64-NEXT: vpslld $31, %xmm0, %xmm0 ; X64-NEXT: vpsrad $31, %xmm0, %xmm0 -; X64-NEXT: vmovaps (%rdi), %xmm2 -; X64-NEXT: vgatherqps %xmm0, (,%ymm2), %xmm1 +; X64-NEXT: vgatherqps %xmm0, (,%xmm2), %xmm1 ; X64-NEXT: vmovaps %xmm1, %xmm0 -; X64-NEXT: vzeroupper ; X64-NEXT: retq ; ; NOGATHER-LABEL: masked_gather_v2float_concat: |

