summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp29
-rw-r--r--llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll22
2 files changed, 44 insertions, 7 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6bc94a8f9fc..b718fb6ee5b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22034,15 +22034,16 @@ static SDValue LowerVectorIntUnary(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
unsigned NumElems = VT.getVectorNumElements();
unsigned SizeInBits = VT.getSizeInBits();
+ MVT EltVT = VT.getVectorElementType();
+ SDValue Src = Op.getOperand(0);
+ assert(EltVT == Src.getSimpleValueType().getVectorElementType() &&
+ "Src and Op should have the same element type!");
// Extract the Lo/Hi vectors
SDLoc dl(Op);
- SDValue Src = Op.getOperand(0);
- unsigned SrcNumElems = Src.getSimpleValueType().getVectorNumElements();
SDValue Lo = extractSubVector(Src, 0, DAG, dl, SizeInBits / 2);
- SDValue Hi = extractSubVector(Src, SrcNumElems / 2, DAG, dl, SizeInBits / 2);
+ SDValue Hi = extractSubVector(Src, NumElems / 2, DAG, dl, SizeInBits / 2);
- MVT EltVT = VT.getVectorElementType();
MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
DAG.getNode(Op.getOpcode(), dl, NewVT, Lo),
@@ -23996,8 +23997,17 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget,
}
// Custom splitting for BWI types when AVX512F is available but BWI isn't.
- if ((SrcVT == MVT::v32i16 || SrcVT == MVT::v64i8) && DstVT.isVector())
- return Lower512IntUnary(Op, DAG);
+ if ((SrcVT == MVT::v32i16 || SrcVT == MVT::v64i8) && DstVT.isVector() &&
+ DAG.getTargetLoweringInfo().isTypeLegal(DstVT)) {
+ SDLoc dl(Op);
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(Op.getOperand(0), dl);
+ EVT CastVT = MVT::getVectorVT(DstVT.getVectorElementType(),
+ DstVT.getVectorNumElements() / 2);
+ Lo = DAG.getBitcast(CastVT, Lo);
+ Hi = DAG.getBitcast(CastVT, Hi);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, DstVT, Lo, Hi);
+ }
// Use MOVMSK for vector to scalar conversion to prevent scalarization.
if ((SrcVT == MVT::v16i1 || SrcVT == MVT::v32i1) && DstVT.isScalarInteger()) {
@@ -25391,7 +25401,12 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
// Custom splitting for BWI types when AVX512F is available but BWI isn't.
if ((DstVT == MVT::v32i16 || DstVT == MVT::v64i8) &&
SrcVT.isVector() && isTypeLegal(SrcVT)) {
- SDValue Res = Lower512IntUnary(SDValue(N, 0), DAG);
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
+ MVT CastVT = (DstVT == MVT::v32i16) ? MVT::v16i16 : MVT::v32i8;
+ Lo = DAG.getBitcast(CastVT, Lo);
+ Hi = DAG.getBitcast(CastVT, Hi);
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, DstVT, Lo, Hi);
Results.push_back(Res);
return;
}
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
index 1a000fbae8b..e8661222d56 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
@@ -1960,5 +1960,27 @@ define <8 x i64> @test_mm512_mask_mul_epu32(i16 zeroext %__k, <8 x i64> %__A, <8
ret <8 x i64> %tmp4
}
+define <8 x double> @test_mm512_set1_epi8(i8 signext %d) nounwind {
+; X32-LABEL: test_mm512_set1_epi8:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: vmovd %eax, %xmm0
+; X32-NEXT: vpbroadcastb %xmm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_set1_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: vmovd %edi, %xmm0
+; X64-NEXT: vpbroadcastb %xmm0, %ymm0
+; X64-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; X64-NEXT: retq
+entry:
+ %vecinit.i = insertelement <64 x i8> undef, i8 %d, i32 0
+ %vecinit63.i = shufflevector <64 x i8> %vecinit.i, <64 x i8> undef, <64 x i32> zeroinitializer
+ %0 = bitcast <64 x i8> %vecinit63.i to <8 x double>
+ ret <8 x double> %0
+}
+
!0 = !{i32 1}
OpenPOWER on IntegriCloud