diff options
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 10
-rw-r--r-- llvm/test/CodeGen/X86/avx512-load-trunc-store-i1.ll | 28
2 files changed, 24 insertions, 14 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e8b3f3656b6..d0e9b365462 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -35021,6 +35021,16 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, SDValue StoredVal = St->getOperand(1); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + // If this is a store of a scalar_to_vector to v1i1, just use a scalar store. + // This will avoid a copy to k-register. + if (VT == MVT::v1i1 && VT == StVT && Subtarget.hasAVX512() && + StoredVal.getOpcode() == ISD::SCALAR_TO_VECTOR && + StoredVal.getOperand(0).getValueType() == MVT::i8) { + return DAG.getStore(St->getChain(), dl, StoredVal.getOperand(0), + St->getBasePtr(), St->getPointerInfo(), + St->getAlignment(), St->getMemOperand()->getFlags()); + } + // If we are saving a concatenation of two XMM registers and 32-byte stores // are slow, such as on Sandy Bridge, perform two 16-byte stores. 
bool Fast; diff --git a/llvm/test/CodeGen/X86/avx512-load-trunc-store-i1.ll b/llvm/test/CodeGen/X86/avx512-load-trunc-store-i1.ll index 02cf3734f96..c3bcebe00e3 100644 --- a/llvm/test/CodeGen/X86/avx512-load-trunc-store-i1.ll +++ b/llvm/test/CodeGen/X86/avx512-load-trunc-store-i1.ll @@ -5,8 +5,8 @@ define void @load_v1i2_trunc_v1i1_store(<1 x i2>* %a0,<1 x i1>* %a1) { ; AVX512-ALL-LABEL: load_v1i2_trunc_v1i1_store: ; AVX512-ALL: # %bb.0: -; AVX512-ALL-NEXT: kmovb (%rdi), %k0 -; AVX512-ALL-NEXT: kmovb %k0, (%rsi) +; AVX512-ALL-NEXT: movb (%rdi), %al +; AVX512-ALL-NEXT: movb %al, (%rsi) ; AVX512-ALL-NEXT: retq ; ; AVX512-ONLY-LABEL: load_v1i2_trunc_v1i1_store: @@ -22,8 +22,8 @@ define void @load_v1i2_trunc_v1i1_store(<1 x i2>* %a0,<1 x i1>* %a1) { define void @load_v1i3_trunc_v1i1_store(<1 x i3>* %a0,<1 x i1>* %a1) { ; AVX512-ALL-LABEL: load_v1i3_trunc_v1i1_store: ; AVX512-ALL: # %bb.0: -; AVX512-ALL-NEXT: kmovb (%rdi), %k0 -; AVX512-ALL-NEXT: kmovb %k0, (%rsi) +; AVX512-ALL-NEXT: movb (%rdi), %al +; AVX512-ALL-NEXT: movb %al, (%rsi) ; AVX512-ALL-NEXT: retq ; ; AVX512-ONLY-LABEL: load_v1i3_trunc_v1i1_store: @@ -39,8 +39,8 @@ define void @load_v1i3_trunc_v1i1_store(<1 x i3>* %a0,<1 x i1>* %a1) { define void @load_v1i4_trunc_v1i1_store(<1 x i4>* %a0,<1 x i1>* %a1) { ; AVX512-ALL-LABEL: load_v1i4_trunc_v1i1_store: ; AVX512-ALL: # %bb.0: -; AVX512-ALL-NEXT: kmovb (%rdi), %k0 -; AVX512-ALL-NEXT: kmovb %k0, (%rsi) +; AVX512-ALL-NEXT: movb (%rdi), %al +; AVX512-ALL-NEXT: movb %al, (%rsi) ; AVX512-ALL-NEXT: retq ; ; AVX512-ONLY-LABEL: load_v1i4_trunc_v1i1_store: @@ -56,8 +56,8 @@ define void @load_v1i4_trunc_v1i1_store(<1 x i4>* %a0,<1 x i1>* %a1) { define void @load_v1i8_trunc_v1i1_store(<1 x i8>* %a0,<1 x i1>* %a1) { ; AVX512-ALL-LABEL: load_v1i8_trunc_v1i1_store: ; AVX512-ALL: # %bb.0: -; AVX512-ALL-NEXT: kmovb (%rdi), %k0 -; AVX512-ALL-NEXT: kmovb %k0, (%rsi) +; AVX512-ALL-NEXT: movb (%rdi), %al +; AVX512-ALL-NEXT: movb %al, (%rsi) ; AVX512-ALL-NEXT: retq ; ; 
AVX512-ONLY-LABEL: load_v1i8_trunc_v1i1_store: @@ -73,8 +73,8 @@ define void @load_v1i8_trunc_v1i1_store(<1 x i8>* %a0,<1 x i1>* %a1) { define void @load_v1i16_trunc_v1i1_store(<1 x i16>* %a0,<1 x i1>* %a1) { ; AVX512-ALL-LABEL: load_v1i16_trunc_v1i1_store: ; AVX512-ALL: # %bb.0: -; AVX512-ALL-NEXT: kmovb (%rdi), %k0 -; AVX512-ALL-NEXT: kmovb %k0, (%rsi) +; AVX512-ALL-NEXT: movb (%rdi), %al +; AVX512-ALL-NEXT: movb %al, (%rsi) ; AVX512-ALL-NEXT: retq ; ; AVX512-ONLY-LABEL: load_v1i16_trunc_v1i1_store: @@ -90,8 +90,8 @@ define void @load_v1i16_trunc_v1i1_store(<1 x i16>* %a0,<1 x i1>* %a1) { define void @load_v1i32_trunc_v1i1_store(<1 x i32>* %a0,<1 x i1>* %a1) { ; AVX512-ALL-LABEL: load_v1i32_trunc_v1i1_store: ; AVX512-ALL: # %bb.0: -; AVX512-ALL-NEXT: kmovb (%rdi), %k0 -; AVX512-ALL-NEXT: kmovb %k0, (%rsi) +; AVX512-ALL-NEXT: movb (%rdi), %al +; AVX512-ALL-NEXT: movb %al, (%rsi) ; AVX512-ALL-NEXT: retq ; ; AVX512-ONLY-LABEL: load_v1i32_trunc_v1i1_store: @@ -107,8 +107,8 @@ define void @load_v1i32_trunc_v1i1_store(<1 x i32>* %a0,<1 x i1>* %a1) { define void @load_v1i64_trunc_v1i1_store(<1 x i64>* %a0,<1 x i1>* %a1) { ; AVX512-ALL-LABEL: load_v1i64_trunc_v1i1_store: ; AVX512-ALL: # %bb.0: -; AVX512-ALL-NEXT: kmovb (%rdi), %k0 -; AVX512-ALL-NEXT: kmovb %k0, (%rsi) +; AVX512-ALL-NEXT: movb (%rdi), %al +; AVX512-ALL-NEXT: movb %al, (%rsi) ; AVX512-ALL-NEXT: retq ; ; AVX512-ONLY-LABEL: load_v1i64_trunc_v1i1_store: |