diff options
author | Craig Topper <craig.topper@intel.com> | 2018-03-04 19:33:15 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2018-03-04 19:33:15 +0000 |
commit | f2aae622287deacfdd0cfeba85ede24418cd472c (patch) | |
tree | d1132e9b366db7d563b112b098e64d37656b7261 | |
parent | 1209eb7d6684096c9a043373b4b027488be9e268 (diff) | |
download | bcm5719-llvm-f2aae622287deacfdd0cfeba85ede24418cd472c.tar.gz bcm5719-llvm-f2aae622287deacfdd0cfeba85ede24418cd472c.zip |
[X86] Add a DAG combine to turn stores of vXi1 constants into scalar stores.
llvm-svn: 326679
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 47 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-mask-op.ll | 140 | ||||
-rwxr-xr-x | llvm/test/CodeGen/X86/avx512-schedule.ll | 6 |
4 files changed, 73 insertions, 124 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 880db37675f..ecda6f536bf 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -35032,6 +35032,53 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, St->getAlignment(), St->getMemOperand()->getFlags()); } + // Widen v2i1/v4i1 stores to v8i1. + if ((VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT && + Subtarget.hasAVX512()) { + unsigned NumConcats = 8 / VT.getVectorNumElements(); + SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(VT)); + Ops[0] = StoredVal; + StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops); + return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), + St->getPointerInfo(), St->getAlignment(), + St->getMemOperand()->getFlags()); + } + + // Turn vXi1 stores of constants into a scalar store. + if ((VT == MVT::v8i1 || VT == MVT::v16i1 || VT == MVT::v32i1 || + VT == MVT::v64i1) && VT == StVT && TLI.isTypeLegal(VT) && + ISD::isBuildVectorOfConstantSDNodes(StoredVal.getNode())) { + // If its a v64i1 store without 64-bit support, we need two stores. + if (VT == MVT::v64i1 && !Subtarget.is64Bit()) { + SDValue Lo = DAG.getBuildVector(MVT::v32i1, dl, + StoredVal->ops().slice(0, 32)); + Lo = combinevXi1ConstantToInteger(Lo, DAG); + SDValue Hi = DAG.getBuildVector(MVT::v32i1, dl, + StoredVal->ops().slice(32, 32)); + Hi = combinevXi1ConstantToInteger(Hi, DAG); + + unsigned Alignment = St->getAlignment(); + + SDValue Ptr0 = St->getBasePtr(); + SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, 4, dl); + + SDValue Ch0 = + DAG.getStore(St->getChain(), dl, Lo, Ptr0, St->getPointerInfo(), + Alignment, St->getMemOperand()->getFlags()); + SDValue Ch1 = + DAG.getStore(St->getChain(), dl, Hi, Ptr1, + St->getPointerInfo().getWithOffset(4), + MinAlign(Alignment, 4U), + St->getMemOperand()->getFlags()); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1); + } + + StoredVal = combinevXi1ConstantToInteger(StoredVal, DAG); + return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), + St->getPointerInfo(), St->getAlignment(), + St->getMemOperand()->getFlags()); + } + // If we are saving a concatenation of two XMM registers and 32-byte stores // are slow, such as on Sandy Bridge, perform two 16-byte stores. bool Fast; diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 59ed5fd6f31..e338f6c0f25 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2809,10 +2809,6 @@ def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), // Load/store kreg let Predicates = [HasDQI] in { - def : Pat<(store VK4:$src, addr:$dst), - (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>; - def : Pat<(store VK2:$src, addr:$dst), - (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>; def : Pat<(store VK1:$src, addr:$dst), (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>; diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll index f1a2701a588..649fd7debbf 100644 --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -447,43 +447,19 @@ define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) { } define i8 @conv1(<8 x i1>* %R) { -; KNL-LABEL: conv1: -; KNL: ## %bb.0: ## %entry -; KNL-NEXT: movb $-1, (%rdi) -; KNL-NEXT: movb $-2, -{{[0-9]+}}(%rsp) -; KNL-NEXT: movb $-2, %al -; KNL-NEXT: retq -; -; SKX-LABEL: conv1: -; SKX: ## %bb.0: ## %entry -; SKX-NEXT: kxnorw %k0, %k0, %k0 -; SKX-NEXT: kmovb %k0, (%rdi) -; SKX-NEXT: movb $-2, -{{[0-9]+}}(%rsp) -; SKX-NEXT: movb $-2, %al -; SKX-NEXT: retq -; -; AVX512BW-LABEL: conv1: -; AVX512BW: ## %bb.0: ## %entry -; AVX512BW-NEXT: movb $-1, (%rdi) -; AVX512BW-NEXT: movb $-2, -{{[0-9]+}}(%rsp) -; AVX512BW-NEXT: movb $-2, %al -; AVX512BW-NEXT: retq -; -; AVX512DQ-LABEL: conv1: -; AVX512DQ: ## %bb.0: ## %entry -; AVX512DQ-NEXT: kxnorw %k0, %k0, %k0 -; AVX512DQ-NEXT: kmovb %k0, (%rdi) -; AVX512DQ-NEXT: movb $-2, -{{[0-9]+}}(%rsp) -; AVX512DQ-NEXT: movb $-2, %al -; AVX512DQ-NEXT: retq +; CHECK-LABEL: conv1: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: movb $-1, (%rdi) +; CHECK-NEXT: movb $-2, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movb $-2, %al +; CHECK-NEXT: retq ; ; X86-LABEL: conv1: ; X86: ## %bb.0: ## %entry ; X86-NEXT: subl $12, %esp ; X86-NEXT: .cfi_def_cfa_offset 16 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: kxnorw %k0, %k0, %k0 -; X86-NEXT: kmovb %k0, (%eax) +; X86-NEXT: movb $-1, (%eax) ; X86-NEXT: movb $-2, (%esp) ; X86-NEXT: movb $-2, %al ; X86-NEXT: addl $12, %esp @@ -3422,43 +3398,17 @@ entry: } define void @store_v64i1_constant(<64 x i1>* %R) { -; KNL-LABEL: store_v64i1_constant: -; KNL: ## %bb.0: ## %entry -; KNL-NEXT: kxnorw %k0, %k0, %k0 -; KNL-NEXT: kmovw %k0, 2(%rdi) -; KNL-NEXT: movl $-536871045, 4(%rdi) ## imm = 0xDFFFFF7B -; KNL-NEXT: movw $-4099, (%rdi) ## imm = 0xEFFD -; KNL-NEXT: retq -; -; SKX-LABEL: store_v64i1_constant: -; SKX: ## %bb.0: ## %entry -; SKX-NEXT: movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD -; SKX-NEXT: movq %rax, (%rdi) -; SKX-NEXT: retq -; -; AVX512BW-LABEL: store_v64i1_constant: -; AVX512BW: ## %bb.0: ## %entry -; AVX512BW-NEXT: movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD -; AVX512BW-NEXT: movq %rax, (%rdi) -; AVX512BW-NEXT: retq -; -; AVX512DQ-LABEL: store_v64i1_constant: -; AVX512DQ: ## %bb.0: ## %entry -; AVX512DQ-NEXT: kxnorw %k0, %k0, %k0 -; AVX512DQ-NEXT: kmovw %k0, 2(%rdi) -; AVX512DQ-NEXT: movl $-536871045, 4(%rdi) ## imm = 0xDFFFFF7B -; AVX512DQ-NEXT: movw $-4099, (%rdi) ## imm = 0xEFFD -; AVX512DQ-NEXT: retq +; CHECK-LABEL: store_v64i1_constant: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD +; CHECK-NEXT: movq %rax, (%rdi) +; CHECK-NEXT: retq ; ; X86-LABEL: store_v64i1_constant: ; X86: ## %bb.0: ## %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $-4099, %ecx ## imm = 0xEFFD -; X86-NEXT: kmovd %ecx, %k0 -; X86-NEXT: movl $-536871045, %ecx ## imm = 0xDFFFFF7B -; X86-NEXT: kmovd %ecx, %k1 -; X86-NEXT: kunpckdq %k0, %k1, %k0 -; X86-NEXT: kmovq %k0, (%eax) +; X86-NEXT: movl $-536871045, 4(%eax) ## imm = 0xDFFFFF7B +; X86-NEXT: movl $-4099, (%eax) ## imm = 0xEFFD ; X86-NEXT: retl entry: store <64 x i1> <i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1>, <64 x i1>* %R @@ -3466,36 +3416,15 @@ entry: } define void @store_v2i1_constant(<2 x i1>* %R) { -; KNL-LABEL: store_v2i1_constant: -; KNL: ## %bb.0: ## %entry -; KNL-NEXT: movb $1, (%rdi) -; KNL-NEXT: retq -; -; SKX-LABEL: store_v2i1_constant: -; SKX: ## %bb.0: ## %entry -; SKX-NEXT: movb $1, %al -; SKX-NEXT: kmovd %eax, %k0 -; SKX-NEXT: kmovb %k0, (%rdi) -; SKX-NEXT: retq -; -; AVX512BW-LABEL: store_v2i1_constant: -; AVX512BW: ## %bb.0: ## %entry -; AVX512BW-NEXT: movb $1, (%rdi) -; AVX512BW-NEXT: retq -; -; AVX512DQ-LABEL: store_v2i1_constant: -; AVX512DQ: ## %bb.0: ## %entry -; AVX512DQ-NEXT: movb $1, %al -; AVX512DQ-NEXT: kmovw %eax, %k0 -; AVX512DQ-NEXT: kmovb %k0, (%rdi) -; AVX512DQ-NEXT: retq +; CHECK-LABEL: store_v2i1_constant: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: movb $1, (%rdi) +; CHECK-NEXT: retq ; ; X86-LABEL: store_v2i1_constant: ; X86: ## %bb.0: ## %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movb $1, %cl -; X86-NEXT: kmovd %ecx, %k0 -; X86-NEXT: kmovb %k0, (%eax) +; X86-NEXT: movb $1, (%eax) ; X86-NEXT: retl entry: store <2 x i1> <i1 1, i1 0>, <2 x i1>* %R @@ -3503,36 +3432,15 @@ entry: } define void @store_v4i1_constant(<4 x i1>* %R) { -; KNL-LABEL: store_v4i1_constant: -; KNL: ## %bb.0: ## %entry -; KNL-NEXT: movb $5, (%rdi) -; KNL-NEXT: retq -; -; SKX-LABEL: store_v4i1_constant: -; SKX: ## %bb.0: ## %entry -; SKX-NEXT: movb $5, %al -; SKX-NEXT: kmovd %eax, %k0 -; SKX-NEXT: kmovb %k0, (%rdi) -; SKX-NEXT: retq -; -; AVX512BW-LABEL: store_v4i1_constant: -; AVX512BW: ## %bb.0: ## %entry -; AVX512BW-NEXT: movb $5, (%rdi) -; AVX512BW-NEXT: retq -; -; AVX512DQ-LABEL: store_v4i1_constant: -; AVX512DQ: ## %bb.0: ## %entry -; AVX512DQ-NEXT: movb $5, %al -; AVX512DQ-NEXT: kmovw %eax, %k0 -; AVX512DQ-NEXT: kmovb %k0, (%rdi) -; AVX512DQ-NEXT: retq +; CHECK-LABEL: store_v4i1_constant: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: movb $5, (%rdi) +; CHECK-NEXT: retq ; ; X86-LABEL: store_v4i1_constant: ; X86: ## %bb.0: ## %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movb $5, %cl -; X86-NEXT: kmovd %ecx, %k0 -; X86-NEXT: kmovb %k0, (%eax) +; X86-NEXT: movb $5, (%eax) ; X86-NEXT: retl entry: store <4 x i1> <i1 1, i1 0, i1 1, i1 0>, <4 x i1>* %R diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll index a1c3ed48e0e..5a7ee40d77d 100755 --- a/llvm/test/CodeGen/X86/avx512-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-schedule.ll @@ -6943,16 +6943,14 @@ define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) { define i8 @conv1(<8 x i1>* %R) { ; GENERIC-LABEL: conv1: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: kxnorw %k0, %k0, %k0 # sched: [1:1.00] -; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] +; GENERIC-NEXT: movb $-1, (%rdi) # sched: [5:1.00] ; GENERIC-NEXT: movb $-2, -{{[0-9]+}}(%rsp) # sched: [5:1.00] ; GENERIC-NEXT: movb $-2, %al # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: conv1: ; SKX: # %bb.0: # %entry -; SKX-NEXT: kxnorw %k0, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: movb $-1, (%rdi) # sched: [1:1.00] ; SKX-NEXT: movb $-2, -{{[0-9]+}}(%rsp) # sched: [1:1.00] ; SKX-NEXT: movb $-2, %al # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] |