-rw-r--r--llvm/include/llvm/Target/TargetLowering.h9
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp24
-rw-r--r--llvm/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll53
-rw-r--r--llvm/test/CodeGen/X86/avx-vextractf128.ll2
4 files changed, 77 insertions, 11 deletions
diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h
index 9b5ff57fea8..678817b39d1 100644
--- a/llvm/include/llvm/Target/TargetLowering.h
+++ b/llvm/include/llvm/Target/TargetLowering.h
@@ -286,6 +286,15 @@ public:
return true;
}
+ /// isStoreBitCastBeneficial() - Mirror of isLoadBitCastBeneficial(). Return
+ /// true if the following transform is beneficial.
+ ///
+ /// (store (y (conv x)), y*) -> (store x, (x*))
+ virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT) const {
+ // Default to the same logic as loads.
+ return isLoadBitCastBeneficial(StoreVT, BitcastVT);
+ }
+
/// Return true if it is expected to be cheaper to do a store of a non-zero
/// vector constant with the given size and type for the address space than to
/// store the individual scalar element constants.
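
For reference, a backend opts in to or out of the new combine by overriding the hook above. Below is a minimal sketch of such an override, assuming a hypothetical MyTargetLowering subclass and a purely illustrative heuristic (do not rewrite a store with 32-bit or wider elements into one with narrower elements); it is not part of this patch.

// Hypothetical override of the hook added above; MyTargetLowering, its
// header, and the 32-bit element heuristic are illustrative only.
#include "MyTargetLowering.h" // hypothetical header declaring MyTargetLowering
using namespace llvm;

bool MyTargetLowering::isStoreBitCastBeneficial(EVT StoreVT,
                                                EVT BitcastVT) const {
  // The combine replaces a store of StoreVT with a store of BitcastVT (the
  // pre-bitcast value type). Reject it if that would shrink the element
  // size below 32 bits.
  if (StoreVT.getScalarSizeInBits() >= 32 &&
      BitcastVT.getScalarSizeInBits() < 32)
    return false;

  // Otherwise use the same default as the base implementation, which
  // mirrors the load heuristic.
  return isLoadBitCastBeneficial(StoreVT, BitcastVT);
}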
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 690a53e785c..75faf526fab 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11970,17 +11970,21 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// resultant store does not need a higher alignment than the original.
if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
ST->isUnindexed()) {
- unsigned OrigAlign = ST->getAlignment();
EVT SVT = Value.getOperand(0).getValueType();
- unsigned Align = DAG.getDataLayout().getABITypeAlignment(
- SVT.getTypeForEVT(*DAG.getContext()));
- if (Align <= OrigAlign &&
- ((!LegalOperations && !ST->isVolatile()) ||
- TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
- return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
- Ptr, ST->getPointerInfo(), ST->isVolatile(),
- ST->isNonTemporal(), OrigAlign,
- ST->getAAInfo());
+ if (((!LegalOperations && !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
+ TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
+ unsigned OrigAlign = ST->getAlignment();
+ bool Fast = false;
+ if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
+ ST->getAddressSpace(), OrigAlign, &Fast) &&
+ Fast) {
+ return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
+ Ptr, ST->getPointerInfo(), ST->isVolatile(),
+ ST->isNonTemporal(), OrigAlign,
+ ST->getAAInfo());
+ }
+ }
}
// Turn 'store undef, Ptr' -> nothing.
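
The reworked combine above drops the old requirement that the new type's ABI alignment be no greater than the store's original alignment; instead it queries allowsMemoryAccess() with the original alignment and only proceeds when the target reports the access as fast. That query bottoms out in the target's allowsMisalignedMemoryAccesses() hook. The sketch below is a hypothetical override, not part of this patch (MyTargetLowering and the dword-alignment rule are illustrative, and the real AMDGPU hook differs in detail); it shows the kind of answer that lets an under-aligned wide store go through, which is what the AMDGPU tests that follow rely on.

// Hypothetical hook consulted, via allowsMemoryAccess(), by the new check
// above. It reports 8-byte and wider accesses as legal and fast when they
// are at least dword (4 byte) aligned, even though that is below the ABI
// alignment of the type. AddrSpace is ignored in this sketch.
bool MyTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                      unsigned AddrSpace,
                                                      unsigned Align,
                                                      bool *IsFast) const {
  if (VT.getStoreSize() >= 8 && Align >= 4) {
    if (IsFast)
      *IsFast = true;
    return true;
  }

  // Otherwise require natural alignment and treat the access as fast only
  // when that is met.
  bool Aligned = Align >= VT.getStoreSize();
  if (IsFast)
    *IsFast = Aligned;
  return Aligned;
}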
diff --git a/llvm/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll b/llvm/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll
new file mode 100644
index 00000000000..281e49f804c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll
@@ -0,0 +1,53 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}store_v2i32_as_v4i16_align_4:
+; GCN: s_load_dwordx2
+; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
+define void @store_v2i32_as_v4i16_align_4(<4 x i16> addrspace(3)* align 4 %out, <2 x i32> %x) #0 {
+ %x.bc = bitcast <2 x i32> %x to <4 x i16>
+ store <4 x i16> %x.bc, <4 x i16> addrspace(3)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_v4i32_as_v8i16_align_4:
+; GCN: s_load_dwordx4
+; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
+; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
+define void @store_v4i32_as_v8i16_align_4(<8 x i16> addrspace(3)* align 4 %out, <4 x i32> %x) #0 {
+ %x.bc = bitcast <4 x i32> %x to <8 x i16>
+ store <8 x i16> %x.bc, <8 x i16> addrspace(3)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_v2i32_as_i64_align_4:
+; GCN: s_load_dwordx2
+; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
+define void @store_v2i32_as_i64_align_4(i64 addrspace(3)* align 4 %out, <2 x i32> %x) #0 {
+ %x.bc = bitcast <2 x i32> %x to i64
+ store i64 %x.bc, i64 addrspace(3)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_v4i32_as_v2i64_align_4:
+; GCN: s_load_dwordx4
+; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
+; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
+define void @store_v4i32_as_v2i64_align_4(<2 x i64> addrspace(3)* align 4 %out, <4 x i32> %x) #0 {
+ %x.bc = bitcast <4 x i32> %x to <2 x i64>
+ store <2 x i64> %x.bc, <2 x i64> addrspace(3)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_v4i16_as_v2i32_align_4:
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
+define void @store_v4i16_as_v2i32_align_4(<2 x i32> addrspace(3)* align 4 %out, <4 x i16> %x) #0 {
+ %x.bc = bitcast <4 x i16> %x to <2 x i32>
+ store <2 x i32> %x.bc, <2 x i32> addrspace(3)* %out, align 4
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/X86/avx-vextractf128.ll b/llvm/test/CodeGen/X86/avx-vextractf128.ll
index d7a6d61ba0a..2feddddaf78 100644
--- a/llvm/test/CodeGen/X86/avx-vextractf128.ll
+++ b/llvm/test/CodeGen/X86/avx-vextractf128.ll
@@ -119,7 +119,7 @@ entry:
define void @t9(i64* %p) {
; CHECK-LABEL: t9:
; CHECK: ## BB#0:
-; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vxorps %ymm0, %ymm0, %ymm0
; CHECK-NEXT: vmovups %ymm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq