diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-08-02 00:43:42 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-08-02 00:43:42 +0000 |
| commit | acc5e82b0e33a27ba64ccb4f88c81fe8709d4576 (patch) | |
| tree | de36eb055031183f0de7f2f65c688c27091bb987 | |
| parent | 6b898beb8e6bf5739d43c1d64646264afc595de8 (diff) | |
| download | bcm5719-llvm-acc5e82b0e33a27ba64ccb4f88c81fe8709d4576.tar.gz bcm5719-llvm-acc5e82b0e33a27ba64ccb4f88c81fe8709d4576.zip | |
DAG: Undo add->or combine with FrameIndexes
This pattern shows up when lowering byval copies on AMDGPU.
The byval object access is split into 4-byte chunks, adding a
constant offset to the FixedStack base. When some of the offsets
turn into ors, this prevents combining the constant offsets.
This hides the frame object when matching addressing modes,
so the access ends up using a scratch wave offset relative
address and the lengthy frame index expansion that goes with it.
llvm-svn: 309775
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 9 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll | 98 | ||||
| -rw-r--r-- | llvm/test/CodeGen/BPF/undef.ll | 36 |
4 files changed, 64 insertions, 85 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 54cddf77b4c..cf19d7b4fc0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1947,6 +1947,15 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not); } } + + // Undo the add -> or combine to merge constant offsets from a frame index. + if (N0.getOpcode() == ISD::OR && + isa<FrameIndexSDNode>(N0.getOperand(0)) && + isa<ConstantSDNode>(N0.getOperand(1)) && + DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) { + SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1)); + return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0); + } } if (SDValue NewSel = foldBinOpIntoSelect(N)) diff --git a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll index 64a6b9b6b21..d6a1686d566 100644 --- a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll @@ -101,9 +101,8 @@ define i32 @main() nounwind ssp { define void @foo(i8* %fmt, ...) nounwind { entry: ; CHECK-LABEL: foo: -; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, #0x8 ; CHECK: ldr {{w[0-9]+}}, [sp, #48] -; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #15 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #23 ; CHECK: and x[[ADDR:[0-9]+]], {{x[0-9]+}}, #0xfffffffffffffff0 ; CHECK: ldr {{q[0-9]+}}, [x[[ADDR]]] %fmt.addr = alloca i8*, align 8 @@ -142,9 +141,8 @@ entry: define void @foo2(i8* %fmt, ...) 
nounwind { entry: ; CHECK-LABEL: foo2: -; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, #0x8 ; CHECK: ldr {{w[0-9]+}}, [sp, #48] -; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #15 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #23 ; CHECK: and x[[ADDR:[0-9]+]], {{x[0-9]+}}, #0xfffffffffffffff0 ; CHECK: ldr {{q[0-9]+}}, [x[[ADDR]]] %fmt.addr = alloca i8*, align 8 diff --git a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll index b53447c8ffb..0f348c6b4cb 100644 --- a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll @@ -74,46 +74,29 @@ entry: ; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9 ; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13 -; VI-DAG: v_lshrrev_b32_e64 v{{[0-9]+}}, 6 -; CI-DAG: v_lshr_b32_e64 v{{[0-9]+}}, s{{[0-9]+}}, 6 - -; GCN-DAG: v_add_i32_e64 [[FI_ADD0:v[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 8, -; GCN-DAG: v_or_b32_e32 [[FI_OR0:v[0-9]+]], 4, [[FI_ADD0]] - ; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:8 ; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s5 offset:24 -; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], [[FI_OR0]], s[0:3], s4 offen offset:4 -; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], [[FI_OR0]], s[0:3], s4 offen offset:8 - -; FIXME: or fails to combine with add, so FI doesn't fold and scratch wave offset is used -; VI-DAG: v_lshrrev_b32_e64 v{{[0-9]+}}, 6 -; CI-DAG: v_lshr_b32_e64 v{{[0-9]+}}, s{{[0-9]+}}, 6 - -; GCN-DAG: v_add_i32_e64 [[FI_ADD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 24, -; GCN-DAG: v_or_b32_e32 [[FI_OR1:v[0-9]+]], 4, [[FI_ADD1]] - -; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:8 -; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:12 +; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:8 +; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:12 +; GCN: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16 +; GCN: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 
offset:20 +; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}} +; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4 +; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8 +; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12 -; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:8 -; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:12 -; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32{{$}} -; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:4 +; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:24 +; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:28 +; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:32 +; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:36 - -; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], [[FI_OR1]], s[0:3], s4 offen offset:4 -; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], [[FI_OR1]], s[0:3], s4 offen offset:8 -; GCN: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:28 -; GCN: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:24 - - -; GCN-DAG: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:24 -; GCN-DAG: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:28 -; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:16 -; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:20 +; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16 +; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20 +; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24 +; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28 ; GCN: s_swappc_b64 ; GCN-NEXT: s_sub_u32 s32, s32, 0x800{{$}} @@ -152,36 +135,25 @@ entry: ; GCN-DAG: s_add_u32 s32, s32, 0x800{{$}} -; FIXME: Fold offset -; GCN-DAG: v_or_b32_e32 [[OR_FI0:v[0-9]+]], 4, - -; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], [[OR_FI0]], s[0:3], s33 offen offset:4 
-; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], [[OR_FI0]], s[0:3], s33 offen offset:8 - -; FIXME: Fold offset -; GCN-DAG: v_or_b32_e32 [[OR_FI1:v[0-9]+]], 4, - -; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:12 -; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:8 - - -; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:8 -; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:12 -; GCN: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:4 -; GCN: buffer_store_dword [[LOAD2]], off, s[0:3], s32{{$}} - - - -; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], [[OR_FI1]], s[0:3], s33 offen offset:4 -; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], [[OR_FI1]], s[0:3], s33 offen offset:8 -; GCN: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:28 -; GCN: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:24 - - -; GCN-DAG: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s33 offset:24 -; GCN-DAG: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s33 offset:28 -; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:16 -; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:20 +; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8 +; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12 +; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16 +; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20 + +; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}} +; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4 +; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8 +; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12 + +; GCN-DAG: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s33 offset:24 +; GCN-DAG: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s33 offset:28 +; GCN-DAG: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s33 offset:32 +; GCN-DAG: buffer_load_dword 
[[LOAD7:v[0-9]+]], off, s[0:3], s33 offset:36 + +; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16 +; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20 +; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24 +; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28 ; GCN: s_swappc_b64 diff --git a/llvm/test/CodeGen/BPF/undef.ll b/llvm/test/CodeGen/BPF/undef.ll index 205d97c80ef..fa816546421 100644 --- a/llvm/test/CodeGen/BPF/undef.ll +++ b/llvm/test/CodeGen/BPF/undef.ll @@ -14,30 +14,30 @@ ; Function Attrs: nounwind uwtable define i32 @ebpf_filter(%struct.__sk_buff* nocapture readnone %ebpf_packet) #0 section "socket1" { -; CHECK: r1 = r10 -; CHECK: r1 += -2 -; CHECK: r2 = 0 -; CHECK: *(u16 *)(r1 + 6) = r2 -; CHECK: *(u16 *)(r1 + 4) = r2 -; CHECK: *(u16 *)(r1 + 2) = r2 ; EL: r1 = 134678021 ; EB: r1 = 84281096 ; CHECK: *(u32 *)(r10 - 8) = r1 ; EL: r1 = 2569 ; EB: r1 = 2314 ; CHECK: *(u16 *)(r10 - 4) = r1 -; CHECK: *(u16 *)(r10 + 24) = r2 -; CHECK: *(u16 *)(r10 + 22) = r2 -; CHECK: *(u16 *)(r10 + 20) = r2 -; CHECK: *(u16 *)(r10 + 18) = r2 -; CHECK: *(u16 *)(r10 + 16) = r2 -; CHECK: *(u16 *)(r10 + 14) = r2 -; CHECK: *(u16 *)(r10 + 12) = r2 -; CHECK: *(u16 *)(r10 + 10) = r2 -; CHECK: *(u16 *)(r10 + 8) = r2 -; CHECK: *(u16 *)(r10 + 6) = r2 -; CHECK: *(u16 *)(r10 - 2) = r2 -; CHECK: *(u16 *)(r10 + 26) = r2 + +; CHECK: r1 = 0 +; CHECK: *(u16 *)(r10 + 24) = r1 +; CHECK: *(u16 *)(r10 + 22) = r1 +; CHECK: *(u16 *)(r10 + 20) = r1 +; CHECK: *(u16 *)(r10 + 18) = r1 +; CHECK: *(u16 *)(r10 + 16) = r1 +; CHECK: *(u16 *)(r10 + 14) = r1 +; CHECK: *(u16 *)(r10 + 12) = r1 +; CHECK: *(u16 *)(r10 + 10) = r1 +; CHECK: *(u16 *)(r10 + 8) = r1 +; CHECK: *(u16 *)(r10 + 6) = r1 +; CHECK: *(u16 *)(r10 + 4) = r1 +; CHECK: *(u16 *)(r10 + 2) = r1 +; CHECK: *(u16 *)(r10 + 0) = r1 +; CHECK: *(u16 *)(r10 - 2) = r1 +; CHECK: *(u16 *)(r10 + 26) = r1 + ; CHECK: r2 = r10 ; CHECK: r2 += -8 ; CHECK: r1 = <MCOperand Expr:(routing)>ll |

