From 54e22f33d9bf2e1225e58d8cb69dad0c64886fcc Mon Sep 17 00:00:00 2001 From: Nirav Dave Date: Tue, 14 Mar 2017 00:34:14 +0000 Subject: In visitSTORE, always use FindBetterChain, rather than only when UseAA is enabled. Recommitting with compiler time improvements Recommitting after fixup of 32-bit aliasing sign offset bug in DAGCombiner. * Simplify Consecutive Merge Store Candidate Search Now that address aliasing is much less conservative, push through simplified store merging search and chain alias analysis which only checks for parallel stores through the chain subgraph. This is cleaner as it separates non-interfering loads/stores from the store-merging logic. When merging stores, search up the chain through a single load, and find all possible stores by looking down through a load and a TokenFactor to all stores visited. This improves the quality of the output SelectionDAG and the output Codegen (save perhaps for some ARM cases where we correctly construct wider loads, but then promote them to float operations which appear to require more expensive constant generation). Some minor peephole optimizations to deal with improved SubDAG shapes (listed below) Additional Minor Changes: 1. Finishes removing unused AliasLoad code 2. Unifies the chain aggregation in the merged stores across code paths 3. Re-add the Store node to the worklist after calling SimplifyDemandedBits. 4. Increase GatherAllAliasesMaxDepth from 6 to 18. That number is arbitrary, but seems sufficient to not cause regressions in tests. 5. Remove Chain dependencies of Memory operations on CopyfromReg nodes as these are captured by data dependence 6. Forward loads-store values through tokenfactors containing {CopyToReg,CopyFromReg} Values. 7. Peephole to convert buildvector of extract_vector_elt to extract_subvector if possible (see CodeGen/AArch64/store-merge.ll) 8. 
Store merging for the ARM target is restricted to 32-bit as some in some contexts invalid 64-bit operations are being generated. This can be removed once appropriate checks are added. This finishes the change Matt Arsenault started in r246307 and jyknight's original patch. Many tests required some changes as memory operations are now reorderable, improving load-store forwarding. One test in particular is worth noting: CodeGen/PowerPC/ppc64-align-long-double.ll - Improved load-store forwarding converts a load-store pair into a parallel store and a memory-realized bitcast of the same value. However, because we lose the sharing of the explicit and implicit store values we must create another local store. A similar transformation happens before SelectionDAG as well. Reviewers: arsenm, hfinkel, tstellarAMD, jyknight, nhaehnle llvm-svn: 297695 --- llvm/test/CodeGen/AArch64/argument-blocks.ll | 4 +- llvm/test/CodeGen/AArch64/arm64-abi.ll | 5 +- llvm/test/CodeGen/AArch64/arm64-memset-inline.ll | 4 +- llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll | 2 +- llvm/test/CodeGen/AArch64/merge-store.ll | 3 +- .../test/CodeGen/AArch64/vector_merge_dep_check.ll | 3 +- llvm/test/CodeGen/AMDGPU/debugger-insert-nops.ll | 24 +- llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll | 6 +- llvm/test/CodeGen/AMDGPU/merge-stores.ll | 24 +- llvm/test/CodeGen/AMDGPU/private-element-size.ll | 12 +- .../CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll | 17 +- .../CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll | 3 +- llvm/test/CodeGen/ARM/alloc-no-stack-realign.ll | 100 +- llvm/test/CodeGen/ARM/gpr-paired-spill.ll | 18 +- llvm/test/CodeGen/ARM/ifcvt10.ll | 2 - .../test/CodeGen/ARM/illegal-bitfield-loadstore.ll | 86 +- llvm/test/CodeGen/ARM/static-addr-hoisting.ll | 6 +- llvm/test/CodeGen/BPF/undef.ll | 65 +- llvm/test/CodeGen/MSP430/Inst16mm.ll | 2 +- llvm/test/CodeGen/Mips/cconv/arguments-float.ll | 24 +- llvm/test/CodeGen/Mips/cconv/arguments-varargs.ll | 44 +- llvm/test/CodeGen/Mips/fastcc.ll | 76 +- 
llvm/test/CodeGen/Mips/load-store-left-right.ll | 126 +-- llvm/test/CodeGen/Mips/micromips-li.ll | 2 +- llvm/test/CodeGen/Mips/mips64-f128-call.ll | 15 +- llvm/test/CodeGen/Mips/mips64-f128.ll | 2 +- llvm/test/CodeGen/Mips/mno-ldc1-sdc1.ll | 46 +- llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll | 14 +- llvm/test/CodeGen/Mips/msa/i5_ld_st.ll | 32 +- llvm/test/CodeGen/Mips/o32_cc_byval.ll | 54 +- llvm/test/CodeGen/Mips/o32_cc_vararg.ll | 4 +- llvm/test/CodeGen/PowerPC/anon_aggr.ll | 59 +- llvm/test/CodeGen/PowerPC/complex-return.ll | 12 +- llvm/test/CodeGen/PowerPC/jaggedstructs.ll | 52 +- .../CodeGen/PowerPC/ppc64-align-long-double.ll | 41 +- llvm/test/CodeGen/PowerPC/structsinmem.ll | 28 +- llvm/test/CodeGen/PowerPC/structsinregs.ll | 60 +- llvm/test/CodeGen/SystemZ/unaligned-01.ll | 5 +- .../test/CodeGen/Thumb/2010-07-15-debugOrdering.ll | 2 +- llvm/test/CodeGen/Thumb/stack-access.ll | 26 +- .../CodeGen/X86/2010-09-17-SideEffectsInChain.ll | 2 +- .../CodeGen/X86/2012-11-28-merge-store-alias.ll | 2 +- llvm/test/CodeGen/X86/MergeConsecutiveStores.ll | 17 +- llvm/test/CodeGen/X86/avx-vbroadcast.ll | 18 +- llvm/test/CodeGen/X86/avx512-mask-op.ll | 4 - llvm/test/CodeGen/X86/chain_order.ll | 4 +- .../CodeGen/X86/clear_upper_vector_element_bits.ll | 400 ++++---- llvm/test/CodeGen/X86/combiner-aa-0.ll | 20 - llvm/test/CodeGen/X86/combiner-aa-1.ll | 23 - llvm/test/CodeGen/X86/copy-eflags.ll | 17 +- llvm/test/CodeGen/X86/dag-merge-fast-accesses.ll | 12 +- .../X86/dont-trunc-store-double-to-float.ll | 6 +- .../extractelement-legalization-store-ordering.ll | 15 +- llvm/test/CodeGen/X86/i256-add.ll | 350 +++++-- llvm/test/CodeGen/X86/i386-shrink-wrapping.ll | 5 +- llvm/test/CodeGen/X86/live-range-nosubreg.ll | 5 +- llvm/test/CodeGen/X86/longlong-deadload.ll | 2 - .../CodeGen/X86/merge-consecutive-loads-128.ll | 20 +- .../CodeGen/X86/merge-consecutive-loads-256.ll | 8 +- .../X86/merge-store-partially-alias-loads.ll | 8 +- llvm/test/CodeGen/X86/pr18023.ll | 31 - 
llvm/test/CodeGen/X86/pr32108.ll | 20 + llvm/test/CodeGen/X86/split-store.ll | 27 +- llvm/test/CodeGen/X86/stores-merging.ll | 11 +- llvm/test/CodeGen/X86/vector-compare-results.ll | 730 +++++++------- .../CodeGen/X86/vector-shuffle-variable-128.ll | 1060 +++++++++----------- .../CodeGen/X86/vector-shuffle-variable-256.ll | 233 ++--- llvm/test/CodeGen/X86/vectorcall.ll | 4 +- llvm/test/CodeGen/X86/win32-eh.ll | 157 +-- llvm/test/CodeGen/XCore/varargs.ll | 2 +- 70 files changed, 2142 insertions(+), 2181 deletions(-) delete mode 100644 llvm/test/CodeGen/X86/combiner-aa-0.ll delete mode 100644 llvm/test/CodeGen/X86/combiner-aa-1.ll delete mode 100644 llvm/test/CodeGen/X86/pr18023.ll create mode 100644 llvm/test/CodeGen/X86/pr32108.ll (limited to 'llvm/test/CodeGen') diff --git a/llvm/test/CodeGen/AArch64/argument-blocks.ll b/llvm/test/CodeGen/AArch64/argument-blocks.ll index 3169abc2dcb..b5374ca8ced 100644 --- a/llvm/test/CodeGen/AArch64/argument-blocks.ll +++ b/llvm/test/CodeGen/AArch64/argument-blocks.ll @@ -59,10 +59,10 @@ define i64 @test_hfa_ignores_gprs([7 x float], [2 x float] %in, i64, i64 %res) { } ; [2 x float] should not be promoted to double by the Darwin varargs handling, -; but should go in an 8-byte aligned slot. +; but should go in an 8-byte aligned slot and can be merged as integer stores. define void @test_varargs_stackalign() { ; CHECK-LABEL: test_varargs_stackalign: -; CHECK-DARWINPCS: stp {{w[0-9]+}}, {{w[0-9]+}}, [sp, #16] +; CHECK-DARWINPCS: str {{x[0-9]+}}, [sp, #16] call void(...) 
@callee([3 x float] undef, [2 x float] [float 1.0, float 2.0]) ret void diff --git a/llvm/test/CodeGen/AArch64/arm64-abi.ll b/llvm/test/CodeGen/AArch64/arm64-abi.ll index fb52b1d99fc..6cf0ab35b9b 100644 --- a/llvm/test/CodeGen/AArch64/arm64-abi.ll +++ b/llvm/test/CodeGen/AArch64/arm64-abi.ll @@ -205,10 +205,7 @@ declare i32 @args_i32(i32, i32, i32, i32, i32, i32, i32, i32, i16 signext, i32, define i32 @test8(i32 %argc, i8** nocapture %argv) nounwind { entry: ; CHECK-LABEL: test8 -; CHECK: strb {{w[0-9]+}}, [sp, #3] -; CHECK: strb wzr, [sp, #2] -; CHECK: strb {{w[0-9]+}}, [sp, #1] -; CHECK: strb wzr, [sp] +; CHECK: str w8, [sp] ; CHECK: bl ; FAST-LABEL: test8 ; FAST: strb {{w[0-9]+}}, [sp] diff --git a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll index 8f22f97ca08..384aaa8541d 100644 --- a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll +++ b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll @@ -13,8 +13,8 @@ define void @t2() nounwind ssp { entry: ; CHECK-LABEL: t2: ; CHECK: strh wzr, [sp, #32] -; CHECK: stp xzr, xzr, [sp, #16] -; CHECK: str xzr, [sp, #8] +; CHECK: stp xzr, xzr, [sp, #8] +; CHECK: str xzr, [sp, #24] %buf = alloca [26 x i8], align 1 %0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0 call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false) diff --git a/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll b/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll index 16ddf690fe9..375877c5179 100644 --- a/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll @@ -99,7 +99,7 @@ define void @test_nospare([8 x i64], [8 x float], ...) { ; __stack field should point just past them. define void @test_offsetstack([8 x i64], [2 x i64], [3 x float], ...) { ; CHECK-LABEL: test_offsetstack: -; CHECK: sub sp, sp, #80 +; CHECK: stp {{q[0-9]+}}, {{q[0-9]+}}, [sp, #-80]! 
; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #96 ; CHECK: add x[[VAR:[0-9]+]], {{x[0-9]+}}, :lo12:var ; CHECK: str [[STACK_TOP]], [x[[VAR]]] diff --git a/llvm/test/CodeGen/AArch64/merge-store.ll b/llvm/test/CodeGen/AArch64/merge-store.ll index 1d0196ad521..1d26e4a42b1 100644 --- a/llvm/test/CodeGen/AArch64/merge-store.ll +++ b/llvm/test/CodeGen/AArch64/merge-store.ll @@ -4,8 +4,7 @@ @g0 = external global <3 x float>, align 16 @g1 = external global <3 x float>, align 4 -; CHECK: ldr s[[R0:[0-9]+]], {{\[}}[[R1:x[0-9]+]]{{\]}}, #4 -; CHECK: ld1{{\.?s?}} { v[[R0]]{{\.?s?}} }[1], {{\[}}[[R1]]{{\]}} +; CHECK: ldr q[[R0:[0-9]+]], {{\[}}[[R1:x[0-9]+]], :lo12:g0 ; CHECK: str d[[R0]] define void @blam() { diff --git a/llvm/test/CodeGen/AArch64/vector_merge_dep_check.ll b/llvm/test/CodeGen/AArch64/vector_merge_dep_check.ll index 9220947e836..e4e64ef8c8d 100644 --- a/llvm/test/CodeGen/AArch64/vector_merge_dep_check.ll +++ b/llvm/test/CodeGen/AArch64/vector_merge_dep_check.ll @@ -1,5 +1,4 @@ -; RUN: llc --combiner-alias-analysis=false < %s | FileCheck %s -; RUN: llc --combiner-alias-analysis=true < %s | FileCheck %s +; RUN: llc < %s | FileCheck %s ; This test checks that we do not merge stores together which have ; dependencies through their non-chain operands (e.g. 
one store is the diff --git a/llvm/test/CodeGen/AMDGPU/debugger-insert-nops.ll b/llvm/test/CodeGen/AMDGPU/debugger-insert-nops.ll index 6638f4e2582..7be7d9486a4 100644 --- a/llvm/test/CodeGen/AMDGPU/debugger-insert-nops.ll +++ b/llvm/test/CodeGen/AMDGPU/debugger-insert-nops.ll @@ -1,13 +1,21 @@ -; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECKNOP -; CHECK: test01.cl:2:{{[0-9]+}} -; CHECK-NEXT: s_nop 0 +; This test expects that we have one instance for each line in some order with "s_nop 0" instances after each. -; CHECK: test01.cl:3:{{[0-9]+}} -; CHECK-NEXT: s_nop 0 +; Check that each line appears at least once +; CHECK-DAG: test01.cl:2:3 +; CHECK-DAG: test01.cl:3:3 +; CHECK-DAG: test01.cl:4:3 -; CHECK: test01.cl:4:{{[0-9]+}} -; CHECK-NEXT: s_nop 0 + +; Check that each of each of the lines consists of the line output, followed by "s_nop 0" +; CHECKNOP: test01.cl:{{[234]}}:3 +; CHECKNOP-NEXT: s_nop 0 +; CHECKNOP: test01.cl:{{[234]}}:3 +; CHECKNOP-NEXT: s_nop 0 +; CHECKNOP: test01.cl:{{[234]}}:3 +; CHECKNOP-NEXT: s_nop 0 ; CHECK: test01.cl:5:{{[0-9]+}} ; CHECK-NEXT: s_nop 0 @@ -21,7 +29,7 @@ entry: call void @llvm.dbg.declare(metadata i32 addrspace(1)** %A.addr, metadata !17, metadata !18), !dbg !19 %0 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !20 %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 0, !dbg !20 - store i32 1, i32 addrspace(1)* %arrayidx, align 4, !dbg !21 + store i32 1, i32 addrspace(1)* %arrayidx, align 4, !dbg !20 %1 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !22 %arrayidx1 = getelementptr inbounds i32, i32 
addrspace(1)* %1, i32 1, !dbg !22 store i32 2, i32 addrspace(1)* %arrayidx1, align 4, !dbg !23 diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll index c05a6b907c4..97ef5ce9039 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -253,11 +253,9 @@ define void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> %a ; GCN: buffer_load_ubyte v{{[0-9]+}}, off ; GCN: buffer_load_ubyte v{{[0-9]+}}, off -; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:6 -; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:5 ; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4 - -; GCN: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} +; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:5 +; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:6 ; GCN-NO-TONGA: buffer_load_ubyte ; GCN-NO-TONGA: buffer_load_ubyte diff --git a/llvm/test/CodeGen/AMDGPU/merge-stores.ll b/llvm/test/CodeGen/AMDGPU/merge-stores.ll index 07104ebc8c9..fd2d8ee1189 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-stores.ll +++ b/llvm/test/CodeGen/AMDGPU/merge-stores.ll @@ -1,8 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-NOAA %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-NOAA %s - -; RUN: llc -march=amdgcn -verify-machineinstrs -combiner-alias-analysis -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs 
-combiner-alias-analysis -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s +; RUN: llc -march=amdgcn -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s ; This test is mostly to test DAG store merging, so disable the vectorizer. ; Run with devices with different unaligned load restrictions. @@ -150,12 +147,7 @@ define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 { } ; GCN-LABEL: {{^}}merge_global_store_4_constants_mixed_i32_f32: -; GCN-NOAA: buffer_store_dwordx4 v - -; GCN-AA: buffer_store_dwordx2 -; GCN-AA: buffer_store_dword v -; GCN-AA: buffer_store_dword v - +; GCN-AA: buffer_store_dwordx4 v ; GCN: s_endpgm define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 @@ -474,17 +466,9 @@ define void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1 ret void } -; This works once AA is enabled on the subtarget ; GCN-LABEL: {{^}}merge_global_store_4_vector_elts_loads_v4i32: ; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]] - -; GCN-NOAA: buffer_store_dword v -; GCN-NOAA: buffer_store_dword v -; GCN-NOAA: buffer_store_dword v -; GCN-NOAA: buffer_store_dword v - -; GCN-AA: buffer_store_dwordx4 [[LOAD]] - +; GCN: buffer_store_dwordx4 [[LOAD]] ; GCN: s_endpgm define void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 diff --git a/llvm/test/CodeGen/AMDGPU/private-element-size.ll b/llvm/test/CodeGen/AMDGPU/private-element-size.ll index eb487637ce9..9e75cc2bae4 100644 --- 
a/llvm/test/CodeGen/AMDGPU/private-element-size.ll +++ b/llvm/test/CodeGen/AMDGPU/private-element-size.ll @@ -32,10 +32,10 @@ ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:40{{$}} ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:44{{$}} -; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}} -; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}} -; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}} -; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}} +; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}} +; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}} +; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}} +; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}} define void @private_elt_size_v4i32(<4 x i32> addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -130,8 +130,8 @@ entry: ; HSA-ELT8: private_element_size = 2 ; HSA-ELT4: private_element_size = 1 -; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:16 -; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:24 +; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, {{off|v[0-9]}}, s[0:3], s9 offset:1 +; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, {{off|v[0-9]}}, s[0:3], s9 offset:2 ; HSA-ELTGE8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen diff --git a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll index 4beefb047f2..c1d691fcff8 100644 --- a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll +++ 
b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll @@ -157,9 +157,8 @@ define void @reorder_global_load_local_store_global_load(i32 addrspace(1)* %out, ; FUNC-LABEL: @reorder_local_offsets ; CI: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:100 offset1:102 -; CI: ds_write2_b32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} offset0:3 offset1:100 -; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12 -; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:408 +; CI-DAG: ds_write2_b32 {{v[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:3 offset1:100 +; CI-DAG: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:408 ; CI: buffer_store_dword ; CI: s_endpgm define void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(3)* noalias nocapture %ptr0) #0 { @@ -181,12 +180,12 @@ define void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspa } ; FUNC-LABEL: @reorder_global_offsets -; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400 -; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:408 -; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12 -; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400 -; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:408 -; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12 +; CI-DAG: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400 +; CI-DAG: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:408 +; CI-DAG: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12 +; CI-DAG: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400 +; CI-DAG: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:408 +; CI: buffer_store_dword ; CI: s_endpgm define void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* 
noalias nocapture readnone %gptr, i32 addrspace(1)* noalias nocapture %ptr0) #0 { %ptr1 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 3 diff --git a/llvm/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll b/llvm/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll index 4a1341c4d6e..d2ff09a6200 100644 --- a/llvm/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll +++ b/llvm/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll @@ -12,7 +12,8 @@ define void @test_byval_8_bytes_alignment(i32 %i, ...) { entry: ; CHECK: sub sp, sp, #12 ; CHECK: sub sp, sp, #4 -; CHECK: stmib sp, {r1, r2, r3} +; CHECK: add r0, sp, #4 +; CHECK: stm sp, {r0, r1, r2, r3} %g = alloca i8* %g1 = bitcast i8** %g to i8* call void @llvm.va_start(i8* %g1) diff --git a/llvm/test/CodeGen/ARM/alloc-no-stack-realign.ll b/llvm/test/CodeGen/ARM/alloc-no-stack-realign.ll index 7d37c83d748..0e077b3aee5 100644 --- a/llvm/test/CodeGen/ARM/alloc-no-stack-realign.ll +++ b/llvm/test/CodeGen/ARM/alloc-no-stack-realign.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s -check-prefix=NO-REALIGN -; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s -check-prefix=REALIGN +; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s ; rdar://12713765 ; When realign-stack is set to false, make sure we are not creating stack @@ -8,29 +7,31 @@ define void @test1(<16 x float>* noalias sret %agg.result) nounwind ssp "no-realign-stack" { entry: -; NO-REALIGN-LABEL: test1 -; NO-REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]] -; NO-REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! 
-; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32 -; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #48 -; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] - -; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1:[0-9]+]], #48 -; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32 -; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; NO-REALIGN: mov r[[R3:[0-9]+]], r[[R1]] -; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R3]]:128]! -; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R3]]:128] - -; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0:0]], #48 -; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0]], #32 -; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]! -; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128] +; CHECK-LABEL: test1 +; CHECK: ldr r[[R1:[0-9]+]], [pc, r1] +; CHECK: add r[[R2:[0-9]+]], r1, #48 +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: mov r[[R2:[0-9]+]], r[[R1]] +; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32 +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: mov r[[R1:[0-9]+]], sp +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: add r[[R2:[0-9]+]], r[[R1]], #32 +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]! +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! 
+; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #48 +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #32 +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0:128]! +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0:128] %retval = alloca <16 x float>, align 16 %0 = load <16 x float>, <16 x float>* @T3_retval, align 16 store <16 x float> %0, <16 x float>* %retval @@ -41,32 +42,33 @@ entry: define void @test2(<16 x float>* noalias sret %agg.result) nounwind ssp { entry: -; REALIGN-LABEL: test2 -; REALIGN: bfc sp, #0, #6 -; REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]] -; REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! -; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32 -; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #48 -; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: ldr r[[R1:[0-9]+]], [pc, r1] +; CHECK: add r[[R2:[0-9]+]], r[[R1]], #48 +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: mov r[[R2:[0-9]+]], r[[R1]] +; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32 +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: mov r[[R1:[0-9]+]], sp +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: orr r[[R2:[0-9]+]], r[[R1]], #32 +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]! +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! 
+; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #48 +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #32 +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0:128]! +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0:128] -; REALIGN: orr r[[R2:[0-9]+]], r[[R1:[0-9]+]], #48 -; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; REALIGN: orr r[[R2:[0-9]+]], r[[R1]], #32 -; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; REALIGN: orr r[[R2:[0-9]+]], r[[R1]], #16 -; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] - -; REALIGN: add r[[R1:[0-9]+]], r[[R0:0]], #48 -; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; REALIGN: add r[[R1:[0-9]+]], r[[R0]], #32 -; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]! -; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128] - %retval = alloca <16 x float>, align 16 +%retval = alloca <16 x float>, align 16 %0 = load <16 x float>, <16 x float>* @T3_retval, align 16 store <16 x float> %0, <16 x float>* %retval %1 = load <16 x float>, <16 x float>* %retval diff --git a/llvm/test/CodeGen/ARM/gpr-paired-spill.ll b/llvm/test/CodeGen/ARM/gpr-paired-spill.ll index ef3e5a54a2d..797b147d5d0 100644 --- a/llvm/test/CodeGen/ARM/gpr-paired-spill.ll +++ b/llvm/test/CodeGen/ARM/gpr-paired-spill.ll @@ -16,22 +16,22 @@ define void @foo(i64* %addr) { ; an LDMIA was created with both a FrameIndex and an offset, which ; is not allowed. 
-; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8] -; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp] +; CHECK-WITH-LDRD-DAG: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8] +; CHECK-WITH-LDRD-DAG: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp] -; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8] -; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp] +; CHECK-WITH-LDRD-DAG: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8] +; CHECK-WITH-LDRD-DAG: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp] ; We also want to ensure the register scavenger is working (i.e. an ; offset from sp can be generated), so we need two spills. -; CHECK-WITHOUT-LDRD: add [[ADDRREG:[a-z0-9]+]], sp, #{{[0-9]+}} -; CHECK-WITHOUT-LDRD: stm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}} -; CHECK-WITHOUT-LDRD: stm sp, {r{{[0-9]+}}, r{{[0-9]+}}} +; CHECK-WITHOUT-LDRD-DAG: add [[ADDRREG:[a-z0-9]+]], sp, #{{[0-9]+}} +; CHECK-WITHOUT-LDRD-DAG: stm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}} +; CHECK-WITHOUT-LDRD-DAG: stm sp, {r{{[0-9]+}}, r{{[0-9]+}}} ; In principle LLVM may have to recalculate the offset. At the moment ; it reuses the original though. 
-; CHECK-WITHOUT-LDRD: ldm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}} -; CHECK-WITHOUT-LDRD: ldm sp, {r{{[0-9]+}}, r{{[0-9]+}}} +; CHECK-WITHOUT-LDRD-DAG: ldm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}} +; CHECK-WITHOUT-LDRD-DAG: ldm sp, {r{{[0-9]+}}, r{{[0-9]+}}} store volatile i64 %val1, i64* %addr store volatile i64 %val2, i64* %addr diff --git a/llvm/test/CodeGen/ARM/ifcvt10.ll b/llvm/test/CodeGen/ARM/ifcvt10.ll index 5725a404c32..c7e18d35dbe 100644 --- a/llvm/test/CodeGen/ARM/ifcvt10.ll +++ b/llvm/test/CodeGen/ARM/ifcvt10.ll @@ -9,8 +9,6 @@ entry: ; CHECK-LABEL: t: ; CHECK: vpop {d8} ; CHECK-NOT: vpopne -; CHECK: pop {r7, pc} -; CHECK: vpop {d8} ; CHECK: pop {r7, pc} br i1 undef, label %if.else, label %if.then diff --git a/llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll b/llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll index 892f0982755..bfb2b95c256 100644 --- a/llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll +++ b/llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll @@ -6,9 +6,7 @@ define void @i24_or(i24* %a) { ; LE-LABEL: i24_or: ; LE: @ BB#0: ; LE-NEXT: ldrh r1, [r0] -; LE-NEXT: ldrb r2, [r0, #2] ; LE-NEXT: orr r1, r1, #384 -; LE-NEXT: strb r2, [r0, #2] ; LE-NEXT: strh r1, [r0] ; LE-NEXT: mov pc, lr ; @@ -31,21 +29,19 @@ define void @i24_or(i24* %a) { define void @i24_and_or(i24* %a) { ; LE-LABEL: i24_and_or: ; LE: @ BB#0: -; LE-NEXT: ldrb r2, [r0, #2] ; LE-NEXT: ldrh r1, [r0] -; LE-NEXT: strb r2, [r0, #2] ; LE-NEXT: mov r2, #16256 -; LE-NEXT: orr r1, r1, #384 ; LE-NEXT: orr r2, r2, #49152 +; LE-NEXT: orr r1, r1, #384 ; LE-NEXT: and r1, r1, r2 ; LE-NEXT: strh r1, [r0] ; LE-NEXT: mov pc, lr ; ; BE-LABEL: i24_and_or: ; BE: @ BB#0: +; BE-NEXT: mov r1, #128 +; BE-NEXT: strb r1, [r0, #2] ; BE-NEXT: ldrh r1, [r0] -; BE-NEXT: mov r2, #128 -; BE-NEXT: strb r2, [r0, #2] ; BE-NEXT: orr r1, r1, #1 ; BE-NEXT: strh r1, [r0] ; BE-NEXT: mov pc, lr @@ -59,9 +55,7 @@ define void @i24_and_or(i24* %a) { define void @i24_insert_bit(i24* %a, i1 zeroext %bit) { ; LE-LABEL: 
i24_insert_bit: ; LE: @ BB#0: -; LE-NEXT: ldrb r3, [r0, #2] ; LE-NEXT: ldrh r2, [r0] -; LE-NEXT: strb r3, [r0, #2] ; LE-NEXT: mov r3, #255 ; LE-NEXT: orr r3, r3, #57088 ; LE-NEXT: and r2, r2, r3 @@ -71,9 +65,7 @@ define void @i24_insert_bit(i24* %a, i1 zeroext %bit) { ; ; BE-LABEL: i24_insert_bit: ; BE: @ BB#0: -; BE-NEXT: ldrb r3, [r0, #2] ; BE-NEXT: ldrh r2, [r0] -; BE-NEXT: strb r3, [r0, #2] ; BE-NEXT: mov r3, #57088 ; BE-NEXT: orr r3, r3, #16711680 ; BE-NEXT: and r2, r3, r2, lsl #8 @@ -93,14 +85,9 @@ define void @i24_insert_bit(i24* %a, i1 zeroext %bit) { define void @i56_or(i56* %a) { ; LE-LABEL: i56_or: ; LE: @ BB#0: -; LE-NEXT: mov r2, r0 -; LE-NEXT: ldr r12, [r0] -; LE-NEXT: ldrh r3, [r2, #4]! -; LE-NEXT: ldrb r1, [r2, #2] -; LE-NEXT: strb r1, [r2, #2] -; LE-NEXT: orr r1, r12, #384 +; LE-NEXT: ldr r1, [r0] +; LE-NEXT: orr r1, r1, #384 ; LE-NEXT: str r1, [r0] -; LE-NEXT: strh r3, [r2] ; LE-NEXT: mov pc, lr ; ; BE-LABEL: i56_or: @@ -128,36 +115,29 @@ define void @i56_or(i56* %a) { define void @i56_and_or(i56* %a) { ; LE-LABEL: i56_and_or: ; LE: @ BB#0: -; LE-NEXT: mov r2, r0 ; LE-NEXT: ldr r1, [r0] -; LE-NEXT: ldrh r12, [r2, #4]! ; LE-NEXT: orr r1, r1, #384 -; LE-NEXT: ldrb r3, [r2, #2] ; LE-NEXT: bic r1, r1, #127 -; LE-NEXT: strb r3, [r2, #2] ; LE-NEXT: str r1, [r0] -; LE-NEXT: strh r12, [r2] ; LE-NEXT: mov pc, lr ; ; BE-LABEL: i56_and_or: ; BE: @ BB#0: -; BE-NEXT: .save {r11, lr} -; BE-NEXT: push {r11, lr} -; BE-NEXT: mov r2, r0 -; BE-NEXT: ldr lr, [r0] +; BE-NEXT: mov r1, r0 ; BE-NEXT: mov r3, #128 -; BE-NEXT: ldrh r12, [r2, #4]! -; BE-NEXT: strb r3, [r2, #2] -; BE-NEXT: lsl r3, r12, #8 -; BE-NEXT: orr r3, r3, lr, lsl #24 -; BE-NEXT: orr r3, r3, #384 -; BE-NEXT: lsr r1, r3, #8 -; BE-NEXT: strh r1, [r2] -; BE-NEXT: bic r1, lr, #255 -; BE-NEXT: orr r1, r1, r3, lsr #24 +; BE-NEXT: ldrh r2, [r1, #4]! 
+; BE-NEXT: strb r3, [r1, #2] +; BE-NEXT: lsl r2, r2, #8 +; BE-NEXT: ldr r12, [r0] +; BE-NEXT: orr r2, r2, r12, lsl #24 +; BE-NEXT: orr r2, r2, #384 +; BE-NEXT: lsr r3, r2, #8 +; BE-NEXT: strh r3, [r1] +; BE-NEXT: bic r1, r12, #255 +; BE-NEXT: orr r1, r1, r2, lsr #24 ; BE-NEXT: str r1, [r0] -; BE-NEXT: pop {r11, lr} ; BE-NEXT: mov pc, lr + %b = load i56, i56* %a, align 1 %c = and i56 %b, -128 %d = or i56 %c, 384 @@ -168,35 +148,27 @@ define void @i56_and_or(i56* %a) { define void @i56_insert_bit(i56* %a, i1 zeroext %bit) { ; LE-LABEL: i56_insert_bit: ; LE: @ BB#0: -; LE-NEXT: .save {r11, lr} -; LE-NEXT: push {r11, lr} -; LE-NEXT: mov r3, r0 -; LE-NEXT: ldr lr, [r0] -; LE-NEXT: ldrh r12, [r3, #4]! -; LE-NEXT: ldrb r2, [r3, #2] -; LE-NEXT: strb r2, [r3, #2] -; LE-NEXT: bic r2, lr, #8192 +; LE-NEXT: ldr r2, [r0] +; LE-NEXT: bic r2, r2, #8192 ; LE-NEXT: orr r1, r2, r1, lsl #13 ; LE-NEXT: str r1, [r0] -; LE-NEXT: strh r12, [r3] -; LE-NEXT: pop {r11, lr} ; LE-NEXT: mov pc, lr ; ; BE-LABEL: i56_insert_bit: ; BE: @ BB#0: ; BE-NEXT: .save {r11, lr} ; BE-NEXT: push {r11, lr} -; BE-NEXT: mov r3, r0 +; BE-NEXT: mov r2, r0 +; BE-NEXT: ldrh r12, [r2, #4]! +; BE-NEXT: ldrb r3, [r2, #2] +; BE-NEXT: strb r3, [r2, #2] +; BE-NEXT: orr r12, r3, r12, lsl #8 ; BE-NEXT: ldr lr, [r0] -; BE-NEXT: ldrh r12, [r3, #4]! 
-; BE-NEXT: ldrb r2, [r3, #2] -; BE-NEXT: strb r2, [r3, #2] -; BE-NEXT: orr r2, r2, r12, lsl #8 -; BE-NEXT: orr r2, r2, lr, lsl #24 -; BE-NEXT: bic r2, r2, #8192 -; BE-NEXT: orr r1, r2, r1, lsl #13 -; BE-NEXT: lsr r2, r1, #8 -; BE-NEXT: strh r2, [r3] +; BE-NEXT: orr r3, r12, lr, lsl #24 +; BE-NEXT: bic r3, r3, #8192 +; BE-NEXT: orr r1, r3, r1, lsl #13 +; BE-NEXT: lsr r3, r1, #8 +; BE-NEXT: strh r3, [r2] ; BE-NEXT: bic r2, lr, #255 ; BE-NEXT: orr r1, r2, r1, lsr #24 ; BE-NEXT: str r1, [r0] diff --git a/llvm/test/CodeGen/ARM/static-addr-hoisting.ll b/llvm/test/CodeGen/ARM/static-addr-hoisting.ll index 3d47e02f965..683d607936b 100644 --- a/llvm/test/CodeGen/ARM/static-addr-hoisting.ll +++ b/llvm/test/CodeGen/ARM/static-addr-hoisting.ll @@ -6,9 +6,9 @@ define void @multiple_store() { ; CHECK: movs [[VAL:r[0-9]+]], #42 ; CHECK: movt r[[BASE1]], #15 -; CHECK: str [[VAL]], [r[[BASE1]]] -; CHECK: str [[VAL]], [r[[BASE1]], #24] -; CHECK: str.w [[VAL]], [r[[BASE1]], #42] +; CHECK-DAG: str [[VAL]], [r[[BASE1]]] +; CHECK-DAG: str [[VAL]], [r[[BASE1]], #24] +; CHECK-DAG: str.w [[VAL]], [r[[BASE1]], #42] ; CHECK: movw r[[BASE2:[0-9]+]], #20394 ; CHECK: movt r[[BASE2]], #18 diff --git a/llvm/test/CodeGen/BPF/undef.ll b/llvm/test/CodeGen/BPF/undef.ll index 541d81ea07b..de14bfde1ab 100644 --- a/llvm/test/CodeGen/BPF/undef.ll +++ b/llvm/test/CodeGen/BPF/undef.ll @@ -13,50 +13,55 @@ ; Function Attrs: nounwind uwtable define i32 @ebpf_filter(%struct.__sk_buff* nocapture readnone %ebpf_packet) #0 section "socket1" { +; CHECK: r2 = r10 +; CHECK: r2 += -2 +; CHECK: r1 = 0 +; CHECK: *(u16 *)(r2 + 6) = r1 +; CHECK: *(u16 *)(r2 + 4) = r1 +; CHECK: *(u16 *)(r2 + 2) = r1 +; CHECK: r2 = 6 +; CHECK: *(u8 *)(r10 - 7) = r2 +; CHECK: r2 = 5 +; CHECK: *(u8 *)(r10 - 8) = r2 +; CHECK: r2 = 7 +; CHECK: *(u8 *)(r10 - 6) = r2 +; CHECK: r2 = 8 +; CHECK: *(u8 *)(r10 - 5) = r2 +; CHECK: r2 = 9 +; CHECK: *(u8 *)(r10 - 4) = r2 +; CHECK: r2 = 10 +; CHECK: *(u8 *)(r10 - 3) = r2 +; CHECK: *(u16 *)(r10 + 24) = 
r1 +; CHECK: *(u16 *)(r10 + 22) = r1 +; CHECK: *(u16 *)(r10 + 20) = r1 +; CHECK: *(u16 *)(r10 + 18) = r1 +; CHECK: *(u16 *)(r10 + 16) = r1 +; CHECK: *(u16 *)(r10 + 14) = r1 +; CHECK: *(u16 *)(r10 + 12) = r1 +; CHECK: *(u16 *)(r10 + 10) = r1 +; CHECK: *(u16 *)(r10 + 8) = r1 +; CHECK: *(u16 *)(r10 + 6) = r1 +; CHECK: *(u16 *)(r10 - 2) = r1 +; CHECK: *(u16 *)(r10 + 26) = r1 +; CHECK: r2 = r10 +; CHECK: r2 += -8 +; CHECK: r1 = ll +; CHECK: call bpf_map_lookup_elem +; CHECK: exit %key = alloca %struct.routing_key_2, align 1 %1 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 0 -; CHECK: r1 = 5 -; CHECK: *(u8 *)(r10 - 8) = r1 store i8 5, i8* %1, align 1 %2 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 1 -; CHECK: r1 = 6 -; CHECK: *(u8 *)(r10 - 7) = r1 store i8 6, i8* %2, align 1 %3 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 2 -; CHECK: r1 = 7 -; CHECK: *(u8 *)(r10 - 6) = r1 store i8 7, i8* %3, align 1 %4 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 3 -; CHECK: r1 = 8 -; CHECK: *(u8 *)(r10 - 5) = r1 store i8 8, i8* %4, align 1 %5 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 4 -; CHECK: r1 = 9 -; CHECK: *(u8 *)(r10 - 4) = r1 store i8 9, i8* %5, align 1 %6 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 5 -; CHECK: r1 = 10 -; CHECK: *(u8 *)(r10 - 3) = r1 store i8 10, i8* %6, align 1 %7 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 1, i32 0, i64 0 -; CHECK: r1 = r10 -; CHECK: r1 += -2 -; CHECK: r2 = 0 -; CHECK: *(u16 *)(r1 + 6) = r2 -; CHECK: *(u16 *)(r1 + 4) = r2 -; CHECK: *(u16 *)(r1 + 2) = r2 -; CHECK: *(u16 *)(r10 + 24) = r2 -; CHECK: *(u16 *)(r10 + 22) = r2 -; CHECK: *(u16 *)(r10 + 20) = r2 -; CHECK: *(u16 *)(r10 + 18) = r2 -; CHECK: *(u16 *)(r10 
+ 16) = r2 -; CHECK: *(u16 *)(r10 + 14) = r2 -; CHECK: *(u16 *)(r10 + 12) = r2 -; CHECK: *(u16 *)(r10 + 10) = r2 -; CHECK: *(u16 *)(r10 + 8) = r2 -; CHECK: *(u16 *)(r10 + 6) = r2 -; CHECK: *(u16 *)(r10 - 2) = r2 -; CHECK: *(u16 *)(r10 + 26) = r2 call void @llvm.memset.p0i8.i64(i8* %7, i8 0, i64 30, i32 1, i1 false) %8 = call i32 (%struct.bpf_map_def*, %struct.routing_key_2*, ...) bitcast (i32 (...)* @bpf_map_lookup_elem to i32 (%struct.bpf_map_def*, %struct.routing_key_2*, ...)*)(%struct.bpf_map_def* nonnull @routing, %struct.routing_key_2* nonnull %key) #3 ret i32 undef diff --git a/llvm/test/CodeGen/MSP430/Inst16mm.ll b/llvm/test/CodeGen/MSP430/Inst16mm.ll index c75e1beb235..a48d8592c1a 100644 --- a/llvm/test/CodeGen/MSP430/Inst16mm.ll +++ b/llvm/test/CodeGen/MSP430/Inst16mm.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=msp430 -combiner-alias-analysis < %s | FileCheck %s +; RUN: llc -march=msp430 < %s | FileCheck %s target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8" target triple = "msp430-generic-generic" @foo = common global i16 0, align 2 diff --git a/llvm/test/CodeGen/Mips/cconv/arguments-float.ll b/llvm/test/CodeGen/Mips/cconv/arguments-float.ll index 7d32992ecb1..004f6d94749 100644 --- a/llvm/test/CodeGen/Mips/cconv/arguments-float.ll +++ b/llvm/test/CodeGen/Mips/cconv/arguments-float.ll @@ -63,39 +63,39 @@ entry: ; NEW-DAG: sd $5, 16([[R2]]) ; O32 has run out of argument registers and starts using the stack -; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 24($sp) -; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 28($sp) +; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 16($sp) +; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 20($sp) ; O32-DAG: sw [[R3]], 24([[R2]]) ; O32-DAG: sw [[R4]], 28([[R2]]) ; NEW-DAG: sd $6, 24([[R2]]) -; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 32($sp) -; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 36($sp) +; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 24($sp) +; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 28($sp) ; O32-DAG: sw [[R3]], 32([[R2]]) ; O32-DAG: sw [[R4]], 36([[R2]]) ; NEW-DAG: sd $7, 32([[R2]]) -; O32-DAG: 
lw [[R3:\$([0-9]+|gp)]], 40($sp) -; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 44($sp) +; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 32($sp) +; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 36($sp) ; O32-DAG: sw [[R3]], 40([[R2]]) ; O32-DAG: sw [[R4]], 44([[R2]]) ; NEW-DAG: sd $8, 40([[R2]]) -; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 48($sp) -; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 52($sp) +; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 40($sp) +; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 44($sp) ; O32-DAG: sw [[R3]], 48([[R2]]) ; O32-DAG: sw [[R4]], 52([[R2]]) ; NEW-DAG: sd $9, 48([[R2]]) -; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 56($sp) -; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 60($sp) +; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 48($sp) +; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 52($sp) ; O32-DAG: sw [[R3]], 56([[R2]]) ; O32-DAG: sw [[R4]], 60([[R2]]) ; NEW-DAG: sd $10, 56([[R2]]) ; N32/N64 have run out of registers and starts using the stack too -; O32-DAG: lw [[R3:\$[0-9]+]], 64($sp) -; O32-DAG: lw [[R4:\$[0-9]+]], 68($sp) +; O32-DAG: lw [[R3:\$[0-9]+]], 56($sp) +; O32-DAG: lw [[R4:\$[0-9]+]], 60($sp) ; O32-DAG: sw [[R3]], 64([[R2]]) ; O32-DAG: sw [[R4]], 68([[R2]]) ; NEW-DAG: ld [[R3:\$[0-9]+]], 0($sp) diff --git a/llvm/test/CodeGen/Mips/cconv/arguments-varargs.ll b/llvm/test/CodeGen/Mips/cconv/arguments-varargs.ll index 785188b3c51..d662128945f 100644 --- a/llvm/test/CodeGen/Mips/cconv/arguments-varargs.ll +++ b/llvm/test/CodeGen/Mips/cconv/arguments-varargs.ll @@ -315,12 +315,11 @@ entry: ; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte ; order. 
; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords) -; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA_TMP2]]) ; O32-DAG: sw [[ARG1]], 8([[GV]]) -; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) -; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 -; O32-DAG: sw [[VA2]], 0([[SP]]) -; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; O32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 4 +; O32-DAG: sw [[VA3]], 0([[SP]]) +; O32-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA_TMP2]]) ; O32-DAG: sw [[ARG1]], 12([[GV]]) ; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords) @@ -349,10 +348,9 @@ entry: ; Load the second argument from the variable portion and copy it to the global. ; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) ; O32-DAG: sw [[ARG2]], 16([[GV]]) -; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) -; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 -; O32-DAG: sw [[VA2]], 0([[SP]]) -; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) +; O32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 4 +; O32-DAG: sw [[VA3]], 0([[SP]]) +; O32-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA_TMP2]]) ; O32-DAG: sw [[ARG2]], 20([[GV]]) ; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]]) @@ -678,12 +676,11 @@ entry: ; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte ; order. ; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords) -; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA_TMP2]]) ; O32-DAG: sw [[ARG1]], 8([[GV]]) -; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) -; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 -; O32-DAG: sw [[VA2]], 0([[SP]]) -; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; O32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 4 +; O32-DAG: sw [[VA3]], 0([[SP]]) +; O32-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA_TMP2]]) ; O32-DAG: sw [[ARG1]], 12([[GV]]) ; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords) @@ -712,10 +709,9 @@ entry: ; Load the second argument from the variable portion and copy it to the global. 
; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) ; O32-DAG: sw [[ARG2]], 16([[GV]]) -; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) -; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 4 ; O32-DAG: sw [[VA2]], 0([[SP]]) -; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) +; O32-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA_TMP2]]) ; O32-DAG: sw [[ARG2]], 20([[GV]]) ; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]]) @@ -1040,10 +1036,9 @@ entry: ; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords) ; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) ; O32-DAG: sw [[ARG1]], 8([[GV]]) -; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) -; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 -; O32-DAG: sw [[VA2]], 0([[SP]]) -; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; O32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 4 +; O32-DAG: sw [[VA3]], 0([[SP]]) +; O32-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA_TMP2]]) ; O32-DAG: sw [[ARG1]], 12([[GV]]) ; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords) @@ -1072,10 +1067,9 @@ entry: ; Load the second argument from the variable portion and copy it to the global. 
; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) ; O32-DAG: sw [[ARG2]], 16([[GV]]) -; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) -; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 -; O32-DAG: sw [[VA2]], 0([[SP]]) -; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) +; O32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 4 +; O32-DAG: sw [[VA3]], 0([[SP]]) +; O32-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA_TMP2]]) ; O32-DAG: sw [[ARG2]], 20([[GV]]) ; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]]) diff --git a/llvm/test/CodeGen/Mips/fastcc.ll b/llvm/test/CodeGen/Mips/fastcc.ll index 13abc20eb3e..fb1bc4d9a8a 100644 --- a/llvm/test/CodeGen/Mips/fastcc.ll +++ b/llvm/test/CodeGen/Mips/fastcc.ll @@ -132,20 +132,19 @@ entry: define internal fastcc void @callee0(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, i32 %a10, i32 %a11, i32 %a12, i32 %a13, i32 %a14, i32 %a15, i32 %a16) nounwind noinline { entry: ; CHECK: callee0 -; CHECK: sw $4 -; CHECK: sw $5 -; CHECK: sw $6 -; CHECK: sw $7 -; CHECK: sw $8 -; CHECK: sw $9 -; CHECK: sw $10 -; CHECK: sw $11 -; CHECK: sw $12 -; CHECK: sw $13 -; CHECK: sw $14 -; CHECK: sw $15 -; CHECK: sw $24 -; CHECK: sw $3 +; CHECK-DAG: sw $4 +; CHECK-DAG: sw $5 +; CHECK-DAG: sw $7 +; CHECK-DAG: sw $8 +; CHECK-DAG: sw $9 +; CHECK-DAG: sw $10 +; CHECK-DAG: sw $11 +; CHECK-DAG: sw $12 +; CHECK-DAG: sw $13 +; CHECK-DAG: sw $14 +; CHECK-DAG: sw $15 +; CHECK-DAG: sw $24 +; CHECK-DAG: sw $3 ; t6, t7 and t8 are reserved in NaCl and cannot be used for fastcc. 
; CHECK-NACL-NOT: sw $14 @@ -223,27 +222,27 @@ entry: define internal fastcc void @callee1(float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7, float %a8, float %a9, float %a10, float %a11, float %a12, float %a13, float %a14, float %a15, float %a16, float %a17, float %a18, float %a19, float %a20) nounwind noinline { entry: -; CHECK: callee1 -; CHECK: swc1 $f0 -; CHECK: swc1 $f1 -; CHECK: swc1 $f2 -; CHECK: swc1 $f3 -; CHECK: swc1 $f4 -; CHECK: swc1 $f5 -; CHECK: swc1 $f6 -; CHECK: swc1 $f7 -; CHECK: swc1 $f8 -; CHECK: swc1 $f9 -; CHECK: swc1 $f10 -; CHECK: swc1 $f11 -; CHECK: swc1 $f12 -; CHECK: swc1 $f13 -; CHECK: swc1 $f14 -; CHECK: swc1 $f15 -; CHECK: swc1 $f16 -; CHECK: swc1 $f17 -; CHECK: swc1 $f18 -; CHECK: swc1 $f19 +; CHECK-LABEL: callee1: +; CHECK-DAG: swc1 $f0 +; CHECK-DAG: swc1 $f1 +; CHECK-DAG: swc1 $f2 +; CHECK-DAG: swc1 $f3 +; CHECK-DAG: swc1 $f4 +; CHECK-DAG: swc1 $f5 +; CHECK-DAG: swc1 $f6 +; CHECK-DAG: swc1 $f7 +; CHECK-DAG: swc1 $f8 +; CHECK-DAG: swc1 $f9 +; CHECK-DAG: swc1 $f10 +; CHECK-DAG: swc1 $f11 +; CHECK-DAG: swc1 $f12 +; CHECK-DAG: swc1 $f13 +; CHECK-DAG: swc1 $f14 +; CHECK-DAG: swc1 $f15 +; CHECK-DAG: swc1 $f16 +; CHECK-DAG: swc1 $f17 +; CHECK-DAG: swc1 $f18 +; CHECK-DAG: swc1 $f19 store float %a0, float* @gf0, align 4 store float %a1, float* @gf1, align 4 @@ -316,8 +315,6 @@ entry: ; NOODDSPREG-LABEL: callee2: -; NOODDSPREG: addiu $sp, $sp, -[[OFFSET:[0-9]+]] - ; Check that first 10 arguments are received in even float registers ; f0, f2, ... , f18. Check that 11th argument is received on stack. 
@@ -333,7 +330,7 @@ entry: ; NOODDSPREG-DAG: swc1 $f16, 32($[[R0]]) ; NOODDSPREG-DAG: swc1 $f18, 36($[[R0]]) -; NOODDSPREG-DAG: lwc1 $[[F0:f[0-9]*[02468]]], [[OFFSET]]($sp) +; NOODDSPREG-DAG: lwc1 $[[F0:f[0-9]*[02468]]], 0($sp) ; NOODDSPREG-DAG: swc1 $[[F0]], 40($[[R0]]) store float %a0, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 0), align 4 @@ -397,7 +394,6 @@ entry: ; FP64-NOODDSPREG-LABEL: callee3: -; FP64-NOODDSPREG: addiu $sp, $sp, -[[OFFSET:[0-9]+]] ; Check that first 10 arguments are received in even float registers ; f0, f2, ... , f18. Check that 11th argument is received on stack. @@ -414,7 +410,7 @@ entry: ; FP64-NOODDSPREG-DAG: sdc1 $f16, 64($[[R0]]) ; FP64-NOODDSPREG-DAG: sdc1 $f18, 72($[[R0]]) -; FP64-NOODDSPREG-DAG: ldc1 $[[F0:f[0-9]*[02468]]], [[OFFSET]]($sp) +; FP64-NOODDSPREG-DAG: ldc1 $[[F0:f[0-9]*[02468]]], 0($sp) ; FP64-NOODDSPREG-DAG: sdc1 $[[F0]], 80($[[R0]]) store double %a0, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 0), align 8 diff --git a/llvm/test/CodeGen/Mips/load-store-left-right.ll b/llvm/test/CodeGen/Mips/load-store-left-right.ll index 3bd924a8120..6def55cf883 100644 --- a/llvm/test/CodeGen/Mips/load-store-left-right.ll +++ b/llvm/test/CodeGen/Mips/load-store-left-right.ll @@ -250,12 +250,18 @@ entry: ; MIPS64-EB: ld $[[PTR:[0-9]+]], %got_disp(struct_s0)( ; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(struct_s0)( -; FIXME: We should be able to do better than this on MIPS32r6/MIPS64r6 since -; we have unaligned halfword load/store available -; ALL-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]]) -; ALL-DAG: sb $[[R1]], 2($[[PTR]]) -; ALL-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]]) -; ALL-DAG: sb $[[R1]], 3($[[PTR]]) +; MIPS32-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS32-DAG: sb $[[R1]], 2($[[PTR]]) +; MIPS32-DAG: lbu $[[R2:[0-9]+]], 1($[[PTR]]) +; MIPS32-DAG: sb $[[R2]], 3($[[PTR]]) + +; MIPS32R6: lhu $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS32R6: sh $[[R1]], 2($[[PTR]]) + +; MIPS64-DAG: lbu $[[R1:[0-9]+]], 
0($[[PTR]]) +; MIPS64-DAG: sb $[[R1]], 2($[[PTR]]) +; MIPS64-DAG: lbu $[[R2:[0-9]+]], 1($[[PTR]]) +; MIPS64-DAG: sb $[[R2]], 3($[[PTR]]) %0 = load %struct.S0, %struct.S0* getelementptr inbounds (%struct.S0, %struct.S0* @struct_s0, i32 0), align 1 store %struct.S0 %0, %struct.S0* getelementptr inbounds (%struct.S0, %struct.S0* @struct_s0, i32 1), align 1 @@ -268,37 +274,54 @@ entry: ; MIPS32-EL: lw $[[PTR:[0-9]+]], %got(struct_s1)( ; MIPS32-EB: lw $[[PTR:[0-9]+]], %got(struct_s1)( -; MIPS32-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]]) -; MIPS32-DAG: sb $[[R1]], 4($[[PTR]]) -; MIPS32-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]]) -; MIPS32-DAG: sb $[[R1]], 5($[[PTR]]) -; MIPS32-DAG: lbu $[[R1:[0-9]+]], 2($[[PTR]]) -; MIPS32-DAG: sb $[[R1]], 6($[[PTR]]) -; MIPS32-DAG: lbu $[[R1:[0-9]+]], 3($[[PTR]]) -; MIPS32-DAG: sb $[[R1]], 7($[[PTR]]) +; MIPS32-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]]) +; MIPS32-EL-DAG: lwr $[[R1]], 0($[[PTR]]) +; MIPS32-EL-DAG: swl $[[R1]], 7($[[PTR]]) +; MIPS32-EL-DAG: swr $[[R1]], 4($[[PTR]]) +; MIPS32-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS32-EB-DAG: lwr $[[R1]], 3($[[PTR]]) +; MIPS32-EB-DAG: swl $[[R1]], 4($[[PTR]]) +; MIPS32-EB-DAG: swr $[[R1]], 7($[[PTR]]) + +; MIPS32-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS32-NOLEFTRIGHT-DAG: sb $[[R1]], 4($[[PTR]]) +; MIPS32-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]]) +; MIPS32-NOLEFTRIGHT-DAG: sb $[[R1]], 5($[[PTR]]) +; MIPS32-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 2($[[PTR]]) +; MIPS32-NOLEFTRIGHT-DAG: sb $[[R1]], 6($[[PTR]]) +; MIPS32-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 3($[[PTR]]) +; MIPS32-NOLEFTRIGHT-DAG: sb $[[R1]], 7($[[PTR]]) ; MIPS32R6: lw $[[PTR:[0-9]+]], %got(struct_s1)( -; MIPS32R6-DAG: lhu $[[R1:[0-9]+]], 0($[[PTR]]) -; MIPS32R6-DAG: sh $[[R1]], 4($[[PTR]]) -; MIPS32R6-DAG: lhu $[[R1:[0-9]+]], 2($[[PTR]]) -; MIPS32R6-DAG: sh $[[R1]], 6($[[PTR]]) +; MIPS32R6-DAG: lw $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS32R6-DAG: sw $[[R1]], 4($[[PTR]]) ; MIPS64-EL: ld $[[PTR:[0-9]+]], 
%got_disp(struct_s1)( ; MIPS64-EB: ld $[[PTR:[0-9]+]], %got_disp(struct_s1)( -; MIPS64-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]]) -; MIPS64-DAG: sb $[[R1]], 4($[[PTR]]) -; MIPS64-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]]) -; MIPS64-DAG: sb $[[R1]], 5($[[PTR]]) -; MIPS64-DAG: lbu $[[R1:[0-9]+]], 2($[[PTR]]) -; MIPS64-DAG: sb $[[R1]], 6($[[PTR]]) -; MIPS64-DAG: lbu $[[R1:[0-9]+]], 3($[[PTR]]) -; MIPS64-DAG: sb $[[R1]], 7($[[PTR]]) + +; MIPS64-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]]) +; MIPS64-EL-DAG: lwr $[[R1]], 0($[[PTR]]) +; MIPS64-EL-DAG: swl $[[R1]], 7($[[PTR]]) +; MIPS64-EL-DAG: swr $[[R1]], 4($[[PTR]]) + +; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS64-EB-DAG: lwr $[[R1]], 3($[[PTR]]) +; MIPS64-EB-DAG: swl $[[R1]], 4($[[PTR]]) +; MIPS64-EB-DAG: swr $[[R1]], 7($[[PTR]]) + + +; MIPS64-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS64-NOLEFTRIGHT-DAG: sb $[[R1]], 4($[[PTR]]) +; MIPS64-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]]) +; MIPS64-NOLEFTRIGHT-DAG: sb $[[R1]], 5($[[PTR]]) +; MIPS64-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 2($[[PTR]]) +; MIPS64-NOLEFTRIGHT-DAG: sb $[[R1]], 6($[[PTR]]) +; MIPS64-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 3($[[PTR]]) +; MIPS64-NOLEFTRIGHT-DAG: sb $[[R1]], 7($[[PTR]]) ; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(struct_s1)( -; MIPS64R6-DAG: lhu $[[R1:[0-9]+]], 0($[[PTR]]) -; MIPS64R6-DAG: sh $[[R1]], 4($[[PTR]]) -; MIPS64R6-DAG: lhu $[[R1:[0-9]+]], 2($[[PTR]]) -; MIPS64R6-DAG: sh $[[R1]], 6($[[PTR]]) +; MIPS64R6-DAG: lw $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS64R6-DAG: sw $[[R1]], 4($[[PTR]]) %0 = load %struct.S1, %struct.S1* getelementptr inbounds (%struct.S1, %struct.S1* @struct_s1, i32 0), align 1 store %struct.S1 %0, %struct.S1* getelementptr inbounds (%struct.S1, %struct.S1* @struct_s1, i32 1), align 1 @@ -336,30 +359,21 @@ entry: ; MIPS32R6-DAG: sw $[[R1]], 12($[[PTR]]) ; MIPS64-EL: ld $[[PTR:[0-9]+]], %got_disp(struct_s2)( -; MIPS64-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]]) -; MIPS64-EL-DAG: lwr $[[R1]], 0($[[PTR]]) -; 
MIPS64-EL-DAG: swl $[[R1]], 11($[[PTR]]) -; MIPS64-EL-DAG: swr $[[R1]], 8($[[PTR]]) -; MIPS64-EL-DAG: lwl $[[R1:[0-9]+]], 7($[[PTR]]) -; MIPS64-EL-DAG: lwr $[[R1]], 4($[[PTR]]) -; MIPS64-EL-DAG: swl $[[R1]], 15($[[PTR]]) -; MIPS64-EL-DAG: swr $[[R1]], 12($[[PTR]]) + +; MIPS64-EL-DAG: ldl $[[R1:[0-9]+]], 7($[[PTR]]) +; MIPS64-EL-DAG: ldr $[[R1]], 0($[[PTR]]) +; MIPS64-EL-DAG: sdl $[[R1]], 15($[[PTR]]) +; MIPS64-EL-DAG: sdr $[[R1]], 8($[[PTR]]) ; MIPS64-EB: ld $[[PTR:[0-9]+]], %got_disp(struct_s2)( -; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]]) -; MIPS64-EB-DAG: lwr $[[R1]], 3($[[PTR]]) -; MIPS64-EB-DAG: swl $[[R1]], 8($[[PTR]]) -; MIPS64-EB-DAG: swr $[[R1]], 11($[[PTR]]) -; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 4($[[PTR]]) -; MIPS64-EB-DAG: lwr $[[R1]], 7($[[PTR]]) -; MIPS64-EB-DAG: swl $[[R1]], 12($[[PTR]]) -; MIPS64-EB-DAG: swr $[[R1]], 15($[[PTR]]) +; MIPS64-EB-DAG: ldl $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS64-EB-DAG: ldr $[[R1]], 7($[[PTR]]) +; MIPS64-EB-DAG: sdl $[[R1]], 8($[[PTR]]) +; MIPS64-EB-DAG: sdr $[[R1]], 15($[[PTR]]) ; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(struct_s2)( -; MIPS64R6-DAG: lw $[[R1:[0-9]+]], 0($[[PTR]]) -; MIPS64R6-DAG: sw $[[R1]], 8($[[PTR]]) -; MIPS64R6-DAG: lw $[[R1:[0-9]+]], 4($[[PTR]]) -; MIPS64R6-DAG: sw $[[R1]], 12($[[PTR]]) +; MIPS64R6-DAG: ld $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS64R6-DAG: sd $[[R1]], 8($[[PTR]]) %0 = load %struct.S2, %struct.S2* getelementptr inbounds (%struct.S2, %struct.S2* @struct_s2, i32 0), align 1 store %struct.S2 %0, %struct.S2* getelementptr inbounds (%struct.S2, %struct.S2* @struct_s2, i32 1), align 1 @@ -416,17 +430,17 @@ entry: ; MIPS64-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]]) ; MIPS64-EL-DAG: lwr $[[R1]], 0($[[PTR]]) -; MIPS64-EB: ld $[[SPTR:[0-9]+]], %got_disp(arr)( -; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]]) -; MIPS64-EB-DAG: lwr $[[R1]], 3($[[PTR]]) -; MIPS64-EB-DAG: dsll $[[R1]], $[[R1]], 32 +; MIPS64-EB: ld $[[SPTR:[0-9]+]], %got_disp(arr)( ; MIPS64-EB-DAG: lbu $[[R2:[0-9]+]], 5($[[PTR]]) ; 
MIPS64-EB-DAG: lbu $[[R3:[0-9]+]], 4($[[PTR]]) ; MIPS64-EB-DAG: dsll $[[T0:[0-9]+]], $[[R3]], 8 ; MIPS64-EB-DAG: or $[[T1:[0-9]+]], $[[T0]], $[[R2]] -; MIPS64-EB-DAG: dsll $[[T1]], $[[T1]], 16 -; MIPS64-EB-DAG: or $[[T3:[0-9]+]], $[[R1]], $[[T1]] ; MIPS64-EB-DAG: lbu $[[R4:[0-9]+]], 6($[[PTR]]) +; MIPS64-EB-DAG: dsll $[[T1]], $[[T1]], 16 +; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS64-EB-DAG: lwr $[[R1]], 3($[[PTR]]) +; MIPS64-EB-DAG: dsll $[[R5:[0-9]+]], $[[R1]], 32 +; MIPS64-EB-DAG: or $[[T3:[0-9]+]], $[[R5]], $[[T1]] ; MIPS64-EB-DAG: dsll $[[T4:[0-9]+]], $[[R4]], 8 ; MIPS64-EB-DAG: or $4, $[[T3]], $[[T4]] diff --git a/llvm/test/CodeGen/Mips/micromips-li.ll b/llvm/test/CodeGen/Mips/micromips-li.ll index ac315f93825..997f4e9196a 100644 --- a/llvm/test/CodeGen/Mips/micromips-li.ll +++ b/llvm/test/CodeGen/Mips/micromips-li.ll @@ -13,6 +13,6 @@ entry: ret i32 0 } -; CHECK: li16 ${{[2-7]|16|17}}, 1 ; CHECK: addiu ${{[0-9]+}}, $zero, 2148 +; CHECK: li16 ${{[2-7]|16|17}}, 1 ; CHECK: ori ${{[0-9]+}}, $zero, 33332 diff --git a/llvm/test/CodeGen/Mips/mips64-f128-call.ll b/llvm/test/CodeGen/Mips/mips64-f128-call.ll index c59f25ef4af..19fa8fc7524 100644 --- a/llvm/test/CodeGen/Mips/mips64-f128-call.ll +++ b/llvm/test/CodeGen/Mips/mips64-f128-call.ll @@ -4,8 +4,8 @@ @gld1 = external global fp128 ; CHECK: foo0 -; CHECK: sdc1 $f12, %lo(gld0)(${{[0-9]+}}) -; CHECK: sdc1 $f13, 8(${{[0-9]+}}) +; CHECK-DAG: sdc1 $f12, %lo(gld0)(${{[0-9]+}}) +; CHECK-DAG: sdc1 $f13, 8(${{[0-9]+}}) define void @foo0(fp128 %a0) { entry: @@ -14,8 +14,8 @@ entry: } ; CHECK: foo1 -; CHECK: ldc1 $f12, %lo(gld0)(${{[0-9]+}}) -; CHECK: ldc1 $f13, 8(${{[0-9]+}}) +; CHECK-DAG: ldc1 $f12, %lo(gld0)(${{[0-9]+}}) +; CHECK-DAG: ldc1 $f13, 8(${{[0-9]+}}) define void @foo1() { entry: @@ -26,11 +26,11 @@ entry: declare void @foo2(fp128) + ; CHECK: foo3: -; CHECK: daddiu $[[R0:[0-9]+]], ${{[0-9]+}}, %hi(gld0) -; CHECK: dsll $[[R1:[0-9]+]], $[[R0]], 16 + +; CHECK: daddiu $[[R2:[0-9]+]], $[[R1:[0-9]+]], 
%lo(gld0) ; CHECK: sdc1 $f0, %lo(gld0)($[[R1]]) -; CHECK: daddiu $[[R2:[0-9]]], $[[R1]], %lo(gld0) ; CHECK: sdc1 $f2, 8($[[R2]]) ; CHECK: daddiu $[[R3:[0-9]+]], ${{[0-9]+}}, %hi(gld1) ; CHECK: dsll $[[R4:[0-9]+]], $[[R3]], 16 @@ -39,7 +39,6 @@ declare void @foo2(fp128) ; CHECK: ldc1 $f2, 8($[[R5]]) - define fp128 @foo3() { entry: %call = tail call fp128 @foo4() diff --git a/llvm/test/CodeGen/Mips/mips64-f128.ll b/llvm/test/CodeGen/Mips/mips64-f128.ll index 304ab8b90d3..237b4c5f1ee 100644 --- a/llvm/test/CodeGen/Mips/mips64-f128.ll +++ b/llvm/test/CodeGen/Mips/mips64-f128.ll @@ -577,10 +577,10 @@ entry: ; ALL-LABEL: store_LD_LD: ; ALL: ld $[[R0:[0-9]+]], %got_disp(gld1) -; ALL: ld $[[R1:[0-9]+]], 0($[[R0]]) ; ALL: ld $[[R2:[0-9]+]], 8($[[R0]]) ; ALL: ld $[[R3:[0-9]+]], %got_disp(gld0) ; ALL: sd $[[R2]], 8($[[R3]]) +; ALL: ld $[[R1:[0-9]+]], 0($[[R0]]) ; ALL: sd $[[R1]], 0($[[R3]]) define void @store_LD_LD() { diff --git a/llvm/test/CodeGen/Mips/mno-ldc1-sdc1.ll b/llvm/test/CodeGen/Mips/mno-ldc1-sdc1.ll index 9663138d4c8..0260afaa186 100644 --- a/llvm/test/CodeGen/Mips/mno-ldc1-sdc1.ll +++ b/llvm/test/CodeGen/Mips/mno-ldc1-sdc1.ll @@ -130,12 +130,12 @@ ; MM-MNO-PIC: addiu $[[R1:[0-9]+]], $[[R0]], %lo(_gp_disp) ; MM-MNO-PIC: addu $[[R2:[0-9]+]], $[[R1]], $25 ; MM-MNO-PIC: lw $[[R3:[0-9]+]], %got(g0)($[[R2]]) -; MM-MNO-PIC: lw16 $[[R4:[0-9]+]], 0($[[R3]]) -; MM-MNO-PIC: lw16 $[[R5:[0-9]+]], 4($[[R3]]) -; MM-MNO-LE-PIC: mtc1 $[[R4]], $f0 -; MM-MNO-LE-PIC: mthc1 $[[R5]], $f0 -; MM-MNO-BE-PIC: mtc1 $[[R5]], $f0 -; MM-MNO-BE-PIC: mthc1 $[[R4]], $f0 +; MM-MNO-PIC-DAG: lw16 $[[R4:[0-9]+]], 0($[[R3]]) +; MM-MNO-PIC-DAG: lw16 $[[R5:[0-9]+]], 4($[[R3]]) +; MM-MNO-LE-PIC-DAG: mtc1 $[[R4]], $f0 +; MM-MNO-LE-PIC-DAG: mthc1 $[[R5]], $f0 +; MM-MNO-BE-PIC-DAG: mtc1 $[[R5]], $f0 +; MM-MNO-BE-PIC-DAG: mthc1 $[[R4]], $f0 ; MM-STATIC-PIC: lui $[[R0:[0-9]+]], %hi(g0) ; MM-STATIC-PIC: ldc1 $f0, %lo(g0)($[[R0]]) @@ -214,13 +214,13 @@ entry: ; MM-MNO-PIC: lui $[[R0:[0-9]+]], %hi(_gp_disp) ; 
MM-MNO-PIC: addiu $[[R1:[0-9]+]], $[[R0]], %lo(_gp_disp) ; MM-MNO-PIC: addu $[[R2:[0-9]+]], $[[R1]], $25 -; MM-MNO-LE-PIC: mfc1 $[[R3:[0-9]+]], $f12 -; MM-MNO-BE-PIC: mfhc1 $[[R3:[0-9]+]], $f12 -; MM-MNO-PIC: lw $[[R4:[0-9]+]], %got(g0)($[[R2]]) -; MM-MNO-PIC: sw16 $[[R3]], 0($[[R4]]) -; MM-MNO-LE-PIC: mfhc1 $[[R5:[0-9]+]], $f12 -; MM-MNO-BE-PIC: mfc1 $[[R5:[0-9]+]], $f12 -; MM-MNO-PIC: sw16 $[[R5]], 4($[[R4]]) +; MM-MNO-LE-PIC-DAG: mfc1 $[[R3:[0-9]+]], $f12 +; MM-MNO-BE-PIC-DAG: mfhc1 $[[R3:[0-9]+]], $f12 +; MM-MNO-PIC-DAG: lw $[[R4:[0-9]+]], %got(g0)($[[R2]]) +; MM-MNO-PIC-DAG: sw16 $[[R3]], 0($[[R4]]) +; MM-MNO-LE-PIC-DAG: mfhc1 $[[R5:[0-9]+]], $f12 +; MM-MNO-BE-PIC-DAG: mfc1 $[[R5:[0-9]+]], $f12 +; MM-MNO-PIC-DAG: sw16 $[[R5]], 4($[[R4]]) ; MM-STATIC-PIC: lui $[[R0:[0-9]+]], %hi(g0) ; MM-STATIC-PIC: sdc1 $f12, %lo(g0)($[[R0]]) @@ -267,8 +267,8 @@ entry: ; MM-MNO-PIC: sll16 $[[R0:[0-9]+]], $5, 3 ; MM-MNO-PIC: addu16 $[[R1:[0-9]+]], $4, $[[R0]] -; MM-MNO-PIC: lw16 $[[R2:[0-9]+]], 0($[[R1]]) -; MM-MNO-PIC: lw16 $[[R3:[0-9]+]], 4($[[R1]]) +; MM-MNO-PIC-DAG: lw16 $[[R2:[0-9]+]], 0($[[R1]]) +; MM-MNO-PIC-DAG: lw16 $[[R3:[0-9]+]], 4($[[R1]]) ; MM-MNO-LE-PIC: mtc1 $[[R2]], $f0 ; MM-MNO-LE-PIC: mthc1 $[[R3]], $f0 ; MM-MNO-BE-PIC: mtc1 $[[R3]], $f0 @@ -313,14 +313,14 @@ entry: ; MM: addu16 $[[R1:[0-9]+]], $6, $[[R0]] ; MM: sdc1 $f12, 0($[[R1]]) -; MM-MNO-PIC: sll16 $[[R0:[0-9]+]], $7, 3 -; MM-MNO-PIC: addu16 $[[R1:[0-9]+]], $6, $[[R0]] -; MM-MNO-LE-PIC: mfc1 $[[R2:[0-9]+]], $f12 -; MM-MNO-BE-PIC: mfhc1 $[[R2:[0-9]+]], $f12 -; MM-MNO-PIC: sw16 $[[R2]], 0($[[R1]]) -; MM-MNO-LE-PIC: mfhc1 $[[R3:[0-9]+]], $f12 -; MM-MNO-BE-PIC: mfc1 $[[R3:[0-9]+]], $f12 -; MM-MNO-PIC: sw16 $[[R3]], 4($[[R1]]) +; MM-MNO-PIC: sll16 $[[R0:[0-9]+]], $7, 3 +; MM-MNO-PIC: addu16 $[[R1:[0-9]+]], $6, $[[R0]] +; MM-MNO-LE-PIC-DAG: mfc1 $[[R2:[0-9]+]], $f12 +; MM-MNO-BE-PIC-DAG: mfhc1 $[[R2:[0-9]+]], $f12 +; MM-MNO-PIC-DAG: sw16 $[[R2]], 0($[[R1]]) +; MM-MNO-LE-PIC-DAG: mfhc1 $[[R3:[0-9]+]], $f12 +; 
MM-MNO-BE-PIC-DAG: mfc1 $[[R3:[0-9]+]], $f12 +; MM-MNO-PIC-DAG: sw16 $[[R3]], 4($[[R1]]) ; MM-STATIC-PIC: sll16 $[[R0:[0-9]+]], $7, 3 ; MM-STATIC-PIC: addu16 $[[R1:[0-9]+]], $6, $[[R0]] diff --git a/llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll b/llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll index 9957d5be26e..ac69dc913c1 100644 --- a/llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll +++ b/llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll @@ -234,15 +234,15 @@ entry: ; MIPS32: insert.w $w[[W0]][1], $[[R1]] ; MIPS32: insert.w $w[[W0]][3], $[[R1]] -; MIPS64-N64: ld $[[R3:[0-9]+]], %got_disp(h) -; MIPS64-N32: lw $[[R3:[0-9]+]], %got_disp(h) -; MIPS64: dmfc1 $[[R1:[0-9]+]], $f[[F2]] -; MIPS64: fill.d $w[[W0:[0-9]+]], $[[R1]] +; MIPS64-N64-DAG: ld $[[R3:[0-9]+]], %got_disp(h) +; MIPS64-N32-DAG: lw $[[R3:[0-9]+]], %got_disp(h) +; MIPS64-DAG: dmfc1 $[[R1:[0-9]+]], $f[[F2]] +; MIPS64-DAG: fill.d $w[[W0:[0-9]+]], $[[R1]] -; ALL: fexdo.w $w[[W1:[0-9]+]], $w[[W0]], $w[[W0]] -; ALL: fexdo.h $w[[W2:[0-9]+]], $w[[W1]], $w[[W1]] +; ALL-DAG: fexdo.w $w[[W1:[0-9]+]], $w[[W0]], $w[[W0]] +; ALL-DAG: fexdo.h $w[[W2:[0-9]+]], $w[[W1]], $w[[W1]] -; MIPS32: lw $[[R3:[0-9]+]], %got(h) +; MIPS32-DAG: lw $[[R3:[0-9]+]], %got(h) ; ALL: copy_u.h $[[R2:[0-9]+]], $w[[W2]] ; ALL: sh $[[R2]], 0($[[R3]]) diff --git a/llvm/test/CodeGen/Mips/msa/i5_ld_st.ll b/llvm/test/CodeGen/Mips/msa/i5_ld_st.ll index c644d242a00..812c400d46e 100644 --- a/llvm/test/CodeGen/Mips/msa/i5_ld_st.ll +++ b/llvm/test/CodeGen/Mips/msa/i5_ld_st.ll @@ -336,8 +336,8 @@ entry: ; CHECK: llvm_mips_st_b_valid_range_tests: ; CHECK: ld.b -; CHECK: st.b [[R1:\$w[0-9]+]], -512( -; CHECK: st.b [[R1:\$w[0-9]+]], 511( +; CHECK-DAG: st.b [[R1:\$w[0-9]+]], -512( +; CHECK-DAG: st.b [[R1:\$w[0-9]+]], 511( ; CHECK: .size llvm_mips_st_b_valid_range_tests ; @@ -351,10 +351,10 @@ entry: } ; CHECK: llvm_mips_st_b_invalid_range_tests: -; CHECK: addiu $2, $1, -513 +; CHECK: addiu $2, $1, 512 ; CHECK: ld.b ; CHECK: st.b [[R1:\$w[0-9]+]], 0( -; CHECK: addiu $1, $1, 512 +; 
CHECK: addiu $1, $1, -513 ; CHECK: st.b [[R1:\$w[0-9]+]], 0( ; CHECK: .size llvm_mips_st_b_invalid_range_tests ; @@ -404,8 +404,8 @@ entry: ; CHECK: llvm_mips_st_h_valid_range_tests: ; CHECK: ld.h -; CHECK: st.h [[R1:\$w[0-9]+]], -1024( -; CHECK: st.h [[R1:\$w[0-9]+]], 1022( +; CHECK-DAG: st.h [[R1:\$w[0-9]+]], -1024( +; CHECK-DAG: st.h [[R1:\$w[0-9]+]], 1022( ; CHECK: .size llvm_mips_st_h_valid_range_tests ; @@ -419,10 +419,10 @@ entry: } ; CHECK: llvm_mips_st_h_invalid_range_tests: -; CHECK: addiu $2, $1, -1026 +; CHECK: addiu $2, $1, 1024 ; CHECK: ld.h ; CHECK: st.h [[R1:\$w[0-9]+]], 0( -; CHECK: addiu $1, $1, 1024 +; CHECK: addiu $1, $1, -1026 ; CHECK: st.h [[R1:\$w[0-9]+]], 0( ; CHECK: .size llvm_mips_st_h_invalid_range_tests ; @@ -472,8 +472,8 @@ entry: ; CHECK: llvm_mips_st_w_valid_range_tests: ; CHECK: ld.w -; CHECK: st.w [[R1:\$w[0-9]+]], -2048( -; CHECK: st.w [[R1:\$w[0-9]+]], 2044( +; CHECK-DAG: st.w [[R1:\$w[0-9]+]], -2048( +; CHECK-DAG: st.w [[R1:\$w[0-9]+]], 2044( ; CHECK: .size llvm_mips_st_w_valid_range_tests ; @@ -487,10 +487,10 @@ entry: } ; CHECK: llvm_mips_st_w_invalid_range_tests: -; CHECK: addiu $2, $1, -2052 +; CHECK: addiu $2, $1, 2048 ; CHECK: ld.w ; CHECK: st.w [[R1:\$w[0-9]+]], 0( -; CHECK: addiu $1, $1, 2048 +; CHECK: addiu $1, $1, -2052 ; CHECK: st.w [[R1:\$w[0-9]+]], 0( ; CHECK: .size llvm_mips_st_w_invalid_range_tests ; @@ -540,8 +540,8 @@ entry: ; CHECK: llvm_mips_st_d_valid_range_tests: ; CHECK: ld.d -; CHECK: st.d [[R1:\$w[0-9]+]], -4096( -; CHECK: st.d [[R1:\$w[0-9]+]], 4088( +; CHECK-DAG: st.d [[R1:\$w[0-9]+]], -4096( +; CHECK-DAG: st.d [[R1:\$w[0-9]+]], 4088( ; CHECK: .size llvm_mips_st_d_valid_range_tests ; @@ -555,10 +555,10 @@ entry: } ; CHECK: llvm_mips_st_d_invalid_range_tests: -; CHECK: addiu $2, $1, -4104 +; CHECK: addiu $2, $1, 4096 ; CHECK: ld.d ; CHECK: st.d [[R1:\$w[0-9]+]], 0( -; CHECK: addiu $1, $1, 4096 +; CHECK: addiu $1, $1, -4104 ; CHECK: st.d [[R1:\$w[0-9]+]], 0( ; CHECK: .size 
llvm_mips_st_d_invalid_range_tests ; diff --git a/llvm/test/CodeGen/Mips/o32_cc_byval.ll b/llvm/test/CodeGen/Mips/o32_cc_byval.ll index 33431dba43c..eadf4abfc75 100644 --- a/llvm/test/CodeGen/Mips/o32_cc_byval.ll +++ b/llvm/test/CodeGen/Mips/o32_cc_byval.ll @@ -45,20 +45,18 @@ declare void @callee3(float, %struct.S3* byval, %struct.S1* byval) define void @f2(float %f, %struct.S1* nocapture byval %s1) nounwind { entry: ; CHECK: addiu $sp, $sp, -48 -; CHECK: sw $7, 60($sp) -; CHECK: sw $6, 56($sp) -; CHECK: lw $4, 80($sp) -; CHECK: ldc1 $f[[F0:[0-9]+]], 72($sp) -; CHECK: lw $[[R3:[0-9]+]], 64($sp) -; CHECK: lw $[[R4:[0-9]+]], 68($sp) -; CHECK: lw $[[R2:[0-9]+]], 60($sp) -; CHECK: lh $[[R1:[0-9]+]], 58($sp) -; CHECK: lb $[[R0:[0-9]+]], 56($sp) -; CHECK: sw $[[R0]], 32($sp) -; CHECK: sw $[[R1]], 28($sp) -; CHECK: sw $[[R2]], 24($sp) -; CHECK: sw $[[R4]], 20($sp) -; CHECK: sw $[[R3]], 16($sp) +; CHECK-DAG: sw $7, 60($sp) +; CHECK-DAG: sw $6, 56($sp) +; CHECK-DAG: ldc1 $f[[F0:[0-9]+]], 72($sp) +; CHECK-DAG: lw $[[R3:[0-9]+]], 64($sp) +; CHECK-DAG: lw $[[R4:[0-9]+]], 68($sp) +; CHECK-DAG: lh $[[R1:[0-9]+]], 58($sp) +; CHECK-DAG: lb $[[R0:[0-9]+]], 56($sp) +; CHECK-DAG: sw $[[R0]], 32($sp) +; CHECK-DAG: sw $[[R1]], 28($sp) +; CHECK-DAG: sw $[[R4]], 20($sp) +; CHECK-DAG: sw $[[R3]], 16($sp) +; CHECK-DAG: sw $7, 24($sp) ; CHECK: mfc1 $6, $f[[F0]] %i2 = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 5 @@ -82,13 +80,11 @@ declare void @callee4(i32, double, i64, i32, i16 signext, i8 signext, float) define void @f3(%struct.S2* nocapture byval %s2) nounwind { entry: ; CHECK: addiu $sp, $sp, -48 -; CHECK: sw $7, 60($sp) -; CHECK: sw $6, 56($sp) -; CHECK: sw $5, 52($sp) -; CHECK: sw $4, 48($sp) -; CHECK: lw $4, 48($sp) -; CHECK: lw $[[R0:[0-9]+]], 60($sp) -; CHECK: sw $[[R0]], 24($sp) +; CHECK-DAG: sw $7, 60($sp) +; CHECK-DAG: sw $6, 56($sp) +; CHECK-DAG: sw $5, 52($sp) +; CHECK-DAG: sw $4, 48($sp) +; CHECK-DAG: sw $7, 24($sp) %arrayidx = getelementptr inbounds 
%struct.S2, %struct.S2* %s2, i32 0, i32 0, i32 0 %tmp = load i32, i32* %arrayidx, align 4 @@ -101,14 +97,14 @@ entry: define void @f4(float %f, %struct.S3* nocapture byval %s3, %struct.S1* nocapture byval %s1) nounwind { entry: ; CHECK: addiu $sp, $sp, -48 -; CHECK: sw $7, 60($sp) -; CHECK: sw $6, 56($sp) -; CHECK: sw $5, 52($sp) -; CHECK: lw $4, 60($sp) -; CHECK: lw $[[R1:[0-9]+]], 80($sp) -; CHECK: lb $[[R0:[0-9]+]], 52($sp) -; CHECK: sw $[[R0]], 32($sp) -; CHECK: sw $[[R1]], 24($sp) +; CHECK-DAG: sw $7, 60($sp) +; CHECK-DAG: sw $6, 56($sp) +; CHECK-DAG: sw $5, 52($sp) +; CHECK-DAG: lw $[[R1:[0-9]+]], 80($sp) +; CHECK-DAG: lb $[[R0:[0-9]+]], 52($sp) +; CHECK-DAG: sw $[[R0]], 32($sp) +; CHECK-DAG: sw $[[R1]], 24($sp) +; CHECK: move $4, $7 %i = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 2 %tmp = load i32, i32* %i, align 4 diff --git a/llvm/test/CodeGen/Mips/o32_cc_vararg.ll b/llvm/test/CodeGen/Mips/o32_cc_vararg.ll index 1dc3a927d93..73aad48b73e 100644 --- a/llvm/test/CodeGen/Mips/o32_cc_vararg.ll +++ b/llvm/test/CodeGen/Mips/o32_cc_vararg.ll @@ -29,9 +29,9 @@ entry: ; CHECK-LABEL: va1: ; CHECK: addiu $sp, $sp, -16 +; CHECK: sw $5, 20($sp) ; CHECK: sw $7, 28($sp) ; CHECK: sw $6, 24($sp) -; CHECK: sw $5, 20($sp) ; CHECK: lw $2, 20($sp) } @@ -83,8 +83,8 @@ entry: ; CHECK-LABEL: va3: ; CHECK: addiu $sp, $sp, -16 -; CHECK: sw $7, 28($sp) ; CHECK: sw $6, 24($sp) +; CHECK: sw $7, 28($sp) ; CHECK: lw $2, 24($sp) } diff --git a/llvm/test/CodeGen/PowerPC/anon_aggr.ll b/llvm/test/CodeGen/PowerPC/anon_aggr.ll index f4e788849ec..9b32a8f55f3 100644 --- a/llvm/test/CodeGen/PowerPC/anon_aggr.ll +++ b/llvm/test/CodeGen/PowerPC/anon_aggr.ll @@ -60,33 +60,34 @@ equal: unequal: ret i8* %array2_ptr } - ; CHECK-LABEL: func2: -; CHECK: ld [[REG2:[0-9]+]], 72(1) -; CHECK: cmpld {{([0-9]+,)?}}4, [[REG2]] -; CHECK-DAG: std [[REG2]], -[[OFFSET1:[0-9]+]] +; CHECK: cmpld {{([0-9]+,)?}}4, 6 +; CHECK-DAG: std 6, 72(1) +; CHECK-DAG: std 5, 64(1) +; CHECK-DAG: std 6, 
-[[OFFSET1:[0-9]+]] ; CHECK-DAG: std 4, -[[OFFSET2:[0-9]+]] ; CHECK: ld 3, -[[OFFSET2]](1) ; CHECK: ld 3, -[[OFFSET1]](1) -; DARWIN32: _func2: -; DARWIN32: addi r[[REG1:[0-9]+]], r[[REGSP:[0-9]+]], 36 -; DARWIN32: lwz r[[REG2:[0-9]+]], 44(r[[REGSP]]) +; DARWIN32-LABEL: _func2 +; DARWIN32-DAG: addi r[[REG8:[0-9]+]], r[[REGSP:[0-9]+]], 36 +; DARWIN32-DAG: lwz r[[REG2:[0-9]+]], 44(r[[REGSP]]) ; DARWIN32: mr -; DARWIN32: mr r[[REG3:[0-9]+]], r[[REGA:[0-9]+]] -; DARWIN32: cmplw {{(cr[0-9]+,)?}}r[[REGA]], r[[REG2]] -; DARWIN32: stw r[[REG3]], -[[OFFSET1:[0-9]+]] -; DARWIN32: stw r[[REG2]], -[[OFFSET2:[0-9]+]] -; DARWIN32: lwz r3, -[[OFFSET1]] -; DARWIN32: lwz r3, -[[OFFSET2]] +; DARWIN32: mr r[[REG7:[0-9]+]], r5 +; DARWIN32-DAG: cmplw {{(cr[0-9]+,)?}}r5, r[[REG2]] +; DARWIN32-DAG: stw r[[REG7]], -[[OFFSET1:[0-9]+]] +; DARWIN32-DAG: stw r[[REG2]], -[[OFFSET2:[0-9]+]] +; DARWIN32-DAG: lwz r3, -[[OFFSET1]] +; DARWIN32-DAG: lwz r3, -[[OFFSET2]] + ; DARWIN64: _func2: ; DARWIN64: ld r[[REG2:[0-9]+]], 72(r1) ; DARWIN64: mr ; DARWIN64: mr r[[REG3:[0-9]+]], r[[REGA:[0-9]+]] ; DARWIN64: cmpld {{(cr[0-9]+,)?}}r[[REGA]], r[[REG2]] -; DARWIN64: std r[[REG3]], -[[OFFSET1:[0-9]+]] ; DARWIN64: std r[[REG2]], -[[OFFSET2:[0-9]+]] +; DARWIN64: std r[[REG3]], -[[OFFSET1:[0-9]+]] ; DARWIN64: ld r3, -[[OFFSET1]] ; DARWIN64: ld r3, -[[OFFSET2]] @@ -106,24 +107,24 @@ unequal: } ; CHECK-LABEL: func3: -; CHECK: ld [[REG3:[0-9]+]], 72(1) -; CHECK: ld [[REG4:[0-9]+]], 56(1) -; CHECK: cmpld {{([0-9]+,)?}}[[REG4]], [[REG3]] -; CHECK: std [[REG3]], -[[OFFSET1:[0-9]+]](1) -; CHECK: std [[REG4]], -[[OFFSET2:[0-9]+]](1) +; CHECK: cmpld {{([0-9]+,)?}}4, 6 +; CHECK-DAG: std 4, -[[OFFSET2:[0-9]+]](1) +; CHECK-DAG: std 6, -[[OFFSET1:[0-9]+]](1) ; CHECK: ld 3, -[[OFFSET2]](1) ; CHECK: ld 3, -[[OFFSET1]](1) -; DARWIN32: _func3: -; DARWIN32: addi r[[REG1:[0-9]+]], r[[REGSP:[0-9]+]], 36 -; DARWIN32: addi r[[REG2:[0-9]+]], r[[REGSP]], 24 -; DARWIN32: lwz r[[REG3:[0-9]+]], 44(r[[REGSP]]) -; DARWIN32: lwz 
r[[REG4:[0-9]+]], 32(r[[REGSP]]) -; DARWIN32: cmplw {{(cr[0-9]+,)?}}r[[REG4]], r[[REG3]] -; DARWIN32: stw r[[REG3]], -[[OFFSET1:[0-9]+]] -; DARWIN32: stw r[[REG4]], -[[OFFSET2:[0-9]+]] -; DARWIN32: lwz r3, -[[OFFSET2]] -; DARWIN32: lwz r3, -[[OFFSET1]] +; DARWIN32-LABEL: _func3: +; DARWIN32-DAG: stw r[[REG8:[0-9]+]], 44(r[[REGSP:[0-9]+]]) +; DARWIN32-DAG: stw r[[REG5:[0-9]+]], 32(r[[REGSP]]) +; DARWIN32-DAG: addi r[[REG5a:[0-9]+]], r[[REGSP:[0-9]+]], 36 +; DARWIN32-DAG: addi r[[REG8a:[0-9]+]], r[[REGSP]], 24 +; DARWIN32-DAG: lwz r[[REG5a:[0-9]+]], 44(r[[REGSP]]) +; DARWIN32-DAG: lwz r[[REG8a:[0-9]+]], 32(r[[REGSP]]) +; DARWIN32-DAG: cmplw {{(cr[0-9]+,)?}}r[[REG8a]], r[[REG5a]] +; DARWIN32-DAG: stw r[[REG5a]], -[[OFFSET1:[0-9]+]] +; DARWIN32-DAG: stw r[[REG8a]], -[[OFFSET2:[0-9]+]] +; DARWIN32-DAG: lwz r3, -[[OFFSET1:[0-9]+]] +; DARWIN32-DAG: lwz r3, -[[OFFSET2:[0-9]+]] ; DARWIN64: _func3: ; DARWIN64: ld r[[REG3:[0-9]+]], 72(r1) diff --git a/llvm/test/CodeGen/PowerPC/complex-return.ll b/llvm/test/CodeGen/PowerPC/complex-return.ll index f6097e65512..ec87a89b110 100644 --- a/llvm/test/CodeGen/PowerPC/complex-return.ll +++ b/llvm/test/CodeGen/PowerPC/complex-return.ll @@ -24,10 +24,10 @@ entry: } ; CHECK-LABEL: foo: -; CHECK: lfd 1 -; CHECK: lfd 2 -; CHECK: lfd 3 -; CHECK: lfd 4 +; CHECK-DAG: lfd 1 +; CHECK-DAG: lfd 2 +; CHECK-DAG: lfd 3 +; CHECK-DAG: lfd 4 define { float, float } @oof() nounwind { entry: @@ -50,6 +50,6 @@ entry: } ; CHECK-LABEL: oof: -; CHECK: lfs 2 -; CHECK: lfs 1 +; CHECK-DAG: lfs 2 +; CHECK-DAG: lfs 1 diff --git a/llvm/test/CodeGen/PowerPC/jaggedstructs.ll b/llvm/test/CodeGen/PowerPC/jaggedstructs.ll index b28b34d7814..6128316f45f 100644 --- a/llvm/test/CodeGen/PowerPC/jaggedstructs.ll +++ b/llvm/test/CodeGen/PowerPC/jaggedstructs.ll @@ -18,31 +18,31 @@ entry: ret void } -; CHECK: std 6, 184(1) -; CHECK: std 5, 176(1) -; CHECK: std 4, 168(1) -; CHECK: std 3, 160(1) -; CHECK: lbz {{[0-9]+}}, 167(1) -; CHECK: lhz {{[0-9]+}}, 165(1) -; CHECK: stb
{{[0-9]+}}, 55(1) -; CHECK: sth {{[0-9]+}}, 53(1) -; CHECK: lbz {{[0-9]+}}, 175(1) -; CHECK: lwz {{[0-9]+}}, 171(1) -; CHECK: stb {{[0-9]+}}, 63(1) -; CHECK: stw {{[0-9]+}}, 59(1) -; CHECK: lhz {{[0-9]+}}, 182(1) -; CHECK: lwz {{[0-9]+}}, 178(1) -; CHECK: sth {{[0-9]+}}, 70(1) -; CHECK: stw {{[0-9]+}}, 66(1) -; CHECK: lbz {{[0-9]+}}, 191(1) -; CHECK: lhz {{[0-9]+}}, 189(1) -; CHECK: lwz {{[0-9]+}}, 185(1) -; CHECK: stb {{[0-9]+}}, 79(1) -; CHECK: sth {{[0-9]+}}, 77(1) -; CHECK: stw {{[0-9]+}}, 73(1) -; CHECK: ld 6, 72(1) -; CHECK: ld 5, 64(1) -; CHECK: ld 4, 56(1) -; CHECK: ld 3, 48(1) +; CHECK-DAG: std 3, 160(1) +; CHECK-DAG: std 6, 184(1) +; CHECK-DAG: std 5, 176(1) +; CHECK-DAG: std 4, 168(1) +; CHECK-DAG: lbz {{[0-9]+}}, 167(1) +; CHECK-DAG: lhz {{[0-9]+}}, 165(1) +; CHECK-DAG: stb {{[0-9]+}}, 55(1) +; CHECK-DAG: sth {{[0-9]+}}, 53(1) +; CHECK-DAG: lbz {{[0-9]+}}, 175(1) +; CHECK-DAG: lwz {{[0-9]+}}, 171(1) +; CHECK-DAG: stb {{[0-9]+}}, 63(1) +; CHECK-DAG: stw {{[0-9]+}}, 59(1) +; CHECK-DAG: lhz {{[0-9]+}}, 182(1) +; CHECK-DAG: lwz {{[0-9]+}}, 178(1) +; CHECK-DAG: sth {{[0-9]+}}, 70(1) +; CHECK-DAG: stw {{[0-9]+}}, 66(1) +; CHECK-DAG: lbz {{[0-9]+}}, 191(1) +; CHECK-DAG: lhz {{[0-9]+}}, 189(1) +; CHECK-DAG: lwz {{[0-9]+}}, 185(1) +; CHECK-DAG: stb {{[0-9]+}}, 79(1) +; CHECK-DAG: sth {{[0-9]+}}, 77(1) +; CHECK-DAG: stw {{[0-9]+}}, 73(1) +; CHECK-DAG: ld 6, 72(1) +; CHECK-DAG: ld 5, 64(1) +; CHECK-DAG: ld 4, 56(1) +; CHECK-DAG: ld 3, 48(1) declare void @check(%struct.S3* byval, %struct.S5* byval, %struct.S6* byval, %struct.S7* byval) diff --git a/llvm/test/CodeGen/PowerPC/ppc64-align-long-double.ll b/llvm/test/CodeGen/PowerPC/ppc64-align-long-double.ll index c3cccd5b293..d59dc64dcf8 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-align-long-double.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-align-long-double.ll @@ -1,6 +1,6 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -fast-isel=false -mattr=-vsx < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mcpu=pwr7
-O0 -fast-isel=false -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-VSX %s -; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O0 -fast-isel=false -mattr=+vsx < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O0 -fast-isel=false -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-P9 %s ; Verify internal alignment of long double in a struct. The double ; argument comes in in GPR3; GPR4 is skipped; GPRs 5 and 6 contain @@ -19,19 +19,44 @@ entry: ret ppc_fp128 %0 } +; The additional stores are caused because we forward the value in the +; store->load->bitcast path to make a store and bitcast of the same +; value. Since the target does bitcast through memory and we no longer +; remember the address we need to do the store in a fresh local +; address. + ; CHECK-DAG: std 6, 72(1) ; CHECK-DAG: std 5, 64(1) ; CHECK-DAG: std 4, 56(1) ; CHECK-DAG: std 3, 48(1) -; CHECK: lfd 1, 64(1) -; CHECK: lfd 2, 72(1) + +; CHECK-DAG: std 5, -16(1) +; CHECK-DAG: std 6, -8(1) +; CHECK-DAG: lfd 1, -16(1) +; CHECK-DAG: lfd 2, -8(1) + +; FIXMECHECK: lfd 1, 64(1) +; FIXMECHECK: lfd 2, 72(1) ; CHECK-VSX-DAG: std 6, 72(1) ; CHECK-VSX-DAG: std 5, 64(1) ; CHECK-VSX-DAG: std 4, 56(1) ; CHECK-VSX-DAG: std 3, 48(1) -; CHECK-VSX: li 3, 16 -; CHECK-VSX: addi 4, 1, 48 -; CHECK-VSX: lxsdx 1, 4, 3 -; CHECK-VSX: li 3, 24 -; CHECK-VSX: lxsdx 2, 4, 3 +; CHECK-VSX-DAG: std 5, -16(1) +; CHECK-VSX-DAG: std 6, -8(1) +; CHECK-VSX: addi 3, 1, -16 +; CHECK-VSX: lxsdx 1, 0, 3 +; CHECK-VSX: addi 3, 1, -8 +; CHECK-VSX: lxsdx 2, 0, 3 + +; FIXME-VSX: addi 4, 1, 48 +; FIXME-VSX: lxsdx 1, 4, 3 +; FIXME-VSX: li 3, 24 +; FIXME-VSX: lxsdx 2, 4, 3 + +; CHECK-P9: std 6, 72(1) +; CHECK-P9: std 5, 64(1) +; CHECK-P9: std 4, 56(1) +; CHECK-P9: std 3, 48(1) +; CHECK-P9: mtvsrd 1, 5 +; CHECK-P9: mtvsrd 2, 6 diff --git a/llvm/test/CodeGen/PowerPC/structsinmem.ll b/llvm/test/CodeGen/PowerPC/structsinmem.ll index 3777f3ec5ba..01b0848e707 100644 --- a/llvm/test/CodeGen/PowerPC/structsinmem.ll +++ 
b/llvm/test/CodeGen/PowerPC/structsinmem.ll @@ -113,13 +113,13 @@ entry: %add13 = add nsw i32 %add11, %6 ret i32 %add13 -; CHECK: lha {{[0-9]+}}, 126(1) -; CHECK: lha {{[0-9]+}}, 132(1) -; CHECK: lbz {{[0-9]+}}, 119(1) -; CHECK: lwz {{[0-9]+}}, 140(1) -; CHECK: lwz {{[0-9]+}}, 144(1) -; CHECK: lwz {{[0-9]+}}, 152(1) -; CHECK: lwz {{[0-9]+}}, 160(1) +; CHECK-DAG: lha {{[0-9]+}}, 126(1) +; CHECK-DAG: lha {{[0-9]+}}, 132(1) +; CHECK-DAG: lbz {{[0-9]+}}, 119(1) +; CHECK-DAG: lwz {{[0-9]+}}, 140(1) +; CHECK-DAG: lwz {{[0-9]+}}, 144(1) +; CHECK-DAG: lwz {{[0-9]+}}, 152(1) +; CHECK-DAG: lwz {{[0-9]+}}, 160(1) } define i32 @caller2() nounwind { @@ -205,11 +205,11 @@ entry: %add13 = add nsw i32 %add11, %6 ret i32 %add13 -; CHECK: lha {{[0-9]+}}, 126(1) -; CHECK: lha {{[0-9]+}}, 133(1) -; CHECK: lbz {{[0-9]+}}, 119(1) -; CHECK: lwz {{[0-9]+}}, 140(1) -; CHECK: lwz {{[0-9]+}}, 147(1) -; CHECK: lwz {{[0-9]+}}, 154(1) -; CHECK: lwz {{[0-9]+}}, 161(1) +; CHECK-DAG: lha {{[0-9]+}}, 126(1) +; CHECK-DAG: lha {{[0-9]+}}, 133(1) +; CHECK-DAG: lbz {{[0-9]+}}, 119(1) +; CHECK-DAG: lwz {{[0-9]+}}, 140(1) +; CHECK-DAG: lwz {{[0-9]+}}, 147(1) +; CHECK-DAG: lwz {{[0-9]+}}, 154(1) +; CHECK-DAG: lwz {{[0-9]+}}, 161(1) } diff --git a/llvm/test/CodeGen/PowerPC/structsinregs.ll b/llvm/test/CodeGen/PowerPC/structsinregs.ll index e27041dd4c8..54679f259e9 100644 --- a/llvm/test/CodeGen/PowerPC/structsinregs.ll +++ b/llvm/test/CodeGen/PowerPC/structsinregs.ll @@ -59,6 +59,7 @@ entry: %call = call i32 @callee1(%struct.s1* byval %p1, %struct.s2* byval %p2, %struct.s3* byval %p3, %struct.s4* byval %p4, %struct.s5* byval %p5, %struct.s6* byval %p6, %struct.s7* byval %p7) ret i32 %call +; CHECK-LABEL: caller1 ; CHECK: ld 9, 112(31) ; CHECK: ld 8, 120(31) ; CHECK: ld 7, 128(31) @@ -97,20 +98,21 @@ entry: %add13 = add nsw i32 %add11, %6 ret i32 %add13 -; CHECK: std 9, 96(1) -; CHECK: std 8, 88(1) -; CHECK: std 7, 80(1) -; CHECK: stw 6, 76(1) -; CHECK: stw 5, 68(1) -; CHECK: sth 4, 62(1) -; CHECK: stb 3, 
55(1) -; CHECK: lha {{[0-9]+}}, 62(1) -; CHECK: lha {{[0-9]+}}, 68(1) -; CHECK: lbz {{[0-9]+}}, 55(1) -; CHECK: lwz {{[0-9]+}}, 76(1) -; CHECK: lwz {{[0-9]+}}, 80(1) -; CHECK: lwz {{[0-9]+}}, 88(1) -; CHECK: lwz {{[0-9]+}}, 96(1) +; CHECK-LABEL: callee1 +; CHECK-DAG: std 9, 96(1) +; CHECK-DAG: std 8, 88(1) +; CHECK-DAG: std 7, 80(1) +; CHECK-DAG: stw 6, 76(1) +; CHECK-DAG: stw 5, 68(1) +; CHECK-DAG: sth 4, 62(1) +; CHECK-DAG: stb 3, 55(1) +; CHECK-DAG: lha {{[0-9]+}}, 62(1) +; CHECK-DAG: lha {{[0-9]+}}, 68(1) +; CHECK-DAG: lbz {{[0-9]+}}, 55(1) +; CHECK-DAG: lwz {{[0-9]+}}, 76(1) +; CHECK-DAG: lwz {{[0-9]+}}, 80(1) +; CHECK-DAG: lwz {{[0-9]+}}, 88(1) +; CHECK-DAG: lwz {{[0-9]+}}, 96(1) } define i32 @caller2() nounwind { @@ -139,6 +141,7 @@ entry: %call = call i32 @callee2(%struct.t1* byval %p1, %struct.t2* byval %p2, %struct.t3* byval %p3, %struct.t4* byval %p4, %struct.t5* byval %p5, %struct.t6* byval %p6, %struct.t7* byval %p7) ret i32 %call +; CHECK-LABEL: caller2 ; CHECK: stb {{[0-9]+}}, 71(1) ; CHECK: sth {{[0-9]+}}, 69(1) ; CHECK: stb {{[0-9]+}}, 87(1) @@ -184,18 +187,19 @@ entry: %add13 = add nsw i32 %add11, %6 ret i32 %add13 -; CHECK: std 9, 96(1) -; CHECK: std 8, 88(1) -; CHECK: std 7, 80(1) -; CHECK: stw 6, 76(1) -; CHECK: std 5, 64(1) -; CHECK: sth 4, 62(1) -; CHECK: stb 3, 55(1) -; CHECK: lha {{[0-9]+}}, 62(1) -; CHECK: lha {{[0-9]+}}, 69(1) -; CHECK: lbz {{[0-9]+}}, 55(1) -; CHECK: lwz {{[0-9]+}}, 76(1) -; CHECK: lwz {{[0-9]+}}, 83(1) -; CHECK: lwz {{[0-9]+}}, 90(1) -; CHECK: lwz {{[0-9]+}}, 97(1) +; CHECK-LABEL: callee2 +; CHECK-DAG: std 9, 96(1) +; CHECK-DAG: std 8, 88(1) +; CHECK-DAG: std 7, 80(1) +; CHECK-DAG: stw 6, 76(1) +; CHECK-DAG: std 5, 64(1) +; CHECK-DAG: sth 4, 62(1) +; CHECK-DAG: stb 3, 55(1) +; CHECK-DAG: lha {{[0-9]+}}, 62(1) +; CHECK-DAG: lha {{[0-9]+}}, 69(1) +; CHECK-DAG: lbz {{[0-9]+}}, 55(1) +; CHECK-DAG: lwz {{[0-9]+}}, 76(1) +; CHECK-DAG: lwz {{[0-9]+}}, 83(1) +; CHECK-DAG: lwz {{[0-9]+}}, 90(1) +; CHECK-DAG: lwz {{[0-9]+}}, 
97(1) } diff --git a/llvm/test/CodeGen/SystemZ/unaligned-01.ll b/llvm/test/CodeGen/SystemZ/unaligned-01.ll index 94cad0e1743..2af1aa79a23 100644 --- a/llvm/test/CodeGen/SystemZ/unaligned-01.ll +++ b/llvm/test/CodeGen/SystemZ/unaligned-01.ll @@ -1,10 +1,7 @@ ; Check that unaligned accesses are allowed in general. We check the ; few exceptions (like CRL) in their respective test files. ; -; FIXME: -combiner-alias-analysis (the default for SystemZ) stops -; f1 from being optimized. -; RUN: llc < %s -mtriple=s390x-linux-gnu -combiner-alias-analysis=false \ -; RUN: | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ; Check that these four byte stores become a single word store. define void @f1(i8 *%ptr) { diff --git a/llvm/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/llvm/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll index 2f8e36b66b8..08349a31dfa 100644 --- a/llvm/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll +++ b/llvm/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll @@ -9,9 +9,9 @@ define void @_Z19getClosestDiagonal3ii(%0* noalias sret, i32, i32) nounwind { ; CHECK: bl ___muldf3 -; CHECK: bl ___muldf3 ; CHECK: beq LBB0 ; CHECK: bl ___muldf3 +; CHECK: bl ___muldf3 ;