Diffstat (limited to 'llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll')
-rw-r--r-- | llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll | 280
1 file changed, 280 insertions, 0 deletions
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll
new file mode 100644
index 00000000000..4d28e06f252
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll
@@ -0,0 +1,280 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@x = external global [1 x [2 x <4 x float>]]
+
+; Can we sink a single addressing mode computation to its use?
+define void @test1(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test1
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+entry:
+  %addr = getelementptr inbounds i64, i64* %base, i64 5
+  %casted = bitcast i64* %addr to i32*
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+  %v = load i32, i32* %casted, align 4
+  br label %fallthrough
+
+fallthrough:
+  ret void
+}
+
+declare void @foo(i32)
+
+; Make sure sinking two copies of the addressing mode into different blocks works.
+define void @test2(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test2
+entry:
+  %addr = getelementptr inbounds i64, i64* %base, i64 5
+  %casted = bitcast i64* %addr to i32*
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+  %v1 = load i32, i32* %casted, align 4
+  call void @foo(i32 %v1)
+  %cmp = icmp eq i32 %v1, 0
+  br i1 %cmp, label %next, label %fallthrough
+
+next:
+; CHECK-LABEL: next:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+  %v2 = load i32, i32* %casted, align 4
+  call void @foo(i32 %v2)
+  br label %fallthrough
+
+fallthrough:
+  ret void
+}
+
+; If we have two loads in the same block, we need only one copy of the
+; addressing mode - instruction selection will duplicate it if needed.
+define void @test3(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test3
+entry:
+  %addr = getelementptr inbounds i64, i64* %base, i64 5
+  %casted = bitcast i64* %addr to i32*
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+  %v1 = load i32, i32* %casted, align 4
+  call void @foo(i32 %v1)
+; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
+  %v2 = load i32, i32* %casted, align 4
+  call void @foo(i32 %v2)
+  br label %fallthrough
+
+fallthrough:
+  ret void
+}
+
+; Can we still sink the addressing mode if there's a cold use of the
+; address itself?
+define void @test4(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test4
+entry:
+  %addr = getelementptr inbounds i64, i64* %base, i64 5
+  %casted = bitcast i64* %addr to i32*
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+  %v1 = load i32, i32* %casted, align 4
+  call void @foo(i32 %v1)
+  %cmp = icmp eq i32 %v1, 0
+  br i1 %cmp, label %rare.1, label %fallthrough
+
+fallthrough:
+  ret void
+
+rare.1:
+; CHECK-LABEL: rare.1:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+  call void @slowpath(i32 %v1, i32* %casted) cold
+  br label %fallthrough
+}
+
+; Negative test - we don't want to duplicate the addressing mode into the hot path.
+define void @test5(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test5
+entry:
+; CHECK: %addr = getelementptr inbounds
+  %addr = getelementptr inbounds i64, i64* %base, i64 5
+  %casted = bitcast i64* %addr to i32*
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
+  %v1 = load i32, i32* %casted, align 4
+  call void @foo(i32 %v1)
+  %cmp = icmp eq i32 %v1, 0
+  br i1 %cmp, label %rare.1, label %fallthrough
+
+fallthrough:
+  ret void
+
+rare.1:
+  call void @slowpath(i32 %v1, i32* %casted) ;; NOT COLD
+  br label %fallthrough
+}
+
+; Negative test - don't sink when optimizing for size.
+define void @test6(i1 %cond, i64* %base) minsize {
+; CHECK-LABEL: @test6
+entry:
+; CHECK: %addr = getelementptr
+  %addr = getelementptr inbounds i64, i64* %base, i64 5
+  %casted = bitcast i64* %addr to i32*
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
+  %v1 = load i32, i32* %casted, align 4
+  call void @foo(i32 %v1)
+  %cmp = icmp eq i32 %v1, 0
+  br i1 %cmp, label %rare.1, label %fallthrough
+
+fallthrough:
+  ret void
+
+rare.1:
+  call void @slowpath(i32 %v1, i32* %casted) cold
+  br label %fallthrough
+}
+
+
+; Make sure sinking two copies of the addressing mode into different blocks works
+; when there are cold paths for each.
+define void @test7(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test7
+entry:
+  %addr = getelementptr inbounds i64, i64* %base, i64 5
+  %casted = bitcast i64* %addr to i32*
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+  %v1 = load i32, i32* %casted, align 4
+  call void @foo(i32 %v1)
+  %cmp = icmp eq i32 %v1, 0
+  br i1 %cmp, label %rare.1, label %next
+
+next:
+; CHECK-LABEL: next:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+  %v2 = load i32, i32* %casted, align 4
+  call void @foo(i32 %v2)
+  %cmp2 = icmp eq i32 %v2, 0
+  br i1 %cmp2, label %rare.2, label %fallthrough
+
+fallthrough:
+  ret void
+
+rare.1:
+; CHECK-LABEL: rare.1:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+  call void @slowpath(i32 %v1, i32* %casted) cold
+  br label %next
+
+rare.2:
+; CHECK-LABEL: rare.2:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+  call void @slowpath(i32 %v2, i32* %casted) cold
+  br label %fallthrough
+}
+
+declare void @slowpath(i32, i32*)
+
+; Make sure we don't end up in an infinite loop after we fail to sink.
+; CHECK-LABEL: define void @test8
+; CHECK: %ptr = getelementptr i8, i8* %aFOO_load_ptr2int_2void, i32 undef
+define void @test8() {
+allocas:
+  %aFOO_load = load float*, float** undef
+  %aFOO_load_ptr2int = ptrtoint float* %aFOO_load to i64
+  %aFOO_load_ptr2int_broadcast_init = insertelement <4 x i64> undef, i64 %aFOO_load_ptr2int, i32 0
+  %aFOO_load_ptr2int_2void = inttoptr i64 %aFOO_load_ptr2int to i8*
+  %ptr = getelementptr i8, i8* %aFOO_load_ptr2int_2void, i32 undef
+  br label %load.i145
+
+load.i145:
+  %ptr.i143 = bitcast i8* %ptr to <4 x float>*
+  %valall.i144 = load <4 x float>, <4 x float>* %ptr.i143, align 4
+  %x_offset = getelementptr [1 x [2 x <4 x float>]], [1 x [2 x <4 x float>]]* @x, i32 0, i64 0
+  br label %pl_loop.i.i122
+
+pl_loop.i.i122:
+  br label %pl_loop.i.i122
+}
+
+; Make sure we can sink the address computation even if there is a cycle
+; in the phi nodes.
+define void @test9(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test9
+entry:
+  %addr = getelementptr inbounds i64, i64* %base, i64 5
+  %casted = bitcast i64* %addr to i32*
+  br label %header
+
+header:
+  %iv = phi i32 [0, %entry], [%iv.inc, %backedge]
+  %casted.loop = phi i32* [%casted, %entry], [%casted.merged, %backedge]
+  br i1 %cond, label %if.then, label %backedge
+
+if.then:
+  call void @foo(i32 %iv)
+  %addr.1 = getelementptr inbounds i64, i64* %base, i64 5
+  %casted.1 = bitcast i64* %addr.1 to i32*
+  br label %backedge
+
+backedge:
+; CHECK-LABEL: backedge:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+  %casted.merged = phi i32* [%casted.loop, %header], [%casted.1, %if.then]
+  %v = load i32, i32* %casted.merged, align 4
+  call void @foo(i32 %v)
+  %iv.inc = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv.inc, 1000
+  br i1 %cmp, label %header, label %exit
+
+exit:
+  ret void
+}
+
+; Make sure we can eliminate a select when both arguments perform equivalent
+; address computation.
+define void @test10(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test10
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+; CHECK-NOT: select
+entry:
+  %gep1 = getelementptr inbounds i64, i64* %base, i64 5
+  %gep1.casted = bitcast i64* %gep1 to i32*
+  %base.casted = bitcast i64* %base to i32*
+  %gep2 = getelementptr inbounds i32, i32* %base.casted, i64 10
+  %casted.merged = select i1 %cond, i32* %gep1.casted, i32* %gep2
+  %v = load i32, i32* %casted.merged, align 4
+  call void @foo(i32 %v)
+  ret void
+}
+
+; Found by fuzzer: getSExtValue of a constant wider than 64 bits.
+define void @i96_mul(i1* %base, i96 %offset) {
+BB:
+  ;; RHS = 0x7FFFFFFFFFFFFFFFFFFFFFFF
+  %B84 = mul i96 %offset, 39614081257132168796771975167
+  %G23 = getelementptr i1, i1* %base, i96 %B84
+  store i1 false, i1* %G23
+  ret void
+}
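For context, the transformation under test: CodeGenPrepare sinks an address
computation out of its defining block and rematerializes it next to each memory
access that uses it, so instruction selection can fold the whole expression into
an x86 addressing mode. The sunk form is a getelementptr over i8 carrying the
precomputed byte offset, which is where the 40 in the CHECK lines comes from:
the i64 5 index scaled by the 8-byte size of i64. Sinking is skipped when it
would duplicate the computation onto a hot path (test5) or when optimizing for
size (test6), and @i96_mul covers a crash where a constant index wider than
64 bits was handed to getSExtValue. Below is a minimal before/after sketch for
the @test1 pattern; it is illustrative only - the %sunkaddr names are typical
of the pass's output, but the test asserts nothing beyond its CHECK lines.

  ; Input: the address is computed in %entry but only used in %if.then.
  define void @example(i1 %cond, i64* %base) {
  entry:
    %addr = getelementptr inbounds i64, i64* %base, i64 5
    %casted = bitcast i64* %addr to i32*
    br i1 %cond, label %if.then, label %fallthrough

  if.then:
    %v = load i32, i32* %casted, align 4
    br label %fallthrough

  fallthrough:
    ret void
  }

  ; Roughly what `opt -S -codegenprepare` produces: the now-dead computation
  ; in %entry is erased, and an equivalent i8 GEP with the scaled byte offset
  ; (5 * 8 = 40) sits next to the load.
  define void @example(i1 %cond, i64* %base) {
  entry:
    br i1 %cond, label %if.then, label %fallthrough

  if.then:
    %sunkaddr = bitcast i64* %base to i8*
    %sunkaddr1 = getelementptr inbounds i8, i8* %sunkaddr, i64 40
    %sunkaddr2 = bitcast i8* %sunkaddr1 to i32*
    %v = load i32, i32* %sunkaddr2, align 4
    br label %fallthrough

  fallthrough:
    ret void
  }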