diff options
author | Eric Christopher <echristo@gmail.com> | 2019-04-17 04:52:47 +0000 |
---|---|---|
committer | Eric Christopher <echristo@gmail.com> | 2019-04-17 04:52:47 +0000 |
commit | cee313d288a4faf0355d76fb6e0e927e211d08a5 (patch) | |
tree | d386075318d761197779a96e5d8fc0dc7b06342b /llvm/test/Transforms/CodeGenPrepare/X86 | |
parent | c3d6a929fdd92fd06d4304675ade8d7210ee711a (diff) | |
download | bcm5719-llvm-cee313d288a4faf0355d76fb6e0e927e211d08a5.tar.gz bcm5719-llvm-cee313d288a4faf0355d76fb6e0e927e211d08a5.zip |
Revert "Temporarily Revert "Add basic loop fusion pass.""
The reversion apparently deleted the test/Transforms directory.
Will be re-reverting again.
llvm-svn: 358552
Diffstat (limited to 'llvm/test/Transforms/CodeGenPrepare/X86')
21 files changed, 2751 insertions, 0 deletions
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll b/llvm/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll new file mode 100644 index 00000000000..1121abb7314 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll @@ -0,0 +1,117 @@ +; RUN: opt -codegenprepare -S < %s | FileCheck %s + +; The following target lines are needed for the test to exercise what it should. +; Without these lines, CodeGenPrepare does not try to sink the bitcasts. +target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc" + +declare i32 @__CxxFrameHandler3(...) + +declare void @f() + +declare void @g(i8*) +declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2 + +; CodeGenPrepare will want to sink these bitcasts, but it selects the catchpad +; blocks as the place to which the bitcast should be sunk. Since catchpads +; do not allow non-phi instructions before the terminator, this isn't possible. + +; CHECK-LABEL: @test( +define void @test(i32* %addr) personality i32 (...)* @__CxxFrameHandler3 { +entry: + %x = getelementptr i32, i32* %addr, i32 1 + %p1 = bitcast i32* %x to i8* + invoke void @f() + to label %invoke.cont unwind label %catch1 + +; CHECK: invoke.cont: +; CHECK-NEXT: %y = getelementptr i32, i32* %addr, i32 2 +invoke.cont: + %y = getelementptr i32, i32* %addr, i32 2 + %p2 = bitcast i32* %y to i8* + invoke void @f() + to label %done unwind label %catch2 + +done: + ret void + +catch1: + %cs1 = catchswitch within none [label %handler1] unwind to caller + +handler1: + %cp1 = catchpad within %cs1 [] + br label %catch.shared +; CHECK: handler1: +; CHECK-NEXT: catchpad within %cs1 +; CHECK: %[[p1:[0-9]+]] = bitcast i32* %x to i8* + +catch2: + %cs2 = catchswitch within none [label %handler2] unwind to caller + +handler2: + %cp2 = catchpad within %cs2 [] + br label %catch.shared +; CHECK: handler2: +; CHECK: catchpad within %cs2 +; CHECK: %[[p2:[0-9]+]] = bitcast i32* %y to i8* + +; 
CHECK: catch.shared: +; CHECK-NEXT: %p = phi i8* [ %[[p1]], %handler1 ], [ %[[p2]], %handler2 ] +catch.shared: + %p = phi i8* [ %p1, %handler1 ], [ %p2, %handler2 ] + call void @g(i8* %p) + unreachable +} + +; CodeGenPrepare will want to hoist these llvm.dbg.value calls to the phi, but +; there is no insertion point in a catchpad block. + +; CHECK-LABEL: @test_dbg_value( +define void @test_dbg_value() personality i32 (...)* @__CxxFrameHandler3 { +entry: + %a = alloca i8 + %b = alloca i8 + invoke void @f() to label %next unwind label %catch.dispatch +next: + invoke void @f() to label %ret unwind label %catch.dispatch +ret: + ret void + +catch.dispatch: + %p = phi i8* [%a, %entry], [%b, %next] + %cs1 = catchswitch within none [label %catch] unwind to caller + +catch: + %cp1 = catchpad within %cs1 [] + tail call void @llvm.dbg.value(metadata i8* %p, i64 0, metadata !11, metadata !13), !dbg !14 + call void @g(i8* %p) + catchret from %cp1 to label %ret + +; CHECK: catch.dispatch: +; CHECK-NEXT: phi i8 +; CHECK-NEXT: catchswitch +; CHECK-NOT: llvm.dbg.value + +; CHECK: catch: +; CHECK-NEXT: catchpad +; CHECK-NEXT: call void @llvm.dbg.value +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8, !9} +!llvm.ident = !{!10} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 254906) (llvm/trunk 254917)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: null) +!1 = !DIFile(filename: "t.c", directory: "D:\5Csrc\5Cllvm\5Cbuild") +!4 = distinct !DISubprogram(name: "test_dbg_value", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, unit: !0, retainedNodes: null) +!5 = !DISubroutineType(types: !6) +!6 = !{null} +!7 = !{i32 2, !"Dwarf Version", i32 4} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{i32 1, !"PIC Level", i32 2} +!10 = !{!"clang version 3.8.0 (trunk 254906) (llvm/trunk 254917)"} +!11 = !DILocalVariable(name: "p", scope: !4, file: 
!1, line: 2, type: !12) +!12 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char) +!13 = !DIExpression(DW_OP_deref) +!14 = !DILocation(line: 2, column: 8, scope: !4) +!15 = !DILocation(line: 3, column: 1, scope: !4) diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/computedgoto.ll b/llvm/test/Transforms/CodeGenPrepare/X86/computedgoto.ll new file mode 100644 index 00000000000..6a3804f2a75 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/computedgoto.ll @@ -0,0 +1,294 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -codegenprepare -S < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @use(i32) local_unnamed_addr +declare void @useptr([2 x i8*]*) local_unnamed_addr + +; CHECK: @simple.targets = constant [2 x i8*] [i8* blockaddress(@simple, %bb0), i8* blockaddress(@simple, %bb1)], align 16 +@simple.targets = constant [2 x i8*] [i8* blockaddress(@simple, %bb0), i8* blockaddress(@simple, %bb1)], align 16 + +; CHECK: @multi.targets = constant [2 x i8*] [i8* blockaddress(@multi, %bb0), i8* blockaddress(@multi, %bb1)], align 16 +@multi.targets = constant [2 x i8*] [i8* blockaddress(@multi, %bb0), i8* blockaddress(@multi, %bb1)], align 16 + +; CHECK: @loop.targets = constant [2 x i8*] [i8* blockaddress(@loop, %bb0), i8* blockaddress(@loop, %bb1)], align 16 +@loop.targets = constant [2 x i8*] [i8* blockaddress(@loop, %bb0), i8* blockaddress(@loop, %bb1)], align 16 + +; CHECK: @nophi.targets = constant [2 x i8*] [i8* blockaddress(@nophi, %bb0), i8* blockaddress(@nophi, %bb1)], align 16 +@nophi.targets = constant [2 x i8*] [i8* blockaddress(@nophi, %bb0), i8* blockaddress(@nophi, %bb1)], align 16 + +; CHECK: @noncritical.targets = constant [2 x i8*] [i8* blockaddress(@noncritical, %bb0), i8* blockaddress(@noncritical, %bb1)], align 16 +@noncritical.targets = constant [2 x i8*] [i8* blockaddress(@noncritical, 
%bb0), i8* blockaddress(@noncritical, %bb1)], align 16 + +; Check that we break the critical edge when a jump table has only one use. +define void @simple(i32* nocapture readonly %p) { +; CHECK-LABEL: @simple( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 +; CHECK-NEXT: [[INITVAL:%.*]] = load i32, i32* [[P]], align 4 +; CHECK-NEXT: [[INITOP:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4 +; CHECK-NEXT: switch i32 [[INITOP]], label [[EXIT:%.*]] [ +; CHECK-NEXT: i32 0, label [[BB0_CLONE:%.*]] +; CHECK-NEXT: i32 1, label [[BB1_CLONE:%.*]] +; CHECK-NEXT: ] +; CHECK: bb0: +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[MERGE:%.*]] = phi i32* [ [[PTR:%.*]], [[BB0:%.*]] ], [ [[INCDEC_PTR]], [[BB0_CLONE]] ] +; CHECK-NEXT: [[MERGE2:%.*]] = phi i32 [ 0, [[BB0]] ], [ [[INITVAL]], [[BB0_CLONE]] ] +; CHECK-NEXT: tail call void @use(i32 [[MERGE2]]) +; CHECK-NEXT: br label [[INDIRECTGOTO:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[DOTSPLIT3:%.*]] +; CHECK: .split3: +; CHECK-NEXT: [[MERGE5:%.*]] = phi i32* [ [[PTR]], [[BB1:%.*]] ], [ [[INCDEC_PTR]], [[BB1_CLONE]] ] +; CHECK-NEXT: [[MERGE7:%.*]] = phi i32 [ 1, [[BB1]] ], [ [[INITVAL]], [[BB1_CLONE]] ] +; CHECK-NEXT: tail call void @use(i32 [[MERGE7]]) +; CHECK-NEXT: br label [[INDIRECTGOTO]] +; CHECK: indirectgoto: +; CHECK-NEXT: [[P_ADDR_SINK:%.*]] = phi i32* [ [[MERGE5]], [[DOTSPLIT3]] ], [ [[MERGE]], [[DOTSPLIT]] ] +; CHECK-NEXT: [[PTR]] = getelementptr inbounds i32, i32* [[P_ADDR_SINK]], i64 1 +; CHECK-NEXT: [[NEWP:%.*]] = load i32, i32* [[P_ADDR_SINK]], align 4 +; CHECK-NEXT: [[IDX:%.*]] = sext i32 [[NEWP]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @simple.targets, i64 0, i64 [[IDX]] +; CHECK-NEXT: [[NEWOP:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +; CHECK-NEXT: indirectbr i8* [[NEWOP]], [label [[BB0]], label %bb1] +; CHECK: exit: +; CHECK-NEXT: ret void +; CHECK: bb0.clone: +; CHECK-NEXT: 
br label [[DOTSPLIT]] +; CHECK: bb1.clone: +; CHECK-NEXT: br label [[DOTSPLIT3]] +; +entry: + %incdec.ptr = getelementptr inbounds i32, i32* %p, i64 1 + %initval = load i32, i32* %p, align 4 + %initop = load i32, i32* %incdec.ptr, align 4 + switch i32 %initop, label %exit [ + i32 0, label %bb0 + i32 1, label %bb1 + ] + +bb0: + %p.addr.0 = phi i32* [ %incdec.ptr, %entry ], [ %ptr, %indirectgoto ] + %opcode.0 = phi i32 [ %initval, %entry ], [ 0, %indirectgoto ] + tail call void @use(i32 %opcode.0) + br label %indirectgoto + +bb1: + %p.addr.1 = phi i32* [ %incdec.ptr, %entry ], [ %ptr, %indirectgoto ] + %opcode.1 = phi i32 [ %initval, %entry ], [ 1, %indirectgoto ] + tail call void @use(i32 %opcode.1) + br label %indirectgoto + +indirectgoto: + %p.addr.sink = phi i32* [ %p.addr.1, %bb1 ], [ %p.addr.0, %bb0 ] + %ptr = getelementptr inbounds i32, i32* %p.addr.sink, i64 1 + %newp = load i32, i32* %p.addr.sink, align 4 + %idx = sext i32 %newp to i64 + %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @simple.targets, i64 0, i64 %idx + %newop = load i8*, i8** %arrayidx, align 8 + indirectbr i8* %newop, [label %bb0, label %bb1] + +exit: + ret void +} + +; Don't try to break critical edges when several indirectbr point to a single block +define void @multi(i32* nocapture readonly %p) { +; CHECK-LABEL: @multi( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 +; CHECK-NEXT: [[INITVAL:%.*]] = load i32, i32* [[P]], align 4 +; CHECK-NEXT: [[INITOP:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4 +; CHECK-NEXT: switch i32 [[INITOP]], label [[EXIT:%.*]] [ +; CHECK-NEXT: i32 0, label [[BB0:%.*]] +; CHECK-NEXT: i32 1, label [[BB1:%.*]] +; CHECK-NEXT: ] +; CHECK: bb0: +; CHECK-NEXT: [[P_ADDR_0:%.*]] = phi i32* [ [[INCDEC_PTR]], [[ENTRY:%.*]] ], [ [[NEXT0:%.*]], [[BB0]] ], [ [[NEXT1:%.*]], [[BB1]] ] +; CHECK-NEXT: [[OPCODE_0:%.*]] = phi i32 [ [[INITVAL]], [[ENTRY]] ], [ 0, [[BB0]] ], [ 1, [[BB1]] ] +; CHECK-NEXT: tail call 
void @use(i32 [[OPCODE_0]]) +; CHECK-NEXT: [[NEXT0]] = getelementptr inbounds i32, i32* [[P_ADDR_0]], i64 1 +; CHECK-NEXT: [[NEWP0:%.*]] = load i32, i32* [[P_ADDR_0]], align 4 +; CHECK-NEXT: [[IDX0:%.*]] = sext i32 [[NEWP0]] to i64 +; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 [[IDX0]] +; CHECK-NEXT: [[NEWOP0:%.*]] = load i8*, i8** [[ARRAYIDX0]], align 8 +; CHECK-NEXT: indirectbr i8* [[NEWOP0]], [label [[BB0]], label %bb1] +; CHECK: bb1: +; CHECK-NEXT: [[P_ADDR_1:%.*]] = phi i32* [ [[INCDEC_PTR]], [[ENTRY]] ], [ [[NEXT0]], [[BB0]] ], [ [[NEXT1]], [[BB1]] ] +; CHECK-NEXT: [[OPCODE_1:%.*]] = phi i32 [ [[INITVAL]], [[ENTRY]] ], [ 0, [[BB0]] ], [ 1, [[BB1]] ] +; CHECK-NEXT: tail call void @use(i32 [[OPCODE_1]]) +; CHECK-NEXT: [[NEXT1]] = getelementptr inbounds i32, i32* [[P_ADDR_1]], i64 1 +; CHECK-NEXT: [[NEWP1:%.*]] = load i32, i32* [[P_ADDR_1]], align 4 +; CHECK-NEXT: [[IDX1:%.*]] = sext i32 [[NEWP1]] to i64 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 [[IDX1]] +; CHECK-NEXT: [[NEWOP1:%.*]] = load i8*, i8** [[ARRAYIDX1]], align 8 +; CHECK-NEXT: indirectbr i8* [[NEWOP1]], [label [[BB0]], label %bb1] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %incdec.ptr = getelementptr inbounds i32, i32* %p, i64 1 + %initval = load i32, i32* %p, align 4 + %initop = load i32, i32* %incdec.ptr, align 4 + switch i32 %initop, label %exit [ + i32 0, label %bb0 + i32 1, label %bb1 + ] + +bb0: + %p.addr.0 = phi i32* [ %incdec.ptr, %entry ], [ %next0, %bb0 ], [ %next1, %bb1 ] + %opcode.0 = phi i32 [ %initval, %entry ], [ 0, %bb0 ], [ 1, %bb1 ] + tail call void @use(i32 %opcode.0) + %next0 = getelementptr inbounds i32, i32* %p.addr.0, i64 1 + %newp0 = load i32, i32* %p.addr.0, align 4 + %idx0 = sext i32 %newp0 to i64 + %arrayidx0 = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 %idx0 + %newop0 = load i8*, i8** %arrayidx0, align 8 + 
indirectbr i8* %newop0, [label %bb0, label %bb1] + +bb1: + %p.addr.1 = phi i32* [ %incdec.ptr, %entry ], [ %next0, %bb0 ], [ %next1, %bb1 ] + %opcode.1 = phi i32 [ %initval, %entry ], [ 0, %bb0 ], [ 1, %bb1 ] + tail call void @use(i32 %opcode.1) + %next1 = getelementptr inbounds i32, i32* %p.addr.1, i64 1 + %newp1 = load i32, i32* %p.addr.1, align 4 + %idx1 = sext i32 %newp1 to i64 + %arrayidx1 = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 %idx1 + %newop1 = load i8*, i8** %arrayidx1, align 8 + indirectbr i8* %newop1, [label %bb0, label %bb1] + +exit: + ret void +} + +; Make sure we do the right thing for cases where the indirectbr branches to +; the block it terminates. +define void @loop(i64* nocapture readonly %p) { +; CHECK-LABEL: @loop( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: bb0: +; CHECK-NEXT: br label [[DOTSPLIT]] +; CHECK: .split: +; CHECK-NEXT: [[MERGE:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[BB0:%.*]] ], [ 0, [[BB0_CLONE:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 [[MERGE]] +; CHECK-NEXT: store i64 [[MERGE]], i64* [[TMP0]], align 4 +; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[MERGE]], 1 +; CHECK-NEXT: [[IDX:%.*]] = srem i64 [[MERGE]], 2 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @loop.targets, i64 0, i64 [[IDX]] +; CHECK-NEXT: [[TARGET:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +; CHECK-NEXT: indirectbr i8* [[TARGET]], [label [[BB0]], label %bb1] +; CHECK: bb1: +; CHECK-NEXT: ret void +; +entry: + br label %bb0 + +bb0: + %i = phi i64 [ %i.next, %bb0 ], [ 0, %entry ] + %tmp0 = getelementptr inbounds i64, i64* %p, i64 %i + store i64 %i, i64* %tmp0, align 4 + %i.next = add nuw nsw i64 %i, 1 + %idx = srem i64 %i, 2 + %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @loop.targets, i64 0, i64 %idx + %target = load i8*, i8** %arrayidx, align 8 + indirectbr i8* %target, [label %bb0, label %bb1] + +bb1: + ret void +} + +; 
Don't do anything for cases that contain no phis. +define void @nophi(i32* %p) { +; CHECK-LABEL: @nophi( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 +; CHECK-NEXT: [[INITOP:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4 +; CHECK-NEXT: switch i32 [[INITOP]], label [[EXIT:%.*]] [ +; CHECK-NEXT: i32 0, label [[BB0:%.*]] +; CHECK-NEXT: i32 1, label [[BB1:%.*]] +; CHECK-NEXT: ] +; CHECK: bb0: +; CHECK-NEXT: tail call void @use(i32 0) +; CHECK-NEXT: br label [[INDIRECTGOTO:%.*]] +; CHECK: bb1: +; CHECK-NEXT: tail call void @use(i32 1) +; CHECK-NEXT: br label [[INDIRECTGOTO]] +; CHECK: indirectgoto: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to i8* +; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 4 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[SUNKADDR]] to i32* +; CHECK-NEXT: [[NEWP:%.*]] = load i32, i32* [[TMP1]], align 4 +; CHECK-NEXT: [[IDX:%.*]] = sext i32 [[NEWP]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @nophi.targets, i64 0, i64 [[IDX]] +; CHECK-NEXT: [[NEWOP:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +; CHECK-NEXT: indirectbr i8* [[NEWOP]], [label [[BB0]], label %bb1] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %incdec.ptr = getelementptr inbounds i32, i32* %p, i64 1 + %initop = load i32, i32* %incdec.ptr, align 4 + switch i32 %initop, label %exit [ + i32 0, label %bb0 + i32 1, label %bb1 + ] + +bb0: + tail call void @use(i32 0) + br label %indirectgoto + +bb1: + tail call void @use(i32 1) + br label %indirectgoto + +indirectgoto: + %newp = load i32, i32* %incdec.ptr, align 4 + %idx = sext i32 %newp to i64 + %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @nophi.targets, i64 0, i64 %idx + %newop = load i8*, i8** %arrayidx, align 8 + indirectbr i8* %newop, [label %bb0, label %bb1] + +exit: + ret void +} + +; Don't do anything if the edge isn't critical. 
+define i32 @noncritical(i32 %k, i8* %p) +; CHECK-LABEL: @noncritical( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[D:%.*]] = add i32 [[K:%.*]], 1 +; CHECK-NEXT: indirectbr i8* [[P:%.*]], [label [[BB0:%.*]], label %bb1] +; CHECK: bb0: +; CHECK-NEXT: [[R0:%.*]] = sub i32 [[K]], [[D]] +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[R1:%.*]] = sub i32 [[D]], [[K]] +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[V:%.*]] = phi i32 [ [[R0]], [[BB0]] ], [ [[R1]], [[BB1:%.*]] ] +; CHECK-NEXT: ret i32 0 +; +{ +entry: + %d = add i32 %k, 1 + indirectbr i8* %p, [label %bb0, label %bb1] + +bb0: + %v00 = phi i32 [%k, %entry] + %v01 = phi i32 [%d, %entry] + %r0 = sub i32 %v00, %v01 + br label %exit + +bb1: + %v10 = phi i32 [%d, %entry] + %v11 = phi i32 [%k, %entry] + %r1 = sub i32 %v10, %v11 + br label %exit + +exit: + %v = phi i32 [%r0, %bb0], [%r1, %bb1] + ret i32 0 +} diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll b/llvm/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll new file mode 100644 index 00000000000..72d82e2a162 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll @@ -0,0 +1,56 @@ +; RUN: opt -S -codegenprepare < %s | FileCheck %s --check-prefix=SLOW +; RUN: opt -S -codegenprepare -mattr=+bmi < %s | FileCheck %s --check-prefix=FAST_TZ +; RUN: opt -S -codegenprepare -mattr=+lzcnt < %s | FileCheck %s --check-prefix=FAST_LZ + +target triple = "x86_64-unknown-unknown" +target datalayout = "e-n32:64" + +; If the intrinsic is cheap, nothing should change. +; If the intrinsic is expensive, check if the input is zero to avoid the call. +; This is undoing speculation that may have been created by SimplifyCFG + InstCombine. 
+ +define i64 @cttz(i64 %A) { +entry: + %z = call i64 @llvm.cttz.i64(i64 %A, i1 false) + ret i64 %z + +; SLOW-LABEL: @cttz( +; SLOW: entry: +; SLOW: %cmpz = icmp eq i64 %A, 0 +; SLOW: br i1 %cmpz, label %cond.end, label %cond.false +; SLOW: cond.false: +; SLOW: %z = call i64 @llvm.cttz.i64(i64 %A, i1 true) +; SLOW: br label %cond.end +; SLOW: cond.end: +; SLOW: %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ] +; SLOW: ret i64 %ctz + +; FAST_TZ-LABEL: @cttz( +; FAST_TZ: %z = call i64 @llvm.cttz.i64(i64 %A, i1 false) +; FAST_TZ: ret i64 %z +} + +define i64 @ctlz(i64 %A) { +entry: + %z = call i64 @llvm.ctlz.i64(i64 %A, i1 false) + ret i64 %z + +; SLOW-LABEL: @ctlz( +; SLOW: entry: +; SLOW: %cmpz = icmp eq i64 %A, 0 +; SLOW: br i1 %cmpz, label %cond.end, label %cond.false +; SLOW: cond.false: +; SLOW: %z = call i64 @llvm.ctlz.i64(i64 %A, i1 true) +; SLOW: br label %cond.end +; SLOW: cond.end: +; SLOW: %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ] +; SLOW: ret i64 %ctz + +; FAST_LZ-LABEL: @ctlz( +; FAST_LZ: %z = call i64 @llvm.ctlz.i64(i64 %A, i1 false) +; FAST_LZ: ret i64 %z +} + +declare i64 @llvm.cttz.i64(i64, i1) +declare i64 @llvm.ctlz.i64(i64, i1) + diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/ext-logicop.ll b/llvm/test/Transforms/CodeGenPrepare/X86/ext-logicop.ll new file mode 100644 index 00000000000..51d1e0ab676 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/ext-logicop.ll @@ -0,0 +1,128 @@ +; RUN: opt < %s -codegenprepare -S -mtriple=x86_64-unknown-unknown | FileCheck %s + + +@a = global [10 x i8] zeroinitializer, align 1 +declare void @foo() + +; ext(and(ld, cst)) -> and(ext(ld), ext(cst)) +define void @test1(i32* %p, i32 %ll) { +; CHECK-LABEL: @test1 +; CHECK-NEXT: entry: +; CHECK-NEXT: load +; CHECK-NEXT: zext +; CHECK-NEXT: and +entry: + %tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1 + %and = and i8 %tmp, 60 + %cmp = icmp ugt i8 %and, 20 + br i1 %cmp, label %if.then, label %if.end + 
+if.then: ; preds = %entry + %conv2 = zext i8 %and to i32 + %add = add nsw i32 %conv2, %ll + store i32 %add, i32* %p, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + tail call void @foo() + ret void +} + +; ext(or(ld, cst)) -> or(ext(ld), ext(cst)) +define void @test2(i32* %p, i32 %ll) { +; CHECK-LABEL: @test2 +; CHECK-NEXT: entry: +; CHECK-NEXT: load +; CHECK-NEXT: zext +; CHECK-NEXT: or +entry: + %tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1 + %or = or i8 %tmp, 60 + %cmp = icmp ugt i8 %or, 20 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %conv2 = zext i8 %or to i32 + %add = add nsw i32 %conv2, %ll + store i32 %add, i32* %p, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + tail call void @foo() + ret void +} + +; ext(and(shl(ld, cst), cst)) -> and(shl(ext(ld), ext(cst)), ext(cst)) +define void @test3(i32* %p, i32 %ll) { +; CHECK-LABEL: @test3 +; CHECK-NEXT: entry: +; CHECK-NEXT: load +; CHECK-NEXT: zext +; CHECK-NEXT: shl +; CHECK-NEXT: and +entry: + %tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1 + %shl = shl i8 %tmp, 2 + %and = and i8 %shl, 60 + %cmp = icmp ugt i8 %and, 20 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %conv2 = zext i8 %and to i32 + %add = add nsw i32 %conv2, %ll + store i32 %add, i32* %p, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + tail call void @foo() + ret void +} + +; zext(shrl(ld, cst)) -> shrl(zext(ld), zext(cst)) +define void @test4(i32* %p, i32 %ll) { +; CHECK-LABEL: @test4 +; CHECK-NEXT: entry: +; CHECK-NEXT: load +; CHECK-NEXT: zext +; CHECK-NEXT: lshr +entry: + %tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1 + %lshr = lshr i8 %tmp, 2 + %cmp = icmp ugt i8 %lshr, 20 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %conv2 = zext i8 %lshr to i32 + %add = add nsw 
i32 %conv2, %ll + store i32 %add, i32* %p, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + tail call void @foo() + ret void +} + +; ext(xor(ld, cst)) -> xor(ext(ld), ext(cst)) +define void @test5(i32* %p, i32 %ll) { +; CHECK-LABEL: @test5 +; CHECK-NEXT: entry: +; CHECK-NEXT: load +; CHECK-NEXT: zext +; CHECK-NEXT: xor +entry: + %tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1 + %xor = xor i8 %tmp, 60 + %cmp = icmp ugt i8 %xor, 20 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %conv2 = zext i8 %xor to i32 + %add = add nsw i32 %conv2, %ll + store i32 %add, i32* %p, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + tail call void @foo() + ret void +} + diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll b/llvm/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll new file mode 100644 index 00000000000..519e1ee2ce6 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll @@ -0,0 +1,64 @@ +; RUN: opt -codegenprepare -disable-cgp-branch-opts -S < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; The first cast should be sunk into block2, in order that the +; instruction selector can form an efficient +; i64 * i64 -> i128 multiplication. +define i128 @sink(i64* %mem1, i64* %mem2) { +; CHECK-LABEL: block1: +; CHECK-NEXT: load +block1: + %l1 = load i64, i64* %mem1 + %s1 = sext i64 %l1 to i128 + br label %block2 + +; CHECK-LABEL: block2: +; CHECK-NEXT: sext +; CHECK-NEXT: load +; CHECK-NEXT: sext +block2: + %l2 = load i64, i64* %mem2 + %s2 = sext i64 %l2 to i128 + %res = mul i128 %s1, %s2 + ret i128 %res +} + +; The first cast should be hoisted into block1, in order that the +; instruction selector can form an extend-load. 
+define i64 @hoist(i32* %mem1, i32* %mem2) { +; CHECK-LABEL: block1: +; CHECK-NEXT: load +; CHECK-NEXT: sext +block1: + %l1 = load i32, i32* %mem1 + br label %block2 + +; CHECK-LABEL: block2: +; CHECK-NEXT: load +; CHECK-NEXT: sext +block2: + %s1 = sext i32 %l1 to i64 + %l2 = load i32, i32* %mem2 + %s2 = sext i32 %l2 to i64 + %res = mul i64 %s1, %s2 + ret i64 %res +} + +; Make sure the cast sink logic and OptimizeExtUses don't end up in an infinite +; loop. +define i128 @use_ext_source() { +block1: + %v1 = or i64 undef, undef + %v2 = zext i64 %v1 to i128 + br i1 undef, label %block2, label %block3 + +block2: + %v3 = add i64 %v1, 1 + %v4 = zext i64 %v3 to i128 + br label %block3 + +block3: + %res = phi i128 [ %v2, %block1 ], [ %v4, %block2 ] + ret i128 %res +} diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/fcmp-sinking.ll b/llvm/test/Transforms/CodeGenPrepare/X86/fcmp-sinking.ll new file mode 100644 index 00000000000..94ab74f9e7b --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/fcmp-sinking.ll @@ -0,0 +1,29 @@ +; RUN: opt %s -codegenprepare -mattr=+soft-float -S | FileCheck %s -check-prefix=CHECK -check-prefix=SOFTFP +; RUN: opt %s -codegenprepare -mattr=-soft-float -S | FileCheck %s -check-prefix=CHECK -check-prefix=HARDFP + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-LABEL: @foo +; CHECK: entry: +; SOFTFP: fcmp +; HARDFP-NOT: fcmp +; CHECK: body: +; SOFTFP-NOT: fcmp +; HARDFP: fcmp +define void @foo(float %a, float %b) { +entry: + %c = fcmp oeq float %a, %b + br label %head +head: + %IND = phi i32 [ 0, %entry ], [ %IND.new, %body1 ] + %CMP = icmp slt i32 %IND, 1250 + br i1 %CMP, label %body, label %tail +body: + br i1 %c, label %body1, label %tail +body1: + %IND.new = add i32 %IND, 1 + br label %head +tail: + ret void +} diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/lit.local.cfg b/llvm/test/Transforms/CodeGenPrepare/X86/lit.local.cfg new file mode 100644 index 
00000000000..e71f3cc4c41 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'X86' in config.root.targets: + config.unsupported = True + diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/memset_chk-simplify-nobuiltin.ll b/llvm/test/Transforms/CodeGenPrepare/X86/memset_chk-simplify-nobuiltin.ll new file mode 100644 index 00000000000..f4c1af5ed46 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/memset_chk-simplify-nobuiltin.ll @@ -0,0 +1,18 @@ +; RUN: opt -S -disable-simplify-libcalls -codegenprepare < %s | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; This is a workaround for PR23093: when building with -mkernel/-fno-builtin, +; we still generate fortified library calls. + +; Check that we ignore two things: +; - attribute nobuiltin +; - TLI::has (always returns false thanks to -disable-simplify-libcalls) + +; CHECK-NOT: _chk +; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 %len, i1 false) +define void @test_nobuiltin(i8* %dst, i64 %len) { + call i8* @__memset_chk(i8* %dst, i32 0, i64 %len, i64 -1) nobuiltin + ret void +} + +declare i8* @__memset_chk(i8*, i32, i64, i64) diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/multi-extension.ll b/llvm/test/Transforms/CodeGenPrepare/X86/multi-extension.ll new file mode 100644 index 00000000000..950f9f2e04a --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/multi-extension.ll @@ -0,0 +1,25 @@ +; RUN: opt < %s -codegenprepare -S -mtriple=x86_64-unknown-unknown | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.13.0" + +declare void @bar(i64) + +@b = global i16 0, align 2 + +; This test case is extracted from PR38125. +; %or is reachable by both a sext and zext that are going to be promoted. +; It ensures correct operation on PromotedInsts. 
+ +; CHECK: %promoted = trunc i32 %or to i16 +; CHECK-NEXT: %c = sext i16 %promoted to i64 +define i32 @foo(i16 %kkk) { +entry: + %t4 = load i16, i16* @b, align 2 + %conv4 = zext i16 %t4 to i32 + %or = or i16 %kkk, %t4 + %c = sext i16 %or to i64 + call void @bar(i64 %c) + %t5 = and i16 %or, 5 + %z = zext i16 %t5 to i32 + ret i32 %z +} diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/optimizeSelect-DT.ll b/llvm/test/Transforms/CodeGenPrepare/X86/optimizeSelect-DT.ll new file mode 100644 index 00000000000..dc638425355 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/optimizeSelect-DT.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -codegenprepare < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i1 @PR41004(i32 %x, i32 %y, i32 %t1) { +; CHECK-LABEL: @PR41004( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[T0:%.*]] = icmp eq i32 [[Y:%.*]], 1 +; CHECK-NEXT: br i1 [[T0]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]] +; CHECK: select.true.sink: +; CHECK-NEXT: [[REM:%.*]] = srem i32 [[X:%.*]], 2 +; CHECK-NEXT: br label [[SELECT_END]] +; CHECK: select.end: +; CHECK-NEXT: [[MUL:%.*]] = phi i32 [ [[REM]], [[SELECT_TRUE_SINK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[T1:%.*]], i32 1) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0 +; CHECK-NEXT: [[OV:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MATH]], [[MUL]] +; CHECK-NEXT: ret i1 [[OV]] +; +entry: + %rem = srem i32 %x, 2 + %t0 = icmp eq i32 %y, 1 + %mul = select i1 %t0, i32 %rem, i32 0 + %neg = add i32 %t1, -1 + %add = add i32 %neg, %mul + br label %if + +if: + %tobool = icmp eq i32 %t1, 0 + ret i1 %tobool +} diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll 
b/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll new file mode 100644 index 00000000000..ab636c39ddb --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll @@ -0,0 +1,519 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -codegenprepare -S < %s | FileCheck %s +; RUN: opt -enable-debugify -codegenprepare -S < %s 2>&1 | FileCheck %s -check-prefix=DEBUG + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +define i64 @uaddo1(i64 %a, i64 %b) nounwind ssp { +; CHECK-LABEL: @uaddo1( +; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]]) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1 +; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42 +; CHECK-NEXT: ret i64 [[Q]] +; + %add = add i64 %b, %a + %cmp = icmp ult i64 %add, %a + %Q = select i1 %cmp, i64 %b, i64 42 + ret i64 %Q +} + +define i64 @uaddo2(i64 %a, i64 %b) nounwind ssp { +; CHECK-LABEL: @uaddo2( +; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]]) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1 +; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42 +; CHECK-NEXT: ret i64 [[Q]] +; + %add = add i64 %b, %a + %cmp = icmp ult i64 %add, %b + %Q = select i1 %cmp, i64 %b, i64 42 + ret i64 %Q +} + +define i64 @uaddo3(i64 %a, i64 %b) nounwind ssp { +; CHECK-LABEL: @uaddo3( +; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]]) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } 
[[TMP1]], 1 +; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42 +; CHECK-NEXT: ret i64 [[Q]] +; + %add = add i64 %b, %a + %cmp = icmp ugt i64 %b, %add + %Q = select i1 %cmp, i64 %b, i64 42 + ret i64 %Q +} + +define i64 @uaddo4(i64 %a, i64 %b, i1 %c) nounwind ssp { +; CHECK-LABEL: @uaddo4( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[NEXT:%.*]], label [[EXIT:%.*]] +; CHECK: next: +; CHECK-NEXT: [[TMP0:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]]) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0 +; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1 +; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42 +; CHECK-NEXT: ret i64 [[Q]] +; CHECK: exit: +; CHECK-NEXT: ret i64 0 +; +entry: + %add = add i64 %b, %a + %cmp = icmp ugt i64 %b, %add + br i1 %c, label %next, label %exit + +next: + %Q = select i1 %cmp, i64 %b, i64 42 + ret i64 %Q + +exit: + ret i64 0 +} + +define i64 @uaddo5(i64 %a, i64 %b, i64* %ptr, i1 %c) nounwind ssp { +; CHECK-LABEL: @uaddo5( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = add i64 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: store i64 [[ADD]], i64* [[PTR:%.*]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[NEXT:%.*]], label [[EXIT:%.*]] +; CHECK: next: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[B]], [[ADD]] +; CHECK-NEXT: [[Q:%.*]] = select i1 [[TMP0]], i64 [[B]], i64 42 +; CHECK-NEXT: ret i64 [[Q]] +; CHECK: exit: +; CHECK-NEXT: ret i64 0 +; +entry: + %add = add i64 %b, %a + store i64 %add, i64* %ptr + %cmp = icmp ugt i64 %b, %add + br i1 %c, label %next, label %exit + +next: + %Q = select i1 %cmp, i64 %b, i64 42 + ret i64 %Q + +exit: + ret i64 0 +} + +; When adding 1, the general pattern for add-overflow may be different due to icmp canonicalization. 
+; PR31754: https://bugs.llvm.org/show_bug.cgi?id=31754 + +define i1 @uaddo_i64_increment(i64 %x, i64* %p) { +; CHECK-LABEL: @uaddo_i64_increment( +; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 1) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1 +; CHECK-NEXT: store i64 [[MATH]], i64* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV1]] +; + %a = add i64 %x, 1 + %ov = icmp eq i64 %a, 0 + store i64 %a, i64* %p + ret i1 %ov +} + +define i1 @uaddo_i8_increment_noncanonical_1(i8 %x, i8* %p) { +; CHECK-LABEL: @uaddo_i8_increment_noncanonical_1( +; CHECK-NEXT: [[TMP1:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 1, i8 [[X:%.*]]) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i8, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i8, i1 } [[TMP1]], 1 +; CHECK-NEXT: store i8 [[MATH]], i8* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV1]] +; + %a = add i8 1, %x ; commute + %ov = icmp eq i8 %a, 0 + store i8 %a, i8* %p + ret i1 %ov +} + +define i1 @uaddo_i32_increment_noncanonical_2(i32 %x, i32* %p) { +; CHECK-LABEL: @uaddo_i32_increment_noncanonical_2( +; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 1) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1 +; CHECK-NEXT: store i32 [[MATH]], i32* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV1]] +; + %a = add i32 %x, 1 + %ov = icmp eq i32 0, %a ; commute + store i32 %a, i32* %p + ret i1 %ov +} + +define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, i16* %p) { +; CHECK-LABEL: @uaddo_i16_increment_noncanonical_3( +; CHECK-NEXT: [[TMP1:%.*]] = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 1, i16 [[X:%.*]]) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i16, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i16, i1 } [[TMP1]], 1 +; CHECK-NEXT: store i16 [[MATH]], i16* [[P:%.*]] 
+; CHECK-NEXT: ret i1 [[OV1]] +; + %a = add i16 1, %x ; commute + %ov = icmp eq i16 0, %a ; commute + store i16 %a, i16* %p + ret i1 %ov +} + +; The overflow check may be against the input rather than the sum. + +define i1 @uaddo_i64_increment_alt(i64 %x, i64* %p) { +; CHECK-LABEL: @uaddo_i64_increment_alt( +; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 1) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1 +; CHECK-NEXT: store i64 [[MATH]], i64* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV1]] +; + %a = add i64 %x, 1 + store i64 %a, i64* %p + %ov = icmp eq i64 %x, -1 + ret i1 %ov +} + +; Make sure insertion is done correctly based on dominance. + +define i1 @uaddo_i64_increment_alt_dom(i64 %x, i64* %p) { +; CHECK-LABEL: @uaddo_i64_increment_alt_dom( +; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 1) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1 +; CHECK-NEXT: store i64 [[MATH]], i64* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV1]] +; + %ov = icmp eq i64 %x, -1 + %a = add i64 %x, 1 + store i64 %a, i64* %p + ret i1 %ov +} + +; The overflow check may be against the input rather than the sum. + +define i1 @uaddo_i64_decrement_alt(i64 %x, i64* %p) { +; CHECK-LABEL: @uaddo_i64_decrement_alt( +; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 -1) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1 +; CHECK-NEXT: store i64 [[MATH]], i64* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV1]] +; + %a = add i64 %x, -1 + store i64 %a, i64* %p + %ov = icmp ne i64 %x, 0 + ret i1 %ov +} + +; Make sure insertion is done correctly based on dominance. 
+ +define i1 @uaddo_i64_decrement_alt_dom(i64 %x, i64* %p) { +; CHECK-LABEL: @uaddo_i64_decrement_alt_dom( +; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 -1) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1 +; CHECK-NEXT: store i64 [[MATH]], i64* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV1]] +; + %ov = icmp ne i64 %x, 0 + %a = add i64 %x, -1 + store i64 %a, i64* %p + ret i1 %ov +} + +; No transform for illegal types. + +define i1 @uaddo_i42_increment_illegal_type(i42 %x, i42* %p) { +; CHECK-LABEL: @uaddo_i42_increment_illegal_type( +; CHECK-NEXT: [[A:%.*]] = add i42 [[X:%.*]], 1 +; CHECK-NEXT: [[OV:%.*]] = icmp eq i42 [[A]], 0 +; CHECK-NEXT: store i42 [[A]], i42* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV]] +; + %a = add i42 %x, 1 + %ov = icmp eq i42 %a, 0 + store i42 %a, i42* %p + ret i1 %ov +} + +define i1 @usubo_ult_i64(i64 %x, i64 %y, i64* %p) { +; CHECK-LABEL: @usubo_ult_i64( +; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[X:%.*]], i64 [[Y:%.*]]) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1 +; CHECK-NEXT: store i64 [[MATH]], i64* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV1]] +; + %s = sub i64 %x, %y + store i64 %s, i64* %p + %ov = icmp ult i64 %x, %y + ret i1 %ov +} + +; Verify insertion point for single-BB. Toggle predicate. 
+ +define i1 @usubo_ugt_i32(i32 %x, i32 %y, i32* %p) { +; CHECK-LABEL: @usubo_ugt_i32( +; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1 +; CHECK-NEXT: store i32 [[MATH]], i32* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV1]] +; + %ov = icmp ugt i32 %y, %x + %s = sub i32 %x, %y + store i32 %s, i32* %p + ret i1 %ov +} + +; Constant operand should match. + +define i1 @usubo_ugt_constant_op0_i8(i8 %x, i8* %p) { +; CHECK-LABEL: @usubo_ugt_constant_op0_i8( +; CHECK-NEXT: [[TMP1:%.*]] = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 42, i8 [[X:%.*]]) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i8, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i8, i1 } [[TMP1]], 1 +; CHECK-NEXT: store i8 [[MATH]], i8* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV1]] +; + %s = sub i8 42, %x + %ov = icmp ugt i8 %x, 42 + store i8 %s, i8* %p + ret i1 %ov +} + +; Compare with constant operand 0 is canonicalized by commuting, but verify match for non-canonical form. + +define i1 @usubo_ult_constant_op0_i16(i16 %x, i16* %p) { +; CHECK-LABEL: @usubo_ult_constant_op0_i16( +; CHECK-NEXT: [[TMP1:%.*]] = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 43, i16 [[X:%.*]]) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i16, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i16, i1 } [[TMP1]], 1 +; CHECK-NEXT: store i16 [[MATH]], i16* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV1]] +; + %s = sub i16 43, %x + %ov = icmp ult i16 43, %x + store i16 %s, i16* %p + ret i1 %ov +} + +; Subtract with constant operand 1 is canonicalized to add. 
+ +define i1 @usubo_ult_constant_op1_i16(i16 %x, i16* %p) { +; CHECK-LABEL: @usubo_ult_constant_op1_i16( +; CHECK-NEXT: [[TMP1:%.*]] = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 [[X:%.*]], i16 44) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i16, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i16, i1 } [[TMP1]], 1 +; CHECK-NEXT: store i16 [[MATH]], i16* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV1]] +; + %s = add i16 %x, -44 + %ov = icmp ult i16 %x, 44 + store i16 %s, i16* %p + ret i1 %ov +} + +define i1 @usubo_ugt_constant_op1_i8(i8 %x, i8* %p) { +; CHECK-LABEL: @usubo_ugt_constant_op1_i8( +; CHECK-NEXT: [[TMP1:%.*]] = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[X:%.*]], i8 45) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i8, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i8, i1 } [[TMP1]], 1 +; CHECK-NEXT: store i8 [[MATH]], i8* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV1]] +; + %ov = icmp ugt i8 45, %x + %s = add i8 %x, -45 + store i8 %s, i8* %p + ret i1 %ov +} + +; Special-case: subtract 1 changes the compare predicate and constant. + +define i1 @usubo_eq_constant1_op1_i32(i32 %x, i32* %p) { +; CHECK-LABEL: @usubo_eq_constant1_op1_i32( +; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[X:%.*]], i32 1) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1 +; CHECK-NEXT: store i32 [[MATH]], i32* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV1]] +; + %s = add i32 %x, -1 + %ov = icmp eq i32 %x, 0 + store i32 %s, i32* %p + ret i1 %ov +} + +; Special-case: subtract from 0 (negate) changes the compare predicate. 
+ +define i1 @usubo_ne_constant0_op1_i32(i32 %x, i32* %p) { +; CHECK-LABEL: @usubo_ne_constant0_op1_i32( +; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 [[X:%.*]]) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1 +; CHECK-NEXT: store i32 [[MATH]], i32* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV1]] +; + %s = sub i32 0, %x + %ov = icmp ne i32 %x, 0 + store i32 %s, i32* %p + ret i1 %ov +} + +; Verify insertion point for multi-BB. + +declare void @call(i1) + +define i1 @usubo_ult_sub_dominates_i64(i64 %x, i64 %y, i64* %p, i1 %cond) { +; CHECK-LABEL: @usubo_ult_sub_dominates_i64( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] +; CHECK: t: +; CHECK-NEXT: [[TMP0:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[X:%.*]], i64 [[Y:%.*]]) +; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1 +; CHECK-NEXT: store i64 [[MATH]], i64* [[P:%.*]] +; CHECK-NEXT: br i1 [[COND]], label [[END:%.*]], label [[F]] +; CHECK: f: +; CHECK-NEXT: ret i1 [[COND]] +; CHECK: end: +; CHECK-NEXT: ret i1 [[OV1]] +; +entry: + br i1 %cond, label %t, label %f + +t: + %s = sub i64 %x, %y + store i64 %s, i64* %p + br i1 %cond, label %end, label %f + +f: + ret i1 %cond + +end: + %ov = icmp ult i64 %x, %y + ret i1 %ov +} + +define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, i64* %p, i1 %cond) { +; CHECK-LABEL: @usubo_ult_cmp_dominates_i64( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] +; CHECK: t: +; CHECK-NEXT: [[OV:%.*]] = icmp ult i64 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: call void @call(i1 [[OV]]) +; CHECK-NEXT: br i1 [[OV]], label [[END:%.*]], label [[F]] +; CHECK: f: +; CHECK-NEXT: ret i1 [[COND]] +; CHECK: end: +; CHECK-NEXT: [[TMP0:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[X]], i64 [[Y]]) +; 
CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0 +; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1 +; CHECK-NEXT: store i64 [[MATH]], i64* [[P:%.*]] +; CHECK-NEXT: ret i1 [[OV1]] +; +entry: + br i1 %cond, label %t, label %f + +t: + %ov = icmp ult i64 %x, %y + call void @call(i1 %ov) + br i1 %ov, label %end, label %f + +f: + ret i1 %cond + +end: + %s = sub i64 %x, %y + store i64 %s, i64* %p + ret i1 %ov +} + +; Verify that crazy/non-canonical code does not crash. + +define void @bar() { +; CHECK-LABEL: @bar( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 1, -1 +; CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[CMP]] to i8 +; CHECK-NEXT: unreachable +; + %cmp = icmp eq i64 1, -1 + %frombool = zext i1 %cmp to i8 + unreachable +} + +define void @foo() { +; CHECK-LABEL: @foo( +; CHECK-NEXT: [[SUB:%.*]] = add nsw i64 1, 1 +; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[SUB]] to i32 +; CHECK-NEXT: unreachable +; + %sub = add nsw i64 1, 1 + %conv = trunc i64 %sub to i32 + unreachable +} + +; Similarly for usubo. + +define i1 @bar2() { +; CHECK-LABEL: @bar2( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 1, 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp eq i64 1, 0 + ret i1 %cmp +} + +define i64 @foo2(i8 *%p) { +; CHECK-LABEL: @foo2( +; CHECK-NEXT: [[SUB:%.*]] = add nsw i64 1, -1 +; CHECK-NEXT: ret i64 [[SUB]] +; + %sub = add nsw i64 1, -1 + ret i64 %sub +} + +; Avoid hoisting a math op into a dominating block which would +; increase the critical path. 
+ +define void @PR41129(i64* %p64) { +; CHECK-LABEL: @PR41129( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[KEY:%.*]] = load i64, i64* [[P64:%.*]], align 8 +; CHECK-NEXT: [[COND17:%.*]] = icmp eq i64 [[KEY]], 0 +; CHECK-NEXT: br i1 [[COND17]], label [[TRUE:%.*]], label [[FALSE:%.*]] +; CHECK: false: +; CHECK-NEXT: [[ANDVAL:%.*]] = and i64 [[KEY]], 7 +; CHECK-NEXT: store i64 [[ANDVAL]], i64* [[P64]] +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: true: +; CHECK-NEXT: [[SVALUE:%.*]] = add i64 [[KEY]], -1 +; CHECK-NEXT: store i64 [[SVALUE]], i64* [[P64]] +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %key = load i64, i64* %p64, align 8 + %cond17 = icmp eq i64 %key, 0 + br i1 %cond17, label %true, label %false + +false: + %andval = and i64 %key, 7 + store i64 %andval, i64* %p64 + br label %exit + +true: + %svalue = add i64 %key, -1 + store i64 %svalue, i64* %p64 + br label %exit + +exit: + ret void +} + +; Check that every instruction inserted by -codegenprepare has a debug location. 
+; DEBUG: CheckModuleDebugify: PASS + diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/pr27536.ll b/llvm/test/Transforms/CodeGenPrepare/X86/pr27536.ll new file mode 100644 index 00000000000..7ab1b038e80 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/pr27536.ll @@ -0,0 +1,32 @@ +; RUN: opt -S -codegenprepare < %s | FileCheck %s +target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc" + +@rtti = external global i8 + +define void @test1() personality i32 (...)* @__CxxFrameHandler3 { +entry: + %e = alloca i8 + %tmpcast = bitcast i8* %e to i16* + invoke void @_CxxThrowException(i8* null, i8* null) + to label %catchret.dest unwind label %catch.dispatch + +catch.dispatch: ; preds = %entry + %0 = catchswitch within none [label %catch] unwind to caller + +catch: ; preds = %catch.dispatch + %1 = catchpad within %0 [i8* @rtti, i32 0, i16* %tmpcast] + catchret from %1 to label %catchret.dest + +catchret.dest: ; preds = %catch + ret void +} +; CHECK-LABEL: define void @test1( +; CHECK: %[[alloca:.*]] = alloca i8 +; CHECK-NEXT: %[[bc:.*]] = bitcast i8* %[[alloca]] to i16* + +; CHECK: catchpad within {{.*}} [i8* @rtti, i32 0, i16* %[[bc]]] + +declare void @_CxxThrowException(i8*, i8*) + +declare i32 @__CxxFrameHandler3(...) 
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/pr35658.ll b/llvm/test/Transforms/CodeGenPrepare/X86/pr35658.ll new file mode 100644 index 00000000000..bf6d0297475 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/pr35658.ll @@ -0,0 +1,21 @@ +; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-phis=true -addr-sink-new-select=true %s | FileCheck %s +target datalayout = +"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" +define void @f2() { +entry: + %arraydecay = getelementptr inbounds [2 x i16], [2 x i16]* undef, i16 0, i16 0 + %arrayidx1 = getelementptr inbounds [2 x i16], [2 x i16]* undef, i16 0, i16 1 + br label %for.body + +for.body: ; preds = %for.body, %entry + %e.03 = phi i16* [ %arraydecay, %entry ], [ %arrayidx1, %for.body ] + %tobool = icmp eq i16 undef, 0 + br i1 undef, label %for.body, label %for.end + +for.end: ; preds = %for.body +; CHECK: sunkaddr + %e.1.le = select i1 %tobool, i16* %arrayidx1, i16* %e.03 + store i16 0, i16* %e.1.le, align 1 + ret void +} diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/select.ll b/llvm/test/Transforms/CodeGenPrepare/X86/select.ll new file mode 100644 index 00000000000..7829376e9db --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/select.ll @@ -0,0 +1,205 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -codegenprepare -S < %s | FileCheck %s +; RUN: opt -debugify -codegenprepare -S < %s | FileCheck %s -check-prefix=DEBUG + +target triple = "x86_64-unknown-unknown" + +; Nothing to sink and convert here. 
+ +define i32 @no_sink(double %a, double* %b, i32 %x, i32 %y) { +; CHECK-LABEL: @no_sink( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOAD:%.*]] = load double, double* [[B:%.*]], align 8 +; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[LOAD]], [[A:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[X:%.*]], i32 [[Y:%.*]] +; CHECK-NEXT: ret i32 [[SEL]] +; +entry: + %load = load double, double* %b, align 8 + %cmp = fcmp olt double %load, %a + %sel = select i1 %cmp, i32 %x, i32 %y + ret i32 %sel +} + + +; An 'fdiv' is expensive, so sink it rather than speculatively execute it. + +define float @fdiv_true_sink(float %a, float %b) { +; CHECK-LABEL: @fdiv_true_sink( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[A:%.*]], 1.000000e+00 +; CHECK-NEXT: br i1 [[CMP]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]] +; CHECK: select.true.sink: +; CHECK-NEXT: [[DIV:%.*]] = fdiv float [[A]], [[B:%.*]] +; CHECK-NEXT: br label [[SELECT_END]] +; CHECK: select.end: +; CHECK-NEXT: [[SEL:%.*]] = phi float [ [[DIV]], [[SELECT_TRUE_SINK]] ], [ 2.000000e+00, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret float [[SEL]] +; +; DEBUG-LABEL: @fdiv_true_sink( +; DEBUG-NEXT: entry: +; DEBUG-NEXT: [[CMP:%.*]] = fcmp ogt float [[A:%.*]], 1.000000e+00 +; DEBUG-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP]] +; DEBUG-NEXT: br i1 [[CMP]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !dbg +; DEBUG: select.true.sink: +; DEBUG-NEXT: [[DIV:%.*]] = fdiv float [[A]], [[B:%.*]] +; DEBUG-NEXT: call void @llvm.dbg.value(metadata float [[DIV]] +; DEBUG-NEXT: br label [[SELECT_END]], !dbg +; DEBUG: select.end: +; DEBUG-NEXT: [[SEL:%.*]] = phi float [ [[DIV]], [[SELECT_TRUE_SINK]] ], [ 2.000000e+00, [[ENTRY:%.*]] ], !dbg +; DEBUG-NEXT: call void @llvm.dbg.value(metadata float [[SEL]] +; DEBUG-NEXT: ret float [[SEL]] +; +entry: + %div = fdiv float %a, %b + %cmp = fcmp ogt float %a, 1.0 + %sel = select i1 %cmp, float %div, float 2.0 + ret float %sel +} + +define float 
@fdiv_false_sink(float %a, float %b) { +; CHECK-LABEL: @fdiv_false_sink( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[A:%.*]], 3.000000e+00 +; CHECK-NEXT: br i1 [[CMP]], label [[SELECT_END:%.*]], label [[SELECT_FALSE_SINK:%.*]] +; CHECK: select.false.sink: +; CHECK-NEXT: [[DIV:%.*]] = fdiv float [[A]], [[B:%.*]] +; CHECK-NEXT: br label [[SELECT_END]] +; CHECK: select.end: +; CHECK-NEXT: [[SEL:%.*]] = phi float [ 4.000000e+00, [[ENTRY:%.*]] ], [ [[DIV]], [[SELECT_FALSE_SINK]] ] +; CHECK-NEXT: ret float [[SEL]] +; +; DEBUG-LABEL: @fdiv_false_sink( +; DEBUG-NEXT: entry: +; DEBUG-NEXT: [[CMP:%.*]] = fcmp ogt float [[A:%.*]], 3.000000e+00 +; DEBUG-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP]] +; DEBUG-NEXT: br i1 [[CMP]], label [[SELECT_END:%.*]], label [[SELECT_FALSE_SINK:%.*]], !dbg +; DEBUG: select.false.sink: +; DEBUG-NEXT: [[DIV:%.*]] = fdiv float [[A]], [[B:%.*]] +; DEBUG-NEXT: call void @llvm.dbg.value(metadata float [[DIV]] +; DEBUG-NEXT: br label [[SELECT_END]], !dbg +; DEBUG: select.end: +; DEBUG-NEXT: [[SEL:%.*]] = phi float [ 4.000000e+00, [[ENTRY:%.*]] ], [ [[DIV]], [[SELECT_FALSE_SINK]] ], !dbg +; DEBUG-NEXT: call void @llvm.dbg.value(metadata float [[SEL]] +; DEBUG-NEXT: ret float [[SEL]], !dbg +; +entry: + %div = fdiv float %a, %b + %cmp = fcmp ogt float %a, 3.0 + %sel = select i1 %cmp, float 4.0, float %div + ret float %sel +} + +define float @fdiv_both_sink(float %a, float %b) { +; CHECK-LABEL: @fdiv_both_sink( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[A:%.*]], 5.000000e+00 +; CHECK-NEXT: br i1 [[CMP]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_FALSE_SINK:%.*]] +; CHECK: select.true.sink: +; CHECK-NEXT: [[DIV1:%.*]] = fdiv float [[A]], [[B:%.*]] +; CHECK-NEXT: br label [[SELECT_END:%.*]] +; CHECK: select.false.sink: +; CHECK-NEXT: [[DIV2:%.*]] = fdiv float [[B]], [[A]] +; CHECK-NEXT: br label [[SELECT_END]] +; CHECK: select.end: +; CHECK-NEXT: [[SEL:%.*]] = phi float [ [[DIV1]], 
[[SELECT_TRUE_SINK]] ], [ [[DIV2]], [[SELECT_FALSE_SINK]] ] +; CHECK-NEXT: ret float [[SEL]] +; +entry: + %div1 = fdiv float %a, %b + %div2 = fdiv float %b, %a + %cmp = fcmp ogt float %a, 5.0 + %sel = select i1 %cmp, float %div1, float %div2 + ret float %sel +} + +; But if the select is marked unpredictable, then don't turn it into a branch. + +define float @unpredictable_select(float %a, float %b) { +; CHECK-LABEL: @unpredictable_select( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DIV:%.*]] = fdiv float [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[A]], 1.000000e+00 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[DIV]], float 2.000000e+00, !unpredictable !0 +; CHECK-NEXT: ret float [[SEL]] +; +entry: + %div = fdiv float %a, %b + %cmp = fcmp ogt float %a, 1.0 + %sel = select i1 %cmp, float %div, float 2.0, !unpredictable !0 + ret float %sel +} + +!0 = !{} + +; An 'fadd' is not too expensive, so it's ok to speculate. + +define float @fadd_no_sink(float %a, float %b) { +; CHECK-LABEL: @fadd_no_sink( +; CHECK-NEXT: [[ADD:%.*]] = fadd float [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float 6.000000e+00, [[A]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[ADD]], float 7.000000e+00 +; CHECK-NEXT: ret float [[SEL]] +; + %add = fadd float %a, %b + %cmp = fcmp ogt float 6.0, %a + %sel = select i1 %cmp, float %add, float 7.0 + ret float %sel +} + +; Possible enhancement: sinkability is only calculated with the direct +; operand of the select, so we don't try to sink this. The fdiv cost is not +; taken into account. 
+ +define float @fdiv_no_sink(float %a, float %b) { +; CHECK-LABEL: @fdiv_no_sink( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DIV:%.*]] = fdiv float [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = fadd float [[DIV]], [[B]] +; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[A]], 1.000000e+00 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[ADD]], float 8.000000e+00 +; CHECK-NEXT: ret float [[SEL]] +; +entry: + %div = fdiv float %a, %b + %add = fadd float %div, %b + %cmp = fcmp ogt float %a, 1.0 + %sel = select i1 %cmp, float %add, float 8.0 + ret float %sel +} + +; Do not transform the CFG if the select operands may have side effects. + +declare i64* @bar(i32, i32, i32) +declare i64* @baz(i32, i32, i32) + +define i64* @calls_no_sink(i32 %in) { +; CHECK-LABEL: @calls_no_sink( +; CHECK-NEXT: [[CALL1:%.*]] = call i64* @bar(i32 1, i32 2, i32 3) +; CHECK-NEXT: [[CALL2:%.*]] = call i64* @baz(i32 1, i32 2, i32 3) +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[IN:%.*]], 0 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TOBOOL]], i64* [[CALL1]], i64* [[CALL2]] +; CHECK-NEXT: ret i64* [[SEL]] +; + %call1 = call i64* @bar(i32 1, i32 2, i32 3) + %call2 = call i64* @baz(i32 1, i32 2, i32 3) + %tobool = icmp ne i32 %in, 0 + %sel = select i1 %tobool, i64* %call1, i64* %call2 + ret i64* %sel +} + +define i32 @sdiv_no_sink(i32 %a, i32 %b) { +; CHECK-LABEL: @sdiv_no_sink( +; CHECK-NEXT: [[DIV1:%.*]] = sdiv i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[DIV2:%.*]] = sdiv i32 [[B]], [[A]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A]], 5 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[DIV1]], i32 [[DIV2]] +; CHECK-NEXT: ret i32 [[SEL]] +; + %div1 = sdiv i32 %a, %b + %div2 = sdiv i32 %b, %a + %cmp = icmp sgt i32 %a, 5 + %sel = select i1 %cmp, i32 %div1, i32 %div2 + ret i32 %sel +} + diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll new file mode 100644 index 00000000000..e914c1a3da6 --- /dev/null +++ 
b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll @@ -0,0 +1,543 @@ +; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-phis=true -addr-sink-new-select=true %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-YES +; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-phis=false -addr-sink-new-select=true %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NO +target datalayout = +"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +; Can we sink for different base if there is no phi for base? +define i32 @test1(i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test1 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO-LABEL: fallthrough: +; CHECK-NO: phi +; CHECK-NO-NEXT: load + %c = phi i32* [%c1, %entry], [%c2, %if.then] + %v = load i32, i32* %c, align 4 + ret i32 %v +} + +; Can we sink for different base if there is phi for base? +define i32 @test2(i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test2 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK: getelementptr inbounds i8, {{.+}} 40 + %b = phi i64* [%b1, %entry], [%b2, %if.then] + %c = phi i32* [%c1, %entry], [%c2, %if.then] + %v = load i32, i32* %c, align 4 + ret i32 %v +} + +; Can we sink for different base if there is phi for base but not valid one? 
+define i32 @test3(i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test3 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO-LABEL: fallthrough: +; CHECK-NO: phi +; CHECK-NO: phi +; CHECK-NO-NEXT: load + %b = phi i64* [%b2, %entry], [%b1, %if.then] + %c = phi i32* [%c1, %entry], [%c2, %if.then] + %v = load i32, i32* %c, align 4 + ret i32 %v +} + +; Can we sink for different base if both addresses are in the same block? +define i32 @test4(i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test4 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + br label %fallthrough + +fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO-LABEL: fallthrough: +; CHECK-NO: phi +; CHECK-NO-NEXT: load + %c = phi i32* [%c1, %entry], [%c2, %if.then] + %v = load i32, i32* %c, align 4 + ret i32 %v +} + +; Can we sink for different base if there is phi for base? +; Both addresses are in the same block. +define i32 @test5(i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test5 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + br label %fallthrough + +fallthrough: +; CHECK: getelementptr inbounds i8, {{.+}} 40 + %b = phi i64* [%b1, %entry], [%b2, %if.then] + %c = phi i32* [%c1, %entry], [%c2, %if.then] + %v = load i32, i32* %c, align 4 + ret i32 %v +} + +; Can we sink for different base if there is phi for base but not valid one? 
+; Both addresses are in the same block. +define i32 @test6(i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test6 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + br label %fallthrough + +fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO-LABEL: fallthrough: +; CHECK-NO: phi +; CHECK-NO-NEXT: phi +; CHECK-NO-NEXT: load + %b = phi i64* [%b2, %entry], [%b1, %if.then] + %c = phi i32* [%c1, %entry], [%c2, %if.then] + %v = load i32, i32* %c, align 4 + ret i32 %v +} + +; case with a loop. No phi node. +define i32 @test7(i32 %N, i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test7 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK-YES: sunk_phi + %iv = phi i32 [0, %entry], [%iv.inc, %fallthrough] + %c3 = phi i32* [%c1, %entry], [%c, %fallthrough] + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO-LABEL: fallthrough: +; CHECK-NO: phi +; CHECK-NO-NEXT: load + %c = phi i32* [%c3, %loop], [%c2, %if.then] + %v = load volatile i32, i32* %c, align 4 + %iv.inc = add i32 %iv, 1 + %cmp = icmp slt i32 %iv.inc, %N + br i1 %cmp, label %loop, label %exit + +exit: + ret i32 %v +} + +; case with a loop. There is phi node. 
+define i32 @test8(i32 %N, i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test8 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.inc, %fallthrough] + %c3 = phi i32* [%c1, %entry], [%c, %fallthrough] + %b3 = phi i64* [%b1, %entry], [%b, %fallthrough] + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK: getelementptr inbounds i8, {{.+}} 40 + %c = phi i32* [%c3, %loop], [%c2, %if.then] + %b = phi i64* [%b3, %loop], [%b2, %if.then] + %v = load volatile i32, i32* %c, align 4 + %iv.inc = add i32 %iv, 1 + %cmp = icmp slt i32 %iv.inc, %N + br i1 %cmp, label %loop, label %exit + +exit: + ret i32 %v +} + +; case with a loop. There is phi node but it does not fit. +define i32 @test9(i32 %N, i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test9 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK-YES: sunk_phi + %iv = phi i32 [0, %entry], [%iv.inc, %fallthrough] + %c3 = phi i32* [%c1, %entry], [%c, %fallthrough] + %b3 = phi i64* [%b1, %entry], [%b2, %fallthrough] + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO-LABEL: fallthrough: +; CHECK-NO: phi +; CHECK-NO-NEXT: phi +; CHECK-NO-NEXT: load + %c = phi i32* [%c3, %loop], [%c2, %if.then] + %b = phi i64* [%b3, %loop], [%b2, %if.then] + %v = load volatile i32, i32* %c, align 4 + %iv.inc = add i32 %iv, 1 + %cmp = icmp slt i32 %iv.inc, %N + br i1 %cmp, label %loop, label %exit + +exit: + ret i32 %v +} + +; Case through a loop. No phi node. 
+define i32 @test10(i32 %N, i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test10 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO-LABEL: fallthrough: +; CHECK-NO-NEXT: phi +; CHECK-NO-NEXT: br + %c = phi i32* [%c1, %entry], [%c2, %if.then] + br label %loop + +loop: + %iv = phi i32 [0, %fallthrough], [%iv.inc, %loop] + %iv.inc = add i32 %iv, 1 + %cmp = icmp slt i32 %iv.inc, %N + br i1 %cmp, label %loop, label %exit + +exit: +; CHECK-YES: sunkaddr + %v = load volatile i32, i32* %c, align 4 + ret i32 %v +} + +; Case through a loop. There is a phi. +define i32 @test11(i32 %N, i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test11 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK: phi +; CHECK: phi +; CHECK: br + %c = phi i32* [%c1, %entry], [%c2, %if.then] + %b = phi i64* [%b1, %entry], [%b2, %if.then] + br label %loop + +loop: + %iv = phi i32 [0, %fallthrough], [%iv.inc, %loop] + %iv.inc = add i32 %iv, 1 + %cmp = icmp slt i32 %iv.inc, %N + br i1 %cmp, label %loop, label %exit + +exit: +; CHECK: sunkaddr + %v = load volatile i32, i32* %c, align 4 + ret i32 %v +} + +; Complex case with address value from previous iteration. 
+define i32 @test12(i32 %N, i1 %cond, i64* %b1, i64* %b2, i64* %b3) { +; CHECK-LABEL: @test12 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK-YES: sunk_phi +; CHECK-NO: phi +; CHECK-NO-NEXT: phi +; CHECK-NO-NEXT: phi +; CHECK-NO-NEXT: br + %iv = phi i32 [0, %entry], [%iv.inc, %backedge] + %c3 = phi i32* [%c1, %entry], [%c, %backedge] + %b4 = phi i64* [%b1, %entry], [%b5, %backedge] + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK-LABEL: fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO: phi +; CHECK-NO-NEXT: phi +; CHECK-NO-NEXT: load + %c = phi i32* [%c3, %loop], [%c2, %if.then] + %b6 = phi i64* [%b4, %loop], [%b2, %if.then] + %v = load volatile i32, i32* %c, align 4 + %a4 = getelementptr inbounds i64, i64* %b4, i64 5 + %c4 = bitcast i64* %a4 to i32* + %cmp = icmp slt i32 %iv, 20 + br i1 %cmp, label %backedge, label %if.then.2 + +if.then.2: + br label %backedge + +backedge: + %b5 = phi i64* [%b4, %fallthrough], [%b6, %if.then.2] + %iv.inc = add i32 %iv, 1 + %cmp2 = icmp slt i32 %iv.inc, %N + br i1 %cmp2, label %loop, label %exit + +exit: + ret i32 %v +} + +%struct.S = type {i32, i32} +; Case with index +define i32 @test13(i1 %cond, %struct.S* %b1, %struct.S* %b2, i64 %Index) { +; CHECK-LABEL: @test13 +entry: + %a1 = getelementptr inbounds %struct.S, %struct.S* %b1, i64 %Index, i32 1 + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %i2 = mul i64 %Index, 2 + %a2 = getelementptr inbounds %struct.S, %struct.S* %b2, i64 %Index, i32 1 + br label %fallthrough + +fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO-LABEL: fallthrough: +; CHECK-NO-NEXT: phi +; CHECK-NO-NEXT: load + %a = phi i32* [%a1, %entry], [%a2, %if.then] + %v = load i32, i32* %a, align 4 + ret i32 %v +} + +; Select of Select case. 
+define i64 @test14(i1 %c1, i1 %c2, i64* %b1, i64* %b2, i64* %b3) { +; CHECK-LABEL: @test14 +entry: +; CHECK-LABEL: entry: + %g1 = getelementptr inbounds i64, i64* %b1, i64 5 + %g2 = getelementptr inbounds i64, i64* %b2, i64 5 + %g3 = getelementptr inbounds i64, i64* %b3, i64 5 + %s1 = select i1 %c1, i64* %g1, i64* %g2 + %s2 = select i1 %c2, i64* %s1, i64* %g3 +; CHECK: sunkaddr + %v = load i64 , i64* %s2, align 8 + ret i64 %v +} + +; Select of Phi case. +define i64 @test15(i1 %c1, i1 %c2, i64* %b1, i64* %b2, i64* %b3) { +; CHECK-LABEL: @test15 +entry: + %g1 = getelementptr inbounds i64, i64* %b1, i64 5 + %g2 = getelementptr inbounds i64, i64* %b2, i64 5 + %g3 = getelementptr inbounds i64, i64* %b3, i64 5 + br i1 %c1, label %if.then, label %fallthrough + +if.then: + br label %fallthrough + +fallthrough: +; CHECK-LABEL: fallthrough: + %p1 = phi i64* [%g1, %entry], [%g2, %if.then] + %s1 = select i1 %c2, i64* %p1, i64* %g3 +; CHECK-YES: sunkaddr +; CHECK-NO: phi +; CHECK-NO-NEXT: select +; CHECK-NO-NEXT: load + %v = load i64 , i64* %s1, align 8 + ret i64 %v +} + +; Select of Phi case. Phi exists +define i64 @test16(i1 %c1, i1 %c2, i64* %b1, i64* %b2, i64* %b3) { +; CHECK-LABEL: @test16 +entry: + %g1 = getelementptr inbounds i64, i64* %b1, i64 5 + %g2 = getelementptr inbounds i64, i64* %b2, i64 5 + %g3 = getelementptr inbounds i64, i64* %b3, i64 5 + br i1 %c1, label %if.then, label %fallthrough + +if.then: + br label %fallthrough + +fallthrough: +; CHECK-LABEL: fallthrough: + %p = phi i64* [%b1, %entry], [%b2, %if.then] + %p1 = phi i64* [%g1, %entry], [%g2, %if.then] + %s1 = select i1 %c2, i64* %p1, i64* %g3 +; CHECK: sunkaddr + %v = load i64 , i64* %s1, align 8 + ret i64 %v +} + +; Phi of Select case. 
+define i64 @test17(i1 %c1, i1 %c2, i64* %b1, i64* %b2, i64* %b3) { +; CHECK-LABEL: @test17 +entry: + %g1 = getelementptr inbounds i64, i64* %b1, i64 5 + %g2 = getelementptr inbounds i64, i64* %b2, i64 5 + %g3 = getelementptr inbounds i64, i64* %b3, i64 5 + %s1 = select i1 %c2, i64* %g1, i64* %g2 + br i1 %c1, label %if.then, label %fallthrough + +if.then: + br label %fallthrough + +fallthrough: +; CHECK-LABEL: fallthrough: + %p1 = phi i64* [%s1, %entry], [%g3, %if.then] +; CHECK-YES: sunkaddr +; CHECK-NO: phi +; CHECK-NO-NEXT: load + %v = load i64 , i64* %p1, align 8 + ret i64 %v +} + +; The same two addr modes by different paths +define i32 @test18(i1 %cond1, i1 %cond2, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test18 +entry: + %g1 = getelementptr inbounds i64, i64* %b2, i64 5 + %bc1 = bitcast i64* %g1 to i32* + br i1 %cond1, label %if.then1, label %if.then2 + +if.then1: + %g2 = getelementptr inbounds i64, i64* %b1, i64 5 + %bc2 = bitcast i64* %g2 to i32* + br label %fallthrough + +if.then2: + %bc1_1 = bitcast i64* %g1 to i32* + br i1 %cond2, label %fallthrough, label %if.then3 + +if.then3: + %bc1_2 = bitcast i64* %g1 to i32* + br label %fallthrough + +fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO-LABEL: fallthrough: +; CHECK-NO: phi +; CHECK-NO-NEXT: load + %c = phi i32* [%bc2, %if.then1], [%bc1_1, %if.then2], [%bc1_2, %if.then3] + %v1 = load i32, i32* %c, align 4 + %g1_1 = getelementptr inbounds i64, i64* %b2, i64 5 + %bc1_1_1 = bitcast i64* %g1_1 to i32* + %v2 = load i32, i32* %bc1_1_1, align 4 + %v = add i32 %v1, %v2 + ret i32 %v +} + +; Different types but null is the first? 
+define i32 @test19(i1 %cond1, i1 %cond2, i64* %b2, i8* %b1) { +; CHECK-LABEL: @test19 +entry: + %g1 = getelementptr inbounds i64, i64* %b2, i64 5 + %bc1 = bitcast i64* %g1 to i32* + br i1 %cond1, label %if.then1, label %if.then2 + +if.then1: + %g2 = getelementptr inbounds i8, i8* %b1, i64 40 + %bc2 = bitcast i8* %g2 to i32* + br label %fallthrough + +if.then2: + %bc1_1 = bitcast i64* %g1 to i32* + br i1 %cond2, label %fallthrough, label %if.then3 + +if.then3: + %g3 = getelementptr inbounds i64, i64* null, i64 5 + %bc1_2 = bitcast i64* %g3 to i32* + br label %fallthrough + +fallthrough: +; CHECK-NOT: sunk_phi + %c = phi i32* [%bc2, %if.then1], [%bc1_1, %if.then2], [%bc1_2, %if.then3] + %v1 = load i32, i32* %c, align 4 + %g1_1 = getelementptr inbounds i64, i64* %b2, i64 5 + %bc1_1_1 = bitcast i64* %g1_1 to i32* + %v2 = load i32, i32* %bc1_1_1, align 4 + %v = add i32 %v1, %v2 + ret i32 %v +} diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-select.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-select.ll new file mode 100644 index 00000000000..12edf44a03a --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-select.ll @@ -0,0 +1,34 @@ +; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-select=true %s | FileCheck %s --check-prefix=CHECK +target datalayout = +"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +; Select when both offset and scale reg are present. 
+define i64 @test1(i1 %c, i64* %b, i64 %scale) { +; CHECK-LABEL: @test1 +entry: +; CHECK-LABEL: entry: + %g = getelementptr inbounds i64, i64* %b, i64 %scale + %g1 = getelementptr inbounds i64, i64* %g, i64 8 + %g2 = getelementptr inbounds i64, i64* %g, i64 16 + %s = select i1 %c, i64* %g1, i64* %g2 +; CHECK-NOT: sunkaddr + %v = load i64 , i64* %s, align 8 + ret i64 %v +} + +@gv1 = external global i8, align 16 +@gv2 = external global i8, align 16 + +; Select when both GV and base reg are present. +define i8 @test2(i1 %c, i64 %b) { +; CHECK-LABEL: @test2 +entry: +; CHECK-LABEL: entry: + %g1 = getelementptr inbounds i8, i8* @gv1, i64 %b + %g2 = getelementptr inbounds i8, i8* @gv2, i64 %b + %s = select i1 %c, i8* %g1, i8* %g2 +; CHECK-NOT: sunkaddr + %v = load i8 , i8* %s, align 8 + ret i8 %v +} diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-two-phi.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-two-phi.ll new file mode 100644 index 00000000000..817382a07bd --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-two-phi.ll @@ -0,0 +1,27 @@ +; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false %s | FileCheck %s --check-prefix=CHECK +target datalayout = +"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @test() { +entry: + %0 = getelementptr inbounds i64, i64 * null, i64 undef + br label %start + +start: + %val1 = phi i64 * [ %0, %entry ], [ %val4, %exit ] + %val2 = phi i64 * [ null, %entry ], [ %val5, %exit ] + br i1 false, label %slowpath, label %exit + +slowpath: + %elem1 = getelementptr inbounds i64, i64 * undef, i64 undef + br label %exit + +exit: +; CHECK: sunkaddr + %val3 = phi i64 * [ undef, %slowpath ], [ %val2, %start ] + %val4 = phi i64 * [ %elem1, %slowpath ], [ %val1, %start ] + %val5 = phi i64 * [ undef, %slowpath ], [ %val2, %start ] + %loadx = load i64, i64 
* %val4, align 8 + br label %start +} diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll new file mode 100644 index 00000000000..4d28e06f252 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll @@ -0,0 +1,280 @@ +; RUN: opt -S -codegenprepare < %s | FileCheck %s + +target datalayout = +"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +@x = external global [1 x [2 x <4 x float>]] + +; Can we sink single addressing mode computation to use? +define void @test1(i1 %cond, i64* %base) { +; CHECK-LABEL: @test1 +; CHECK: getelementptr inbounds i8, {{.+}} 40 +entry: + %addr = getelementptr inbounds i64, i64* %base, i64 5 + %casted = bitcast i64* %addr to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %v = load i32, i32* %casted, align 4 + br label %fallthrough + +fallthrough: + ret void +} + +declare void @foo(i32) + +; Make sure sinking two copies of addressing mode into different blocks works +define void @test2(i1 %cond, i64* %base) { +; CHECK-LABEL: @test2 +entry: + %addr = getelementptr inbounds i64, i64* %base, i64 5 + %casted = bitcast i64* %addr to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: +; CHECK-LABEL: if.then: +; CHECK: getelementptr inbounds i8, {{.+}} 40 + %v1 = load i32, i32* %casted, align 4 + call void @foo(i32 %v1) + %cmp = icmp eq i32 %v1, 0 + br i1 %cmp, label %next, label %fallthrough + +next: +; CHECK-LABEL: next: +; CHECK: getelementptr inbounds i8, {{.+}} 40 + %v2 = load i32, i32* %casted, align 4 + call void @foo(i32 %v2) + br label %fallthrough + +fallthrough: + ret void +} + +; If we have two loads in the same block, only need one copy of addressing mode +; - instruction selection will duplicate if needed +define void @test3(i1 %cond, i64* %base) { +; CHECK-LABEL: @test3 
+entry: + %addr = getelementptr inbounds i64, i64* %base, i64 5 + %casted = bitcast i64* %addr to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: +; CHECK-LABEL: if.then: +; CHECK: getelementptr inbounds i8, {{.+}} 40 + %v1 = load i32, i32* %casted, align 4 + call void @foo(i32 %v1) +; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40 + %v2 = load i32, i32* %casted, align 4 + call void @foo(i32 %v2) + br label %fallthrough + +fallthrough: + ret void +} + +; Can we still sink addressing mode if there's a cold use of the +; address itself? +define void @test4(i1 %cond, i64* %base) { +; CHECK-LABEL: @test4 +entry: + %addr = getelementptr inbounds i64, i64* %base, i64 5 + %casted = bitcast i64* %addr to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: +; CHECK-LABEL: if.then: +; CHECK: getelementptr inbounds i8, {{.+}} 40 + %v1 = load i32, i32* %casted, align 4 + call void @foo(i32 %v1) + %cmp = icmp eq i32 %v1, 0 + br i1 %cmp, label %rare.1, label %fallthrough + +fallthrough: + ret void + +rare.1: +; CHECK-LABEL: rare.1: +; CHECK: getelementptr inbounds i8, {{.+}} 40 + call void @slowpath(i32 %v1, i32* %casted) cold + br label %fallthrough +} + +; Negative test - don't want to duplicate addressing into hot path +define void @test5(i1 %cond, i64* %base) { +; CHECK-LABEL: @test5 +entry: +; CHECK: %addr = getelementptr inbounds + %addr = getelementptr inbounds i64, i64* %base, i64 5 + %casted = bitcast i64* %addr to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: +; CHECK-LABEL: if.then: +; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40 + %v1 = load i32, i32* %casted, align 4 + call void @foo(i32 %v1) + %cmp = icmp eq i32 %v1, 0 + br i1 %cmp, label %rare.1, label %fallthrough + +fallthrough: + ret void + +rare.1: + call void @slowpath(i32 %v1, i32* %casted) ;; NOT COLD + br label %fallthrough +} + +; Negative test - opt for size +define void @test6(i1 %cond, i64* %base) minsize { +; CHECK-LABEL: @test6 +entry: +; 
CHECK: %addr = getelementptr + %addr = getelementptr inbounds i64, i64* %base, i64 5 + %casted = bitcast i64* %addr to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: +; CHECK-LABEL: if.then: +; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40 + %v1 = load i32, i32* %casted, align 4 + call void @foo(i32 %v1) + %cmp = icmp eq i32 %v1, 0 + br i1 %cmp, label %rare.1, label %fallthrough + +fallthrough: + ret void + +rare.1: + call void @slowpath(i32 %v1, i32* %casted) cold + br label %fallthrough +} + + +; Make sure sinking two copies of addressing mode into different blocks works +; when there are cold paths for each. +define void @test7(i1 %cond, i64* %base) { +; CHECK-LABEL: @test7 +entry: + %addr = getelementptr inbounds i64, i64* %base, i64 5 + %casted = bitcast i64* %addr to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: +; CHECK-LABEL: if.then: +; CHECK: getelementptr inbounds i8, {{.+}} 40 + %v1 = load i32, i32* %casted, align 4 + call void @foo(i32 %v1) + %cmp = icmp eq i32 %v1, 0 + br i1 %cmp, label %rare.1, label %next + +next: +; CHECK-LABEL: next: +; CHECK: getelementptr inbounds i8, {{.+}} 40 + %v2 = load i32, i32* %casted, align 4 + call void @foo(i32 %v2) + %cmp2 = icmp eq i32 %v2, 0 + br i1 %cmp2, label %rare.1, label %fallthrough + +fallthrough: + ret void + +rare.1: +; CHECK-LABEL: rare.1: +; CHECK: getelementptr inbounds i8, {{.+}} 40 + call void @slowpath(i32 %v1, i32* %casted) cold + br label %next + +rare.2: +; CHECK-LABEL: rare.2: +; CHECK: getelementptr inbounds i8, {{.+}} 40 + call void @slowpath(i32 %v2, i32* %casted) cold + br label %fallthrough +} + +declare void @slowpath(i32, i32*) + +; Make sure we don't end up in an infinite loop after we fail to sink. 
+; CHECK-LABEL: define void @test8 +; CHECK: %ptr = getelementptr i8, i8* %aFOO_load_ptr2int_2void, i32 undef +define void @test8() { +allocas: + %aFOO_load = load float*, float** undef + %aFOO_load_ptr2int = ptrtoint float* %aFOO_load to i64 + %aFOO_load_ptr2int_broadcast_init = insertelement <4 x i64> undef, i64 %aFOO_load_ptr2int, i32 0 + %aFOO_load_ptr2int_2void = inttoptr i64 %aFOO_load_ptr2int to i8* + %ptr = getelementptr i8, i8* %aFOO_load_ptr2int_2void, i32 undef + br label %load.i145 + +load.i145: + %ptr.i143 = bitcast i8* %ptr to <4 x float>* + %valall.i144 = load <4 x float>, <4 x float>* %ptr.i143, align 4 + %x_offset = getelementptr [1 x [2 x <4 x float>]], [1 x [2 x <4 x float>]]* @x, i32 0, i64 0 + br label %pl_loop.i.i122 + +pl_loop.i.i122: + br label %pl_loop.i.i122 +} + +; Make sure we can sink address computation even +; if there is a cycle in phi nodes. +define void @test9(i1 %cond, i64* %base) { +; CHECK-LABEL: @test9 +entry: + %addr = getelementptr inbounds i64, i64* %base, i64 5 + %casted = bitcast i64* %addr to i32* + br label %header + +header: + %iv = phi i32 [0, %entry], [%iv.inc, %backedge] + %casted.loop = phi i32* [%casted, %entry], [%casted.merged, %backedge] + br i1 %cond, label %if.then, label %backedge + +if.then: + call void @foo(i32 %iv) + %addr.1 = getelementptr inbounds i64, i64* %base, i64 5 + %casted.1 = bitcast i64* %addr.1 to i32* + br label %backedge + +backedge: +; CHECK-LABEL: backedge: +; CHECK: getelementptr inbounds i8, {{.+}} 40 + %casted.merged = phi i32* [%casted.loop, %header], [%casted.1, %if.then] + %v = load i32, i32* %casted.merged, align 4 + call void @foo(i32 %v) + %iv.inc = add i32 %iv, 1 + %cmp = icmp slt i32 %iv.inc, 1000 + br i1 %cmp, label %header, label %exit + +exit: + ret void +} + +; Make sure we can eliminate a select when both arguments perform equivalent +; address computation. 
+define void @test10(i1 %cond, i64* %base) { +; CHECK-LABEL: @test10 +; CHECK: getelementptr inbounds i8, {{.+}} 40 +; CHECK-NOT: select +entry: + %gep1 = getelementptr inbounds i64, i64* %base, i64 5 + %gep1.casted = bitcast i64* %gep1 to i32* + %base.casted = bitcast i64* %base to i32* + %gep2 = getelementptr inbounds i32, i32* %base.casted, i64 10 + %casted.merged = select i1 %cond, i32* %gep1.casted, i32* %gep2 + %v = load i32, i32* %casted.merged, align 4 + call void @foo(i32 %v) + ret void +} + +; Found by fuzzer, getSExtValue of > 64 bit constant +define void @i96_mul(i1* %base, i96 %offset) { +BB: + ;; RHS = 0x7FFFFFFFFFFFFFFFFFFFFFFF + %B84 = mul i96 %offset, 39614081257132168796771975167 + %G23 = getelementptr i1, i1* %base, i96 %B84 + store i1 false, i1* %G23 + ret void +} diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll new file mode 100644 index 00000000000..b716ef9b820 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll @@ -0,0 +1,39 @@ +; RUN: opt -S -codegenprepare < %s | FileCheck %s -check-prefix=CHECK -check-prefix=GEP + +target datalayout = +"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-LABEL: @load_cast_gep +; GEP: [[CAST:%[0-9]+]] = addrspacecast i64* %base to i8 addrspace(1)* +; GEP: getelementptr inbounds i8, i8 addrspace(1)* [[CAST]], i64 40 +define void @load_cast_gep(i1 %cond, i64* %base) { +entry: + %addr = getelementptr inbounds i64, i64* %base, i64 5 + %casted = addrspacecast i64* %addr to i32 addrspace(1)* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %v = load i32, i32 addrspace(1)* %casted, align 4 + br label %fallthrough + +fallthrough: + ret void +} + +; CHECK-LABEL: @store_gep_cast +; GEP: [[CAST:%[0-9]+]] = addrspacecast i64* %base to i8 addrspace(1)* +; 
GEP: getelementptr inbounds i8, i8 addrspace(1)* [[CAST]], i64 20 +define void @store_gep_cast(i1 %cond, i64* %base) { +entry: + %casted = addrspacecast i64* %base to i32 addrspace(1)* + %addr = getelementptr inbounds i32, i32 addrspace(1)* %casted, i64 5 + br i1 %cond, label %if.then, label %fallthrough + +if.then: + store i32 0, i32 addrspace(1)* %addr, align 4 + br label %fallthrough + +fallthrough: + ret void +} diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/widen_switch.ll b/llvm/test/Transforms/CodeGenPrepare/X86/widen_switch.ll new file mode 100644 index 00000000000..82c9938336e --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/widen_switch.ll @@ -0,0 +1,103 @@ +;; x86 is chosen to show the transform when 8-bit and 16-bit registers are available. + +; RUN: opt < %s -codegenprepare -S -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X86 +; RUN: opt < %s -debugify -codegenprepare -S -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=DEBUG + +; No change for x86 because 16-bit registers are part of the architecture. + +define i32 @widen_switch_i16(i32 %a) { +entry: + %trunc = trunc i32 %a to i16 + switch i16 %trunc, label %sw.default [ + i16 1, label %sw.bb0 + i16 -1, label %sw.bb1 + ] + +sw.bb0: + br label %return + +sw.bb1: + br label %return + +sw.default: + br label %return + +return: + %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ] + ret i32 %retval + +; X86-LABEL: @widen_switch_i16( +; X86: %trunc = trunc i32 %a to i16 +; X86-NEXT: switch i16 %trunc, label %sw.default [ +; X86-NEXT: i16 1, label %sw.bb0 +; X86-NEXT: i16 -1, label %sw.bb1 +} + +; Widen to 32-bit from a smaller, non-native type. 
+ +define i32 @widen_switch_i17(i32 %a) { +entry: + %trunc = trunc i32 %a to i17 + switch i17 %trunc, label %sw.default [ + i17 10, label %sw.bb0 + i17 -1, label %sw.bb1 + ] + +sw.bb0: + br label %return + +sw.bb1: + br label %return + +sw.default: + br label %return + +return: + %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ] + ret i32 %retval + +; X86-LABEL: @widen_switch_i17( +; X86: %0 = zext i17 %trunc to i32 +; X86-NEXT: switch i32 %0, label %sw.default [ +; X86-NEXT: i32 10, label %sw.bb0 +; X86-NEXT: i32 131071, label %sw.bb1 + +; DEBUG-LABEL: @widen_switch_i17( +; DEBUG: zext i17 %trunc to i32, !dbg [[switch_loc:![0-9]+]] +; DEBUG-NEXT: switch i32 {{.*}} [ +; DEBUG-NEXT: label %sw.bb0 +; DEBUG-NEXT: label %sw.bb1 +; DEBUG-NEXT: ], !dbg [[switch_loc]] +} + +; If the switch condition is a sign-extended function argument, then the +; condition and cases should be sign-extended rather than zero-extended +; because the sign-extension can be optimized away. + +define i32 @widen_switch_i16_sext(i2 signext %a) { +entry: + switch i2 %a, label %sw.default [ + i2 1, label %sw.bb0 + i2 -1, label %sw.bb1 + ] + +sw.bb0: + br label %return + +sw.bb1: + br label %return + +sw.default: + br label %return + +return: + %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ] + ret i32 %retval + +; X86-LABEL: @widen_switch_i16_sext( +; X86: %0 = sext i2 %a to i8 +; X86-NEXT: switch i8 %0, label %sw.default [ +; X86-NEXT: i8 1, label %sw.bb0 +; X86-NEXT: i8 -1, label %sw.bb1 +} + diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll new file mode 100644 index 00000000000..112b63dd773 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll @@ -0,0 +1,180 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -codegenprepare -mcpu=corei7 %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE2 +; RUN: opt -S 
-codegenprepare -mcpu=bdver2 %s | FileCheck %s --check-prefixes=CHECK,CHECK-XOP +; RUN: opt -S -codegenprepare -mcpu=core-avx2 %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX2 +; RUN: opt -S -codegenprepare -mcpu=skylake-avx512 %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX512BW + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-darwin10.9.0" + +define <16 x i8> @test_8bit(<16 x i8> %lhs, <16 x i8> %tmp, i1 %tst) { +; CHECK-LABEL: @test_8bit( +; CHECK-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK: if_true: +; CHECK-NEXT: ret <16 x i8> [[MASK]] +; CHECK: if_false: +; CHECK-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[MASK]] +; CHECK-NEXT: ret <16 x i8> [[RES]] +; + %mask = shufflevector <16 x i8> %tmp, <16 x i8> undef, <16 x i32> zeroinitializer + br i1 %tst, label %if_true, label %if_false + +if_true: + ret <16 x i8> %mask + +if_false: + %res = shl <16 x i8> %lhs, %mask + ret <16 x i8> %res +} + +define <8 x i16> @test_16bit(<8 x i16> %lhs, <8 x i16> %tmp, i1 %tst) { +; CHECK-SSE2-LABEL: @test_16bit( +; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <8 x i16> [[TMP:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-SSE2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-SSE2: if_true: +; CHECK-SSE2-NEXT: ret <8 x i16> [[MASK]] +; CHECK-SSE2: if_false: +; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[TMP]], <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-SSE2-NEXT: [[RES:%.*]] = shl <8 x i16> [[LHS:%.*]], [[TMP1]] +; CHECK-SSE2-NEXT: ret <8 x i16> [[RES]] +; +; CHECK-XOP-LABEL: @test_16bit( +; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <8 x i16> [[TMP:%.*]], <8 x i16> undef, <8 x i32> 
zeroinitializer +; CHECK-XOP-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-XOP: if_true: +; CHECK-XOP-NEXT: ret <8 x i16> [[MASK]] +; CHECK-XOP: if_false: +; CHECK-XOP-NEXT: [[RES:%.*]] = shl <8 x i16> [[LHS:%.*]], [[MASK]] +; CHECK-XOP-NEXT: ret <8 x i16> [[RES]] +; +; CHECK-AVX2-LABEL: @test_16bit( +; CHECK-AVX2-NEXT: [[MASK:%.*]] = shufflevector <8 x i16> [[TMP:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-AVX2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-AVX2: if_true: +; CHECK-AVX2-NEXT: ret <8 x i16> [[MASK]] +; CHECK-AVX2: if_false: +; CHECK-AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[TMP]], <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-AVX2-NEXT: [[RES:%.*]] = shl <8 x i16> [[LHS:%.*]], [[TMP1]] +; CHECK-AVX2-NEXT: ret <8 x i16> [[RES]] +; +; CHECK-AVX512BW-LABEL: @test_16bit( +; CHECK-AVX512BW-NEXT: [[MASK:%.*]] = shufflevector <8 x i16> [[TMP:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-AVX512BW-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-AVX512BW: if_true: +; CHECK-AVX512BW-NEXT: ret <8 x i16> [[MASK]] +; CHECK-AVX512BW: if_false: +; CHECK-AVX512BW-NEXT: [[RES:%.*]] = shl <8 x i16> [[LHS:%.*]], [[MASK]] +; CHECK-AVX512BW-NEXT: ret <8 x i16> [[RES]] +; + %mask = shufflevector <8 x i16> %tmp, <8 x i16> undef, <8 x i32> zeroinitializer + br i1 %tst, label %if_true, label %if_false + +if_true: + ret <8 x i16> %mask + +if_false: + %res = shl <8 x i16> %lhs, %mask + ret <8 x i16> %res +} + +define <4 x i32> @test_notsplat(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) { +; CHECK-LABEL: @test_notsplat( +; CHECK-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0> +; CHECK-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK: if_true: +; CHECK-NEXT: ret <4 x i32> [[MASK]] +; CHECK: if_false: +; CHECK-NEXT: [[RES:%.*]] = shl <4 x i32> 
[[LHS:%.*]], [[MASK]] +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %mask = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0> + br i1 %tst, label %if_true, label %if_false + +if_true: + ret <4 x i32> %mask + +if_false: + %res = shl <4 x i32> %lhs, %mask + ret <4 x i32> %res +} + +define <4 x i32> @test_32bit(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) { +; CHECK-SSE2-LABEL: @test_32bit( +; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0> +; CHECK-SSE2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-SSE2: if_true: +; CHECK-SSE2-NEXT: ret <4 x i32> [[MASK]] +; CHECK-SSE2: if_false: +; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP]], <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0> +; CHECK-SSE2-NEXT: [[RES:%.*]] = ashr <4 x i32> [[LHS:%.*]], [[TMP1]] +; CHECK-SSE2-NEXT: ret <4 x i32> [[RES]] +; +; CHECK-XOP-LABEL: @test_32bit( +; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0> +; CHECK-XOP-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-XOP: if_true: +; CHECK-XOP-NEXT: ret <4 x i32> [[MASK]] +; CHECK-XOP: if_false: +; CHECK-XOP-NEXT: [[RES:%.*]] = ashr <4 x i32> [[LHS:%.*]], [[MASK]] +; CHECK-XOP-NEXT: ret <4 x i32> [[RES]] +; +; CHECK-AVX-LABEL: @test_32bit( +; CHECK-AVX-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0> +; CHECK-AVX-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-AVX: if_true: +; CHECK-AVX-NEXT: ret <4 x i32> [[MASK]] +; CHECK-AVX: if_false: +; CHECK-AVX-NEXT: [[RES:%.*]] = ashr <4 x i32> [[LHS:%.*]], [[MASK]] +; CHECK-AVX-NEXT: ret <4 x i32> [[RES]] +; + %mask = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0> + br i1 %tst, label 
%if_true, label %if_false + +if_true: + ret <4 x i32> %mask + +if_false: + %res = ashr <4 x i32> %lhs, %mask + ret <4 x i32> %res +} + +define <2 x i64> @test_64bit(<2 x i64> %lhs, <2 x i64> %tmp, i1 %tst) { +; CHECK-SSE2-LABEL: @test_64bit( +; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <2 x i64> [[TMP:%.*]], <2 x i64> undef, <2 x i32> zeroinitializer +; CHECK-SSE2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-SSE2: if_true: +; CHECK-SSE2-NEXT: ret <2 x i64> [[MASK]] +; CHECK-SSE2: if_false: +; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> undef, <2 x i32> zeroinitializer +; CHECK-SSE2-NEXT: [[RES:%.*]] = lshr <2 x i64> [[LHS:%.*]], [[TMP1]] +; CHECK-SSE2-NEXT: ret <2 x i64> [[RES]] +; +; CHECK-XOP-LABEL: @test_64bit( +; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <2 x i64> [[TMP:%.*]], <2 x i64> undef, <2 x i32> zeroinitializer +; CHECK-XOP-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-XOP: if_true: +; CHECK-XOP-NEXT: ret <2 x i64> [[MASK]] +; CHECK-XOP: if_false: +; CHECK-XOP-NEXT: [[RES:%.*]] = lshr <2 x i64> [[LHS:%.*]], [[MASK]] +; CHECK-XOP-NEXT: ret <2 x i64> [[RES]] +; +; CHECK-AVX-LABEL: @test_64bit( +; CHECK-AVX-NEXT: [[MASK:%.*]] = shufflevector <2 x i64> [[TMP:%.*]], <2 x i64> undef, <2 x i32> zeroinitializer +; CHECK-AVX-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-AVX: if_true: +; CHECK-AVX-NEXT: ret <2 x i64> [[MASK]] +; CHECK-AVX: if_false: +; CHECK-AVX-NEXT: [[RES:%.*]] = lshr <2 x i64> [[LHS:%.*]], [[MASK]] +; CHECK-AVX-NEXT: ret <2 x i64> [[RES]] +; + %mask = shufflevector <2 x i64> %tmp, <2 x i64> undef, <2 x i32> zeroinitializer + br i1 %tst, label %if_true, label %if_false + +if_true: + ret <2 x i64> %mask + +if_false: + %res = lshr <2 x i64> %lhs, %mask + ret <2 x i64> %res +} |