author    Eric Christopher <echristo@gmail.com>  2019-04-17 04:52:47 +0000
committer Eric Christopher <echristo@gmail.com>  2019-04-17 04:52:47 +0000
commit    cee313d288a4faf0355d76fb6e0e927e211d08a5 (patch)
tree      d386075318d761197779a96e5d8fc0dc7b06342b /llvm/test/Transforms/CodeGenPrepare/X86
parent    c3d6a929fdd92fd06d4304675ade8d7210ee711a (diff)
Revert "Temporarily Revert "Add basic loop fusion pass.""
The reversion apparently deleted the test/Transforms directory. Will be re-reverting again.

llvm-svn: 358552
Diffstat (limited to 'llvm/test/Transforms/CodeGenPrepare/X86')
-rw-r--r--  llvm/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll | 117
-rw-r--r--  llvm/test/Transforms/CodeGenPrepare/X86/computedgoto.ll | 294
-rw-r--r--  llvm/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll | 56
-rw-r--r--  llvm/test/Transforms/CodeGenPrepare/X86/ext-logicop.ll | 128
-rw-r--r--  llvm/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll | 64
-rw-r--r--  llvm/test/Transforms/CodeGenPrepare/X86/fcmp-sinking.ll | 29
-rw-r--r--  llvm/test/Transforms/CodeGenPrepare/X86/lit.local.cfg | 3
-rw-r--r--  llvm/test/Transforms/CodeGenPrepare/X86/memset_chk-simplify-nobuiltin.ll | 18
-rw-r--r--  llvm/test/Transforms/CodeGenPrepare/X86/multi-extension.ll | 25
-rw-r--r--  llvm/test/Transforms/CodeGenPrepare/X86/optimizeSelect-DT.ll | 34
-rw-r--r--  llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll | 519
-rw-r--r--  llvm/test/Transforms/CodeGenPrepare/X86/pr27536.ll | 32
-rw-r--r--  llvm/test/Transforms/CodeGenPrepare/X86/pr35658.ll | 21
-rw-r--r--  llvm/test/Transforms/CodeGenPrepare/X86/select.ll | 205
-rw-r--r--  llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll | 543
-rw-r--r--  llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-select.ll | 34
-rw-r--r--  llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-two-phi.ll | 27
-rw-r--r--  llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll | 280
-rw-r--r--  llvm/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll | 39
-rw-r--r--  llvm/test/Transforms/CodeGenPrepare/X86/widen_switch.ll | 103
-rw-r--r--  llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll | 180
21 files changed, 2751 insertions, 0 deletions
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll b/llvm/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll
new file mode 100644
index 00000000000..1121abb7314
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll
@@ -0,0 +1,117 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+; The following target lines are needed for the test to exercise what it should.
+; Without these lines, CodeGenPrepare does not try to sink the bitcasts.
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @f()
+
+declare void @g(i8*)
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+; CodeGenPrepare will want to sink these bitcasts, but it selects the catchpad
+; blocks as the place to which the bitcast should be sunk. Since catchpads
+; do not allow non-phi instructions before the terminator, this isn't possible.
+
+; CHECK-LABEL: @test(
+define void @test(i32* %addr) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %x = getelementptr i32, i32* %addr, i32 1
+ %p1 = bitcast i32* %x to i8*
+ invoke void @f()
+ to label %invoke.cont unwind label %catch1
+
+; CHECK: invoke.cont:
+; CHECK-NEXT: %y = getelementptr i32, i32* %addr, i32 2
+invoke.cont:
+ %y = getelementptr i32, i32* %addr, i32 2
+ %p2 = bitcast i32* %y to i8*
+ invoke void @f()
+ to label %done unwind label %catch2
+
+done:
+ ret void
+
+catch1:
+ %cs1 = catchswitch within none [label %handler1] unwind to caller
+
+handler1:
+ %cp1 = catchpad within %cs1 []
+ br label %catch.shared
+; CHECK: handler1:
+; CHECK-NEXT: catchpad within %cs1
+; CHECK: %[[p1:[0-9]+]] = bitcast i32* %x to i8*
+
+catch2:
+ %cs2 = catchswitch within none [label %handler2] unwind to caller
+
+handler2:
+ %cp2 = catchpad within %cs2 []
+ br label %catch.shared
+; CHECK: handler2:
+; CHECK: catchpad within %cs2
+; CHECK: %[[p2:[0-9]+]] = bitcast i32* %y to i8*
+
+; CHECK: catch.shared:
+; CHECK-NEXT: %p = phi i8* [ %[[p1]], %handler1 ], [ %[[p2]], %handler2 ]
+catch.shared:
+ %p = phi i8* [ %p1, %handler1 ], [ %p2, %handler2 ]
+ call void @g(i8* %p)
+ unreachable
+}
+
+; CodeGenPrepare will want to hoist these llvm.dbg.value calls to the phi, but
+; there is no insertion point in a catchpad block.
+
+; CHECK-LABEL: @test_dbg_value(
+define void @test_dbg_value() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %a = alloca i8
+ %b = alloca i8
+ invoke void @f() to label %next unwind label %catch.dispatch
+next:
+ invoke void @f() to label %ret unwind label %catch.dispatch
+ret:
+ ret void
+
+catch.dispatch:
+ %p = phi i8* [%a, %entry], [%b, %next]
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch:
+ %cp1 = catchpad within %cs1 []
+ tail call void @llvm.dbg.value(metadata i8* %p, i64 0, metadata !11, metadata !13), !dbg !14
+ call void @g(i8* %p)
+ catchret from %cp1 to label %ret
+
+; CHECK: catch.dispatch:
+; CHECK-NEXT: phi i8
+; CHECK-NEXT: catchswitch
+; CHECK-NOT: llvm.dbg.value
+
+; CHECK: catch:
+; CHECK-NEXT: catchpad
+; CHECK-NEXT: call void @llvm.dbg.value
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 254906) (llvm/trunk 254917)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: null)
+!1 = !DIFile(filename: "t.c", directory: "D:\5Csrc\5Cllvm\5Cbuild")
+!4 = distinct !DISubprogram(name: "test_dbg_value", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, unit: !0, retainedNodes: null)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{i32 1, !"PIC Level", i32 2}
+!10 = !{!"clang version 3.8.0 (trunk 254906) (llvm/trunk 254917)"}
+!11 = !DILocalVariable(name: "p", scope: !4, file: !1, line: 2, type: !12)
+!12 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!13 = !DIExpression(DW_OP_deref)
+!14 = !DILocation(line: 2, column: 8, scope: !4)
+!15 = !DILocation(line: 3, column: 1, scope: !4)
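
For context, a rough C++ analogue of the shape this test exercises (a hedged sketch; the actual source that produced the IR is an assumption, not part of the commit). Under the MSVC C++ EH personality (__CxxFrameHandler3), each handler lowers to a catchswitch/catchpad pair, and a catchpad block cannot hold ordinary instructions before its terminator, which is why CodeGenPrepare must not sink the bitcasts there.

    // Hypothetical source shape for @test above; names and types are assumed.
    // Built for x86_64-pc-windows-msvc (with exceptions enabled, e.g. /EHsc),
    // each `catch (...)` becomes a catchswitch feeding a catchpad.
    void f();                 // may throw (modeled by `invoke void @f()`)
    void g(char *p);          // the shared use of the sunk pointer

    void test(int *addr) {
      try {
        f();                  // invoke ... unwind label %catch1
      } catch (...) {
        g(reinterpret_cast<char *>(addr + 1));  // the bitcast CGP wants to sink
      }
    }
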
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/computedgoto.ll b/llvm/test/Transforms/CodeGenPrepare/X86/computedgoto.ll
new file mode 100644
index 00000000000..6a3804f2a75
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/computedgoto.ll
@@ -0,0 +1,294 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @use(i32) local_unnamed_addr
+declare void @useptr([2 x i8*]*) local_unnamed_addr
+
+; CHECK: @simple.targets = constant [2 x i8*] [i8* blockaddress(@simple, %bb0), i8* blockaddress(@simple, %bb1)], align 16
+@simple.targets = constant [2 x i8*] [i8* blockaddress(@simple, %bb0), i8* blockaddress(@simple, %bb1)], align 16
+
+; CHECK: @multi.targets = constant [2 x i8*] [i8* blockaddress(@multi, %bb0), i8* blockaddress(@multi, %bb1)], align 16
+@multi.targets = constant [2 x i8*] [i8* blockaddress(@multi, %bb0), i8* blockaddress(@multi, %bb1)], align 16
+
+; CHECK: @loop.targets = constant [2 x i8*] [i8* blockaddress(@loop, %bb0), i8* blockaddress(@loop, %bb1)], align 16
+@loop.targets = constant [2 x i8*] [i8* blockaddress(@loop, %bb0), i8* blockaddress(@loop, %bb1)], align 16
+
+; CHECK: @nophi.targets = constant [2 x i8*] [i8* blockaddress(@nophi, %bb0), i8* blockaddress(@nophi, %bb1)], align 16
+@nophi.targets = constant [2 x i8*] [i8* blockaddress(@nophi, %bb0), i8* blockaddress(@nophi, %bb1)], align 16
+
+; CHECK: @noncritical.targets = constant [2 x i8*] [i8* blockaddress(@noncritical, %bb0), i8* blockaddress(@noncritical, %bb1)], align 16
+@noncritical.targets = constant [2 x i8*] [i8* blockaddress(@noncritical, %bb0), i8* blockaddress(@noncritical, %bb1)], align 16
+
+; Check that we break the critical edge when a jump table has only one use.
+define void @simple(i32* nocapture readonly %p) {
+; CHECK-LABEL: @simple(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT: [[INITVAL:%.*]] = load i32, i32* [[P]], align 4
+; CHECK-NEXT: [[INITOP:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
+; CHECK-NEXT: switch i32 [[INITOP]], label [[EXIT:%.*]] [
+; CHECK-NEXT: i32 0, label [[BB0_CLONE:%.*]]
+; CHECK-NEXT: i32 1, label [[BB1_CLONE:%.*]]
+; CHECK-NEXT: ]
+; CHECK: bb0:
+; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
+; CHECK: .split:
+; CHECK-NEXT: [[MERGE:%.*]] = phi i32* [ [[PTR:%.*]], [[BB0:%.*]] ], [ [[INCDEC_PTR]], [[BB0_CLONE]] ]
+; CHECK-NEXT: [[MERGE2:%.*]] = phi i32 [ 0, [[BB0]] ], [ [[INITVAL]], [[BB0_CLONE]] ]
+; CHECK-NEXT: tail call void @use(i32 [[MERGE2]])
+; CHECK-NEXT: br label [[INDIRECTGOTO:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br label [[DOTSPLIT3:%.*]]
+; CHECK: .split3:
+; CHECK-NEXT: [[MERGE5:%.*]] = phi i32* [ [[PTR]], [[BB1:%.*]] ], [ [[INCDEC_PTR]], [[BB1_CLONE]] ]
+; CHECK-NEXT: [[MERGE7:%.*]] = phi i32 [ 1, [[BB1]] ], [ [[INITVAL]], [[BB1_CLONE]] ]
+; CHECK-NEXT: tail call void @use(i32 [[MERGE7]])
+; CHECK-NEXT: br label [[INDIRECTGOTO]]
+; CHECK: indirectgoto:
+; CHECK-NEXT: [[P_ADDR_SINK:%.*]] = phi i32* [ [[MERGE5]], [[DOTSPLIT3]] ], [ [[MERGE]], [[DOTSPLIT]] ]
+; CHECK-NEXT: [[PTR]] = getelementptr inbounds i32, i32* [[P_ADDR_SINK]], i64 1
+; CHECK-NEXT: [[NEWP:%.*]] = load i32, i32* [[P_ADDR_SINK]], align 4
+; CHECK-NEXT: [[IDX:%.*]] = sext i32 [[NEWP]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @simple.targets, i64 0, i64 [[IDX]]
+; CHECK-NEXT: [[NEWOP:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
+; CHECK-NEXT: indirectbr i8* [[NEWOP]], [label [[BB0]], label %bb1]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: bb0.clone:
+; CHECK-NEXT: br label [[DOTSPLIT]]
+; CHECK: bb1.clone:
+; CHECK-NEXT: br label [[DOTSPLIT3]]
+;
+entry:
+ %incdec.ptr = getelementptr inbounds i32, i32* %p, i64 1
+ %initval = load i32, i32* %p, align 4
+ %initop = load i32, i32* %incdec.ptr, align 4
+ switch i32 %initop, label %exit [
+ i32 0, label %bb0
+ i32 1, label %bb1
+ ]
+
+bb0:
+ %p.addr.0 = phi i32* [ %incdec.ptr, %entry ], [ %ptr, %indirectgoto ]
+ %opcode.0 = phi i32 [ %initval, %entry ], [ 0, %indirectgoto ]
+ tail call void @use(i32 %opcode.0)
+ br label %indirectgoto
+
+bb1:
+ %p.addr.1 = phi i32* [ %incdec.ptr, %entry ], [ %ptr, %indirectgoto ]
+ %opcode.1 = phi i32 [ %initval, %entry ], [ 1, %indirectgoto ]
+ tail call void @use(i32 %opcode.1)
+ br label %indirectgoto
+
+indirectgoto:
+ %p.addr.sink = phi i32* [ %p.addr.1, %bb1 ], [ %p.addr.0, %bb0 ]
+ %ptr = getelementptr inbounds i32, i32* %p.addr.sink, i64 1
+ %newp = load i32, i32* %p.addr.sink, align 4
+ %idx = sext i32 %newp to i64
+ %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @simple.targets, i64 0, i64 %idx
+ %newop = load i8*, i8** %arrayidx, align 8
+ indirectbr i8* %newop, [label %bb0, label %bb1]
+
+exit:
+ ret void
+}
+
+; Don't try to break critical edges when several indirectbr instructions point to a single block.
+define void @multi(i32* nocapture readonly %p) {
+; CHECK-LABEL: @multi(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT: [[INITVAL:%.*]] = load i32, i32* [[P]], align 4
+; CHECK-NEXT: [[INITOP:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
+; CHECK-NEXT: switch i32 [[INITOP]], label [[EXIT:%.*]] [
+; CHECK-NEXT: i32 0, label [[BB0:%.*]]
+; CHECK-NEXT: i32 1, label [[BB1:%.*]]
+; CHECK-NEXT: ]
+; CHECK: bb0:
+; CHECK-NEXT: [[P_ADDR_0:%.*]] = phi i32* [ [[INCDEC_PTR]], [[ENTRY:%.*]] ], [ [[NEXT0:%.*]], [[BB0]] ], [ [[NEXT1:%.*]], [[BB1]] ]
+; CHECK-NEXT: [[OPCODE_0:%.*]] = phi i32 [ [[INITVAL]], [[ENTRY]] ], [ 0, [[BB0]] ], [ 1, [[BB1]] ]
+; CHECK-NEXT: tail call void @use(i32 [[OPCODE_0]])
+; CHECK-NEXT: [[NEXT0]] = getelementptr inbounds i32, i32* [[P_ADDR_0]], i64 1
+; CHECK-NEXT: [[NEWP0:%.*]] = load i32, i32* [[P_ADDR_0]], align 4
+; CHECK-NEXT: [[IDX0:%.*]] = sext i32 [[NEWP0]] to i64
+; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 [[IDX0]]
+; CHECK-NEXT: [[NEWOP0:%.*]] = load i8*, i8** [[ARRAYIDX0]], align 8
+; CHECK-NEXT: indirectbr i8* [[NEWOP0]], [label [[BB0]], label %bb1]
+; CHECK: bb1:
+; CHECK-NEXT: [[P_ADDR_1:%.*]] = phi i32* [ [[INCDEC_PTR]], [[ENTRY]] ], [ [[NEXT0]], [[BB0]] ], [ [[NEXT1]], [[BB1]] ]
+; CHECK-NEXT: [[OPCODE_1:%.*]] = phi i32 [ [[INITVAL]], [[ENTRY]] ], [ 0, [[BB0]] ], [ 1, [[BB1]] ]
+; CHECK-NEXT: tail call void @use(i32 [[OPCODE_1]])
+; CHECK-NEXT: [[NEXT1]] = getelementptr inbounds i32, i32* [[P_ADDR_1]], i64 1
+; CHECK-NEXT: [[NEWP1:%.*]] = load i32, i32* [[P_ADDR_1]], align 4
+; CHECK-NEXT: [[IDX1:%.*]] = sext i32 [[NEWP1]] to i64
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 [[IDX1]]
+; CHECK-NEXT: [[NEWOP1:%.*]] = load i8*, i8** [[ARRAYIDX1]], align 8
+; CHECK-NEXT: indirectbr i8* [[NEWOP1]], [label [[BB0]], label %bb1]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %incdec.ptr = getelementptr inbounds i32, i32* %p, i64 1
+ %initval = load i32, i32* %p, align 4
+ %initop = load i32, i32* %incdec.ptr, align 4
+ switch i32 %initop, label %exit [
+ i32 0, label %bb0
+ i32 1, label %bb1
+ ]
+
+bb0:
+ %p.addr.0 = phi i32* [ %incdec.ptr, %entry ], [ %next0, %bb0 ], [ %next1, %bb1 ]
+ %opcode.0 = phi i32 [ %initval, %entry ], [ 0, %bb0 ], [ 1, %bb1 ]
+ tail call void @use(i32 %opcode.0)
+ %next0 = getelementptr inbounds i32, i32* %p.addr.0, i64 1
+ %newp0 = load i32, i32* %p.addr.0, align 4
+ %idx0 = sext i32 %newp0 to i64
+ %arrayidx0 = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 %idx0
+ %newop0 = load i8*, i8** %arrayidx0, align 8
+ indirectbr i8* %newop0, [label %bb0, label %bb1]
+
+bb1:
+ %p.addr.1 = phi i32* [ %incdec.ptr, %entry ], [ %next0, %bb0 ], [ %next1, %bb1 ]
+ %opcode.1 = phi i32 [ %initval, %entry ], [ 0, %bb0 ], [ 1, %bb1 ]
+ tail call void @use(i32 %opcode.1)
+ %next1 = getelementptr inbounds i32, i32* %p.addr.1, i64 1
+ %newp1 = load i32, i32* %p.addr.1, align 4
+ %idx1 = sext i32 %newp1 to i64
+ %arrayidx1 = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 %idx1
+ %newop1 = load i8*, i8** %arrayidx1, align 8
+ indirectbr i8* %newop1, [label %bb0, label %bb1]
+
+exit:
+ ret void
+}
+
+; Make sure we do the right thing for cases where the indirectbr branches back
+; to the block that contains it.
+define void @loop(i64* nocapture readonly %p) {
+; CHECK-LABEL: @loop(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
+; CHECK: bb0:
+; CHECK-NEXT: br label [[DOTSPLIT]]
+; CHECK: .split:
+; CHECK-NEXT: [[MERGE:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[BB0:%.*]] ], [ 0, [[BB0_CLONE:%.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 [[MERGE]]
+; CHECK-NEXT: store i64 [[MERGE]], i64* [[TMP0]], align 4
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[MERGE]], 1
+; CHECK-NEXT: [[IDX:%.*]] = srem i64 [[MERGE]], 2
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @loop.targets, i64 0, i64 [[IDX]]
+; CHECK-NEXT: [[TARGET:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
+; CHECK-NEXT: indirectbr i8* [[TARGET]], [label [[BB0]], label %bb1]
+; CHECK: bb1:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %bb0
+
+bb0:
+ %i = phi i64 [ %i.next, %bb0 ], [ 0, %entry ]
+ %tmp0 = getelementptr inbounds i64, i64* %p, i64 %i
+ store i64 %i, i64* %tmp0, align 4
+ %i.next = add nuw nsw i64 %i, 1
+ %idx = srem i64 %i, 2
+ %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @loop.targets, i64 0, i64 %idx
+ %target = load i8*, i8** %arrayidx, align 8
+ indirectbr i8* %target, [label %bb0, label %bb1]
+
+bb1:
+ ret void
+}
+
+; Don't do anything for cases that contain no phis.
+define void @nophi(i32* %p) {
+; CHECK-LABEL: @nophi(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT: [[INITOP:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
+; CHECK-NEXT: switch i32 [[INITOP]], label [[EXIT:%.*]] [
+; CHECK-NEXT: i32 0, label [[BB0:%.*]]
+; CHECK-NEXT: i32 1, label [[BB1:%.*]]
+; CHECK-NEXT: ]
+; CHECK: bb0:
+; CHECK-NEXT: tail call void @use(i32 0)
+; CHECK-NEXT: br label [[INDIRECTGOTO:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: tail call void @use(i32 1)
+; CHECK-NEXT: br label [[INDIRECTGOTO]]
+; CHECK: indirectgoto:
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to i8*
+; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 4
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[SUNKADDR]] to i32*
+; CHECK-NEXT: [[NEWP:%.*]] = load i32, i32* [[TMP1]], align 4
+; CHECK-NEXT: [[IDX:%.*]] = sext i32 [[NEWP]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @nophi.targets, i64 0, i64 [[IDX]]
+; CHECK-NEXT: [[NEWOP:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
+; CHECK-NEXT: indirectbr i8* [[NEWOP]], [label [[BB0]], label %bb1]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %incdec.ptr = getelementptr inbounds i32, i32* %p, i64 1
+ %initop = load i32, i32* %incdec.ptr, align 4
+ switch i32 %initop, label %exit [
+ i32 0, label %bb0
+ i32 1, label %bb1
+ ]
+
+bb0:
+  tail call void @use(i32 0)
+  br label %indirectgoto
+
+bb1:
+ tail call void @use(i32 1)
+ br label %indirectgoto
+
+indirectgoto:
+ %newp = load i32, i32* %incdec.ptr, align 4
+ %idx = sext i32 %newp to i64
+ %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @nophi.targets, i64 0, i64 %idx
+ %newop = load i8*, i8** %arrayidx, align 8
+ indirectbr i8* %newop, [label %bb0, label %bb1]
+
+exit:
+ ret void
+}
+
+; Don't do anything if the edge isn't critical.
+define i32 @noncritical(i32 %k, i8* %p)
+; CHECK-LABEL: @noncritical(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[D:%.*]] = add i32 [[K:%.*]], 1
+; CHECK-NEXT: indirectbr i8* [[P:%.*]], [label [[BB0:%.*]], label %bb1]
+; CHECK: bb0:
+; CHECK-NEXT: [[R0:%.*]] = sub i32 [[K]], [[D]]
+; CHECK-NEXT: br label [[EXIT:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[R1:%.*]] = sub i32 [[D]], [[K]]
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: [[V:%.*]] = phi i32 [ [[R0]], [[BB0]] ], [ [[R1]], [[BB1:%.*]] ]
+; CHECK-NEXT: ret i32 0
+;
+{
+entry:
+ %d = add i32 %k, 1
+ indirectbr i8* %p, [label %bb0, label %bb1]
+
+bb0:
+ %v00 = phi i32 [%k, %entry]
+ %v01 = phi i32 [%d, %entry]
+ %r0 = sub i32 %v00, %v01
+ br label %exit
+
+bb1:
+ %v10 = phi i32 [%d, %entry]
+ %v11 = phi i32 [%k, %entry]
+ %r1 = sub i32 %v10, %v11
+ br label %exit
+
+exit:
+ %v = phi i32 [%r0, %bb0], [%r1, %bb1]
+ ret i32 0
+}
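
As background, `indirectbr` through a table of `blockaddress` constants is what Clang emits for the GNU labels-as-values ("computed goto") extension. A minimal GNU C++ sketch of such a dispatch loop follows (hypothetical; these tests were not necessarily generated from it):

    extern "C" void use(int);

    // Each `goto *targets[...]` becomes an indirectbr through a constant
    // table like @simple.targets; the edges it creates are the critical
    // edges the pass splits (or leaves alone) in the tests above.
    void run(const int *code) {
      static void *const targets[2] = { &&op_use, &&op_ret };
      goto *targets[*code++];      // initial dispatch
    op_use:
      use(*code++);                // consume an operand
      goto *targets[*code++];      // dispatch to the next opcode
    op_ret:
      return;
    }
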
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll b/llvm/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll
new file mode 100644
index 00000000000..72d82e2a162
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll
@@ -0,0 +1,56 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s --check-prefix=SLOW
+; RUN: opt -S -codegenprepare -mattr=+bmi < %s | FileCheck %s --check-prefix=FAST_TZ
+; RUN: opt -S -codegenprepare -mattr=+lzcnt < %s | FileCheck %s --check-prefix=FAST_LZ
+
+target triple = "x86_64-unknown-unknown"
+target datalayout = "e-n32:64"
+
+; If the intrinsic is cheap, nothing should change.
+; If the intrinsic is expensive, check if the input is zero to avoid the call.
+; This is undoing speculation that may have been created by SimplifyCFG + InstCombine.
+
+define i64 @cttz(i64 %A) {
+entry:
+ %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
+ ret i64 %z
+
+; SLOW-LABEL: @cttz(
+; SLOW: entry:
+; SLOW: %cmpz = icmp eq i64 %A, 0
+; SLOW: br i1 %cmpz, label %cond.end, label %cond.false
+; SLOW: cond.false:
+; SLOW: %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
+; SLOW: br label %cond.end
+; SLOW: cond.end:
+; SLOW: %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
+; SLOW: ret i64 %ctz
+
+; FAST_TZ-LABEL: @cttz(
+; FAST_TZ: %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
+; FAST_TZ: ret i64 %z
+}
+
+define i64 @ctlz(i64 %A) {
+entry:
+ %z = call i64 @llvm.ctlz.i64(i64 %A, i1 false)
+ ret i64 %z
+
+; SLOW-LABEL: @ctlz(
+; SLOW: entry:
+; SLOW: %cmpz = icmp eq i64 %A, 0
+; SLOW: br i1 %cmpz, label %cond.end, label %cond.false
+; SLOW: cond.false:
+; SLOW: %z = call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+; SLOW: br label %cond.end
+; SLOW: cond.end:
+; SLOW: %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
+; SLOW: ret i64 %ctz
+
+; FAST_LZ-LABEL: @ctlz(
+; FAST_LZ: %z = call i64 @llvm.ctlz.i64(i64 %A, i1 false)
+; FAST_LZ: ret i64 %z
+}
+
+declare i64 @llvm.cttz.i64(i64, i1)
+declare i64 @llvm.ctlz.i64(i64, i1)
+
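
The idiom being protected here, as a hedged C++ sketch (function name and shape are assumptions): programmers write the guarded form below because __builtin_ctzll is undefined at zero. SimplifyCFG and InstCombine speculate it into a bare cttz call that is defined at zero, and on targets without BMI/LZCNT CodeGenPrepare re-inserts the zero check so the expensive expansion runs only when needed.

    // Sketch of the source-level pattern; with -mattr=+bmi the branch never
    // reappears because TZCNT is cheap and well-defined at zero.
    unsigned long long cttz(unsigned long long A) {
      return A == 0 ? 64 : __builtin_ctzll(A);  // guard: ctzll is UB at A == 0
    }
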
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/ext-logicop.ll b/llvm/test/Transforms/CodeGenPrepare/X86/ext-logicop.ll
new file mode 100644
index 00000000000..51d1e0ab676
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/ext-logicop.ll
@@ -0,0 +1,128 @@
+; RUN: opt < %s -codegenprepare -S -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+
+@a = global [10 x i8] zeroinitializer, align 1
+declare void @foo()
+
+; ext(and(ld, cst)) -> and(ext(ld), ext(cst))
+define void @test1(i32* %p, i32 %ll) {
+; CHECK-LABEL: @test1
+; CHECK-NEXT: entry:
+; CHECK-NEXT: load
+; CHECK-NEXT: zext
+; CHECK-NEXT: and
+entry:
+ %tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1
+ %and = and i8 %tmp, 60
+ %cmp = icmp ugt i8 %and, 20
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %conv2 = zext i8 %and to i32
+ %add = add nsw i32 %conv2, %ll
+ store i32 %add, i32* %p, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ tail call void @foo()
+ ret void
+}
+
+; ext(or(ld, cst)) -> or(ext(ld), ext(cst))
+define void @test2(i32* %p, i32 %ll) {
+; CHECK-LABEL: @test2
+; CHECK-NEXT: entry:
+; CHECK-NEXT: load
+; CHECK-NEXT: zext
+; CHECK-NEXT: or
+entry:
+ %tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1
+ %or = or i8 %tmp, 60
+ %cmp = icmp ugt i8 %or, 20
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %conv2 = zext i8 %or to i32
+ %add = add nsw i32 %conv2, %ll
+ store i32 %add, i32* %p, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ tail call void @foo()
+ ret void
+}
+
+; ext(and(shl(ld, cst), cst)) -> and(shl(ext(ld), ext(cst)), ext(cst))
+define void @test3(i32* %p, i32 %ll) {
+; CHECK-LABEL: @test3
+; CHECK-NEXT: entry:
+; CHECK-NEXT: load
+; CHECK-NEXT: zext
+; CHECK-NEXT: shl
+; CHECK-NEXT: and
+entry:
+ %tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1
+ %shl = shl i8 %tmp, 2
+ %and = and i8 %shl, 60
+ %cmp = icmp ugt i8 %and, 20
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %conv2 = zext i8 %and to i32
+ %add = add nsw i32 %conv2, %ll
+ store i32 %add, i32* %p, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ tail call void @foo()
+ ret void
+}
+
+; zext(lshr(ld, cst)) -> lshr(zext(ld), zext(cst))
+define void @test4(i32* %p, i32 %ll) {
+; CHECK-LABEL: @test4
+; CHECK-NEXT: entry:
+; CHECK-NEXT: load
+; CHECK-NEXT: zext
+; CHECK-NEXT: lshr
+entry:
+ %tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1
+ %lshr = lshr i8 %tmp, 2
+ %cmp = icmp ugt i8 %lshr, 20
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %conv2 = zext i8 %lshr to i32
+ %add = add nsw i32 %conv2, %ll
+ store i32 %add, i32* %p, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ tail call void @foo()
+ ret void
+}
+
+; ext(xor(ld, cst)) -> xor(ext(ld), ext(cst))
+define void @test5(i32* %p, i32 %ll) {
+; CHECK-LABEL: @test5
+; CHECK-NEXT: entry:
+; CHECK-NEXT: load
+; CHECK-NEXT: zext
+; CHECK-NEXT: xor
+entry:
+ %tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1
+ %xor = xor i8 %tmp, 60
+ %cmp = icmp ugt i8 %xor, 20
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %conv2 = zext i8 %xor to i32
+ %add = add nsw i32 %conv2, %ll
+ store i32 %add, i32* %p, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ tail call void @foo()
+ ret void
+}
+
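
A hedged C++ sketch of the shape behind @test1 (the concrete source is an assumption): the narrow logic-op result feeds both a compare and wider arithmetic, so promoting the zext through the `and` lets the extension fold into the load.

    extern unsigned char a[10];
    extern "C" void foo();

    void test1(int *p, int ll) {
      unsigned char masked = a[0] & 60;  // and i8 %tmp, 60
      if (masked > 20)
        *p = (int)masked + ll;           // zext promoted above the 'and'
      foo();
    }
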
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll b/llvm/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll
new file mode 100644
index 00000000000..519e1ee2ce6
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll
@@ -0,0 +1,64 @@
+; RUN: opt -codegenprepare -disable-cgp-branch-opts -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; The first cast should be sunk into block2, in order that the
+; instruction selector can form an efficient
+; i64 * i64 -> i128 multiplication.
+define i128 @sink(i64* %mem1, i64* %mem2) {
+; CHECK-LABEL: block1:
+; CHECK-NEXT: load
+block1:
+ %l1 = load i64, i64* %mem1
+ %s1 = sext i64 %l1 to i128
+ br label %block2
+
+; CHECK-LABEL: block2:
+; CHECK-NEXT: sext
+; CHECK-NEXT: load
+; CHECK-NEXT: sext
+block2:
+ %l2 = load i64, i64* %mem2
+ %s2 = sext i64 %l2 to i128
+ %res = mul i128 %s1, %s2
+ ret i128 %res
+}
+
+; The first cast should be hoisted into block1, in order that the
+; instruction selector can form an extend-load.
+define i64 @hoist(i32* %mem1, i32* %mem2) {
+; CHECK-LABEL: block1:
+; CHECK-NEXT: load
+; CHECK-NEXT: sext
+block1:
+ %l1 = load i32, i32* %mem1
+ br label %block2
+
+; CHECK-LABEL: block2:
+; CHECK-NEXT: load
+; CHECK-NEXT: sext
+block2:
+ %s1 = sext i32 %l1 to i64
+ %l2 = load i32, i32* %mem2
+ %s2 = sext i32 %l2 to i64
+ %res = mul i64 %s1, %s2
+ ret i64 %res
+}
+
+; Make sure the cast sink logic and OptimizeExtUses don't end up in an infinite
+; loop.
+define i128 @use_ext_source() {
+block1:
+ %v1 = or i64 undef, undef
+ %v2 = zext i64 %v1 to i128
+ br i1 undef, label %block2, label %block3
+
+block2:
+ %v3 = add i64 %v1, 1
+ %v4 = zext i64 %v3 to i128
+ br label %block3
+
+block3:
+ %res = phi i128 [ %v2, %block1 ], [ %v4, %block2 ]
+ ret i128 %res
+}
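
For reference, a C++ sketch of what @sink enables (hypothetical source): with both sign extensions placed next to the multiply, the x86 selector can emit a single 64x64->128 widening multiply instead of materializing the i128 operands early.

    // clang on x86_64 lowers this to one widening IMUL/MULX once the sext
    // sits in the same block as the mul, which is what the pass arranges.
    __int128 sink(const long long *m1, const long long *m2) {
      return (__int128)*m1 * (__int128)*m2;
    }
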
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/fcmp-sinking.ll b/llvm/test/Transforms/CodeGenPrepare/X86/fcmp-sinking.ll
new file mode 100644
index 00000000000..94ab74f9e7b
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/fcmp-sinking.ll
@@ -0,0 +1,29 @@
+; RUN: opt %s -codegenprepare -mattr=+soft-float -S | FileCheck %s -check-prefix=CHECK -check-prefix=SOFTFP
+; RUN: opt %s -codegenprepare -mattr=-soft-float -S | FileCheck %s -check-prefix=CHECK -check-prefix=HARDFP
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: @foo
+; CHECK: entry:
+; SOFTFP: fcmp
+; HARDFP-NOT: fcmp
+; CHECK: body:
+; SOFTFP-NOT: fcmp
+; HARDFP: fcmp
+define void @foo(float %a, float %b) {
+entry:
+ %c = fcmp oeq float %a, %b
+ br label %head
+head:
+ %IND = phi i32 [ 0, %entry ], [ %IND.new, %body1 ]
+ %CMP = icmp slt i32 %IND, 1250
+ br i1 %CMP, label %body, label %tail
+body:
+ br i1 %c, label %body1, label %tail
+body1:
+ %IND.new = add i32 %IND, 1
+ br label %head
+tail:
+ ret void
+}
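
The intent, as a hedged C++ analogue (assumed source shape): the compare is loop-invariant, but whether moving it is profitable depends on cost. With hard float the fcmp is one cheap instruction and may be sunk to its use inside the loop; with +soft-float it becomes a libcall and must stay hoisted in the entry block.

    void foo(float a, float b) {
      bool c = (a == b);              // the fcmp under test
      for (int i = 0; i < 1250; ++i)
        if (!c)                       // sole use of the compare
          break;
    }
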
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/lit.local.cfg b/llvm/test/Transforms/CodeGenPrepare/X86/lit.local.cfg
new file mode 100644
index 00000000000..e71f3cc4c41
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/lit.local.cfg
@@ -0,0 +1,3 @@
+if 'X86' not in config.root.targets:
+ config.unsupported = True
+
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/memset_chk-simplify-nobuiltin.ll b/llvm/test/Transforms/CodeGenPrepare/X86/memset_chk-simplify-nobuiltin.ll
new file mode 100644
index 00000000000..f4c1af5ed46
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/memset_chk-simplify-nobuiltin.ll
@@ -0,0 +1,18 @@
+; RUN: opt -S -disable-simplify-libcalls -codegenprepare < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; This is a workaround for PR23093: when building with -mkernel/-fno-builtin,
+; we still generate fortified library calls.
+
+; Check that we ignore two things:
+; - attribute nobuiltin
+; - TLI::has (always returns false thanks to -disable-simplify-libcalls)
+
+; CHECK-NOT: _chk
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 %len, i1 false)
+define void @test_nobuiltin(i8* %dst, i64 %len) {
+ call i8* @__memset_chk(i8* %dst, i32 0, i64 %len, i64 -1) nobuiltin
+ ret void
+}
+
+declare i8* @__memset_chk(i8*, i32, i64, i64)
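
A hedged sketch of the call shape (the declaration matches the test; the C++ caller is an assumption): __memset_chk is the fortified form emitted under _FORTIFY_SOURCE, carrying the destination object size as an extra argument. A size of (size_t)-1 means "unknown", so lowering to a plain llvm.memset is always safe, and the test checks that this happens even with `nobuiltin` and -disable-simplify-libcalls in effect.

    #include <stddef.h>

    extern "C" void *__memset_chk(void *dst, int c, size_t len, size_t dstlen);

    extern "C" void test_nobuiltin(void *dst, size_t len) {
      // dstlen == (size_t)-1: object size unknown, so no _chk trap is
      // possible and the call can degrade to memset / llvm.memset.
      __memset_chk(dst, 0, len, (size_t)-1);
    }
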
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/multi-extension.ll b/llvm/test/Transforms/CodeGenPrepare/X86/multi-extension.ll
new file mode 100644
index 00000000000..950f9f2e04a
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/multi-extension.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -codegenprepare -S -mtriple=x86_64-unknown-unknown | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.13.0"
+
+declare void @bar(i64)
+
+@b = global i16 0, align 2
+
+; This test case is extracted from PR38125.
+; %or is reachable by both a sext and a zext that are going to be promoted.
+; It verifies that the PromotedInsts bookkeeping is handled correctly.
+
+; CHECK: %promoted = trunc i32 %or to i16
+; CHECK-NEXT: %c = sext i16 %promoted to i64
+define i32 @foo(i16 %kkk) {
+entry:
+ %t4 = load i16, i16* @b, align 2
+ %conv4 = zext i16 %t4 to i32
+ %or = or i16 %kkk, %t4
+ %c = sext i16 %or to i64
+ call void @bar(i64 %c)
+ %t5 = and i16 %or, 5
+ %z = zext i16 %t5 to i32
+ ret i32 %z
+}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/optimizeSelect-DT.ll b/llvm/test/Transforms/CodeGenPrepare/X86/optimizeSelect-DT.ll
new file mode 100644
index 00000000000..dc638425355
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/optimizeSelect-DT.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i1 @PR41004(i32 %x, i32 %y, i32 %t1) {
+; CHECK-LABEL: @PR41004(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[T0:%.*]] = icmp eq i32 [[Y:%.*]], 1
+; CHECK-NEXT: br i1 [[T0]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]]
+; CHECK: select.true.sink:
+; CHECK-NEXT: [[REM:%.*]] = srem i32 [[X:%.*]], 2
+; CHECK-NEXT: br label [[SELECT_END]]
+; CHECK: select.end:
+; CHECK-NEXT: [[MUL:%.*]] = phi i32 [ [[REM]], [[SELECT_TRUE_SINK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[T1:%.*]], i32 1)
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0
+; CHECK-NEXT: [[OV:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MATH]], [[MUL]]
+; CHECK-NEXT: ret i1 [[OV]]
+;
+entry:
+ %rem = srem i32 %x, 2
+ %t0 = icmp eq i32 %y, 1
+ %mul = select i1 %t0, i32 %rem, i32 0
+ %neg = add i32 %t1, -1
+ %add = add i32 %neg, %mul
+ br label %if
+
+if:
+ %tobool = icmp eq i32 %t1, 0
+ ret i1 %tobool
+}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
new file mode 100644
index 00000000000..ab636c39ddb
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
@@ -0,0 +1,519 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+; RUN: opt -enable-debugify -codegenprepare -S < %s 2>&1 | FileCheck %s -check-prefix=DEBUG
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+define i64 @uaddo1(i64 %a, i64 %b) nounwind ssp {
+; CHECK-LABEL: @uaddo1(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]])
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
+; CHECK-NEXT: ret i64 [[Q]]
+;
+ %add = add i64 %b, %a
+ %cmp = icmp ult i64 %add, %a
+ %Q = select i1 %cmp, i64 %b, i64 42
+ ret i64 %Q
+}
+
+define i64 @uaddo2(i64 %a, i64 %b) nounwind ssp {
+; CHECK-LABEL: @uaddo2(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]])
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
+; CHECK-NEXT: ret i64 [[Q]]
+;
+ %add = add i64 %b, %a
+ %cmp = icmp ult i64 %add, %b
+ %Q = select i1 %cmp, i64 %b, i64 42
+ ret i64 %Q
+}
+
+define i64 @uaddo3(i64 %a, i64 %b) nounwind ssp {
+; CHECK-LABEL: @uaddo3(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]])
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
+; CHECK-NEXT: ret i64 [[Q]]
+;
+ %add = add i64 %b, %a
+ %cmp = icmp ugt i64 %b, %add
+ %Q = select i1 %cmp, i64 %b, i64 42
+ ret i64 %Q
+}
+
+define i64 @uaddo4(i64 %a, i64 %b, i1 %c) nounwind ssp {
+; CHECK-LABEL: @uaddo4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[NEXT:%.*]], label [[EXIT:%.*]]
+; CHECK: next:
+; CHECK-NEXT: [[TMP0:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]])
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
+; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
+; CHECK-NEXT: ret i64 [[Q]]
+; CHECK: exit:
+; CHECK-NEXT: ret i64 0
+;
+entry:
+ %add = add i64 %b, %a
+ %cmp = icmp ugt i64 %b, %add
+ br i1 %c, label %next, label %exit
+
+next:
+ %Q = select i1 %cmp, i64 %b, i64 42
+ ret i64 %Q
+
+exit:
+ ret i64 0
+}
+
+define i64 @uaddo5(i64 %a, i64 %b, i64* %ptr, i1 %c) nounwind ssp {
+; CHECK-LABEL: @uaddo5(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ADD:%.*]] = add i64 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: store i64 [[ADD]], i64* [[PTR:%.*]]
+; CHECK-NEXT: br i1 [[C:%.*]], label [[NEXT:%.*]], label [[EXIT:%.*]]
+; CHECK: next:
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[B]], [[ADD]]
+; CHECK-NEXT: [[Q:%.*]] = select i1 [[TMP0]], i64 [[B]], i64 42
+; CHECK-NEXT: ret i64 [[Q]]
+; CHECK: exit:
+; CHECK-NEXT: ret i64 0
+;
+entry:
+ %add = add i64 %b, %a
+ store i64 %add, i64* %ptr
+ %cmp = icmp ugt i64 %b, %add
+ br i1 %c, label %next, label %exit
+
+next:
+ %Q = select i1 %cmp, i64 %b, i64 42
+ ret i64 %Q
+
+exit:
+ ret i64 0
+}
+
+; When adding 1, the general pattern for add-overflow may be different due to icmp canonicalization.
+; PR31754: https://bugs.llvm.org/show_bug.cgi?id=31754
+
+define i1 @uaddo_i64_increment(i64 %x, i64* %p) {
+; CHECK-LABEL: @uaddo_i64_increment(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 1)
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT: store i64 [[MATH]], i64* [[P:%.*]]
+; CHECK-NEXT: ret i1 [[OV1]]
+;
+ %a = add i64 %x, 1
+ %ov = icmp eq i64 %a, 0
+ store i64 %a, i64* %p
+ ret i1 %ov
+}
+
+define i1 @uaddo_i8_increment_noncanonical_1(i8 %x, i8* %p) {
+; CHECK-LABEL: @uaddo_i8_increment_noncanonical_1(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 1, i8 [[X:%.*]])
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i8, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i8, i1 } [[TMP1]], 1
+; CHECK-NEXT: store i8 [[MATH]], i8* [[P:%.*]]
+; CHECK-NEXT: ret i1 [[OV1]]
+;
+ %a = add i8 1, %x ; commute
+ %ov = icmp eq i8 %a, 0
+ store i8 %a, i8* %p
+ ret i1 %ov
+}
+
+define i1 @uaddo_i32_increment_noncanonical_2(i32 %x, i32* %p) {
+; CHECK-LABEL: @uaddo_i32_increment_noncanonical_2(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 1)
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
+; CHECK-NEXT: store i32 [[MATH]], i32* [[P:%.*]]
+; CHECK-NEXT: ret i1 [[OV1]]
+;
+ %a = add i32 %x, 1
+ %ov = icmp eq i32 0, %a ; commute
+ store i32 %a, i32* %p
+ ret i1 %ov
+}
+
+define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, i16* %p) {
+; CHECK-LABEL: @uaddo_i16_increment_noncanonical_3(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 1, i16 [[X:%.*]])
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i16, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i16, i1 } [[TMP1]], 1
+; CHECK-NEXT: store i16 [[MATH]], i16* [[P:%.*]]
+; CHECK-NEXT: ret i1 [[OV1]]
+;
+ %a = add i16 1, %x ; commute
+ %ov = icmp eq i16 0, %a ; commute
+ store i16 %a, i16* %p
+ ret i1 %ov
+}
+
+; The overflow check may be against the input rather than the sum.
+
+define i1 @uaddo_i64_increment_alt(i64 %x, i64* %p) {
+; CHECK-LABEL: @uaddo_i64_increment_alt(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 1)
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT: store i64 [[MATH]], i64* [[P:%.*]]
+; CHECK-NEXT: ret i1 [[OV1]]
+;
+ %a = add i64 %x, 1
+ store i64 %a, i64* %p
+ %ov = icmp eq i64 %x, -1
+ ret i1 %ov
+}
+
+; Make sure insertion is done correctly based on dominance.
+
+define i1 @uaddo_i64_increment_alt_dom(i64 %x, i64* %p) {
+; CHECK-LABEL: @uaddo_i64_increment_alt_dom(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 1)
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT: store i64 [[MATH]], i64* [[P:%.*]]
+; CHECK-NEXT: ret i1 [[OV1]]
+;
+ %ov = icmp eq i64 %x, -1
+ %a = add i64 %x, 1
+ store i64 %a, i64* %p
+ ret i1 %ov
+}
+
+; The overflow check may be against the input rather than the sum.
+
+define i1 @uaddo_i64_decrement_alt(i64 %x, i64* %p) {
+; CHECK-LABEL: @uaddo_i64_decrement_alt(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 -1)
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT: store i64 [[MATH]], i64* [[P:%.*]]
+; CHECK-NEXT: ret i1 [[OV1]]
+;
+ %a = add i64 %x, -1
+ store i64 %a, i64* %p
+ %ov = icmp ne i64 %x, 0
+ ret i1 %ov
+}
+
+; Make sure insertion is done correctly based on dominance.
+
+define i1 @uaddo_i64_decrement_alt_dom(i64 %x, i64* %p) {
+; CHECK-LABEL: @uaddo_i64_decrement_alt_dom(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 -1)
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT: store i64 [[MATH]], i64* [[P:%.*]]
+; CHECK-NEXT: ret i1 [[OV1]]
+;
+ %ov = icmp ne i64 %x, 0
+ %a = add i64 %x, -1
+ store i64 %a, i64* %p
+ ret i1 %ov
+}
+
+; No transform for illegal types.
+
+define i1 @uaddo_i42_increment_illegal_type(i42 %x, i42* %p) {
+; CHECK-LABEL: @uaddo_i42_increment_illegal_type(
+; CHECK-NEXT: [[A:%.*]] = add i42 [[X:%.*]], 1
+; CHECK-NEXT: [[OV:%.*]] = icmp eq i42 [[A]], 0
+; CHECK-NEXT: store i42 [[A]], i42* [[P:%.*]]
+; CHECK-NEXT: ret i1 [[OV]]
+;
+ %a = add i42 %x, 1
+ %ov = icmp eq i42 %a, 0
+ store i42 %a, i42* %p
+ ret i1 %ov
+}
+
+define i1 @usubo_ult_i64(i64 %x, i64 %y, i64* %p) {
+; CHECK-LABEL: @usubo_ult_i64(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[X:%.*]], i64 [[Y:%.*]])
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT: store i64 [[MATH]], i64* [[P:%.*]]
+; CHECK-NEXT: ret i1 [[OV1]]
+;
+ %s = sub i64 %x, %y
+ store i64 %s, i64* %p
+ %ov = icmp ult i64 %x, %y
+ ret i1 %ov
+}
+
+; Verify insertion point for single-BB. Toggle predicate.
+
+define i1 @usubo_ugt_i32(i32 %x, i32 %y, i32* %p) {
+; CHECK-LABEL: @usubo_ugt_i32(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
+; CHECK-NEXT: store i32 [[MATH]], i32* [[P:%.*]]
+; CHECK-NEXT: ret i1 [[OV1]]
+;
+ %ov = icmp ugt i32 %y, %x
+ %s = sub i32 %x, %y
+ store i32 %s, i32* %p
+ ret i1 %ov
+}
+
+; Constant operand should match.
+
+define i1 @usubo_ugt_constant_op0_i8(i8 %x, i8* %p) {
+; CHECK-LABEL: @usubo_ugt_constant_op0_i8(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 42, i8 [[X:%.*]])
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i8, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i8, i1 } [[TMP1]], 1
+; CHECK-NEXT: store i8 [[MATH]], i8* [[P:%.*]]
+; CHECK-NEXT: ret i1 [[OV1]]
+;
+ %s = sub i8 42, %x
+ %ov = icmp ugt i8 %x, 42
+ store i8 %s, i8* %p
+ ret i1 %ov
+}
+
+; A compare with constant operand 0 is canonicalized by commuting it, but verify that we match the non-canonical form.
+
+define i1 @usubo_ult_constant_op0_i16(i16 %x, i16* %p) {
+; CHECK-LABEL: @usubo_ult_constant_op0_i16(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 43, i16 [[X:%.*]])
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i16, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i16, i1 } [[TMP1]], 1
+; CHECK-NEXT: store i16 [[MATH]], i16* [[P:%.*]]
+; CHECK-NEXT: ret i1 [[OV1]]
+;
+ %s = sub i16 43, %x
+ %ov = icmp ult i16 43, %x
+ store i16 %s, i16* %p
+ ret i1 %ov
+}
+
+; Subtract with constant operand 1 is canonicalized to add.
+
+define i1 @usubo_ult_constant_op1_i16(i16 %x, i16* %p) {
+; CHECK-LABEL: @usubo_ult_constant_op1_i16(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 [[X:%.*]], i16 44)
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i16, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i16, i1 } [[TMP1]], 1
+; CHECK-NEXT: store i16 [[MATH]], i16* [[P:%.*]]
+; CHECK-NEXT: ret i1 [[OV1]]
+;
+ %s = add i16 %x, -44
+ %ov = icmp ult i16 %x, 44
+ store i16 %s, i16* %p
+ ret i1 %ov
+}
+
+define i1 @usubo_ugt_constant_op1_i8(i8 %x, i8* %p) {
+; CHECK-LABEL: @usubo_ugt_constant_op1_i8(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[X:%.*]], i8 45)
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i8, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i8, i1 } [[TMP1]], 1
+; CHECK-NEXT: store i8 [[MATH]], i8* [[P:%.*]]
+; CHECK-NEXT: ret i1 [[OV1]]
+;
+ %ov = icmp ugt i8 45, %x
+ %s = add i8 %x, -45
+ store i8 %s, i8* %p
+ ret i1 %ov
+}
+
+; Special-case: subtract 1 changes the compare predicate and constant.
+
+define i1 @usubo_eq_constant1_op1_i32(i32 %x, i32* %p) {
+; CHECK-LABEL: @usubo_eq_constant1_op1_i32(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[X:%.*]], i32 1)
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
+; CHECK-NEXT: store i32 [[MATH]], i32* [[P:%.*]]
+; CHECK-NEXT: ret i1 [[OV1]]
+;
+ %s = add i32 %x, -1
+ %ov = icmp eq i32 %x, 0
+ store i32 %s, i32* %p
+ ret i1 %ov
+}
+
+; Special-case: subtract from 0 (negate) changes the compare predicate.
+
+define i1 @usubo_ne_constant0_op1_i32(i32 %x, i32* %p) {
+; CHECK-LABEL: @usubo_ne_constant0_op1_i32(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 [[X:%.*]])
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
+; CHECK-NEXT: store i32 [[MATH]], i32* [[P:%.*]]
+; CHECK-NEXT: ret i1 [[OV1]]
+;
+ %s = sub i32 0, %x
+ %ov = icmp ne i32 %x, 0
+ store i32 %s, i32* %p
+ ret i1 %ov
+}
+
+; Verify insertion point for multi-BB.
+
+declare void @call(i1)
+
+define i1 @usubo_ult_sub_dominates_i64(i64 %x, i64 %y, i64* %p, i1 %cond) {
+; CHECK-LABEL: @usubo_ult_sub_dominates_i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; CHECK: t:
+; CHECK-NEXT: [[TMP0:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[X:%.*]], i64 [[Y:%.*]])
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
+; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+; CHECK-NEXT: store i64 [[MATH]], i64* [[P:%.*]]
+; CHECK-NEXT: br i1 [[COND]], label [[END:%.*]], label [[F]]
+; CHECK: f:
+; CHECK-NEXT: ret i1 [[COND]]
+; CHECK: end:
+; CHECK-NEXT: ret i1 [[OV1]]
+;
+entry:
+ br i1 %cond, label %t, label %f
+
+t:
+ %s = sub i64 %x, %y
+ store i64 %s, i64* %p
+ br i1 %cond, label %end, label %f
+
+f:
+ ret i1 %cond
+
+end:
+ %ov = icmp ult i64 %x, %y
+ ret i1 %ov
+}
+
+define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, i64* %p, i1 %cond) {
+; CHECK-LABEL: @usubo_ult_cmp_dominates_i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; CHECK: t:
+; CHECK-NEXT: [[OV:%.*]] = icmp ult i64 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: call void @call(i1 [[OV]])
+; CHECK-NEXT: br i1 [[OV]], label [[END:%.*]], label [[F]]
+; CHECK: f:
+; CHECK-NEXT: ret i1 [[COND]]
+; CHECK: end:
+; CHECK-NEXT: [[TMP0:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[X]], i64 [[Y]])
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
+; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+; CHECK-NEXT: store i64 [[MATH]], i64* [[P:%.*]]
+; CHECK-NEXT: ret i1 [[OV1]]
+;
+entry:
+ br i1 %cond, label %t, label %f
+
+t:
+ %ov = icmp ult i64 %x, %y
+ call void @call(i1 %ov)
+ br i1 %ov, label %end, label %f
+
+f:
+ ret i1 %cond
+
+end:
+ %s = sub i64 %x, %y
+ store i64 %s, i64* %p
+ ret i1 %ov
+}
+
+; Verify that crazy/non-canonical code does not crash.
+
+define void @bar() {
+; CHECK-LABEL: @bar(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 1, -1
+; CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: unreachable
+;
+ %cmp = icmp eq i64 1, -1
+ %frombool = zext i1 %cmp to i8
+ unreachable
+}
+
+define void @foo() {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: [[SUB:%.*]] = add nsw i64 1, 1
+; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[SUB]] to i32
+; CHECK-NEXT: unreachable
+;
+ %sub = add nsw i64 1, 1
+ %conv = trunc i64 %sub to i32
+ unreachable
+}
+
+; Similarly for usubo.
+
+define i1 @bar2() {
+; CHECK-LABEL: @bar2(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 1, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = icmp eq i64 1, 0
+ ret i1 %cmp
+}
+
+define i64 @foo2(i8 *%p) {
+; CHECK-LABEL: @foo2(
+; CHECK-NEXT: [[SUB:%.*]] = add nsw i64 1, -1
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %sub = add nsw i64 1, -1
+ ret i64 %sub
+}
+
+; Avoid hoisting a math op into a dominating block which would
+; increase the critical path.
+
+define void @PR41129(i64* %p64) {
+; CHECK-LABEL: @PR41129(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[KEY:%.*]] = load i64, i64* [[P64:%.*]], align 8
+; CHECK-NEXT: [[COND17:%.*]] = icmp eq i64 [[KEY]], 0
+; CHECK-NEXT: br i1 [[COND17]], label [[TRUE:%.*]], label [[FALSE:%.*]]
+; CHECK: false:
+; CHECK-NEXT: [[ANDVAL:%.*]] = and i64 [[KEY]], 7
+; CHECK-NEXT: store i64 [[ANDVAL]], i64* [[P64]]
+; CHECK-NEXT: br label [[EXIT:%.*]]
+; CHECK: true:
+; CHECK-NEXT: [[SVALUE:%.*]] = add i64 [[KEY]], -1
+; CHECK-NEXT: store i64 [[SVALUE]], i64* [[P64]]
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %key = load i64, i64* %p64, align 8
+ %cond17 = icmp eq i64 %key, 0
+ br i1 %cond17, label %true, label %false
+
+false:
+ %andval = and i64 %key, 7
+ store i64 %andval, i64* %p64
+ br label %exit
+
+true:
+ %svalue = add i64 %key, -1
+ store i64 %svalue, i64* %p64
+ br label %exit
+
+exit:
+ ret void
+}
+
+; Check that every instruction inserted by -codegenprepare has a debug location.
+; DEBUG: CheckModuleDebugify: PASS
+
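
For orientation, the source-level idiom most of these tests reduce to, as a hedged C++ sketch (function names assumed): an unsigned add followed by a compare of the sum against one operand is the portable overflow check, and CodeGenPrepare fuses the pair into @llvm.uadd.with.overflow (likewise sub and @llvm.usub.with.overflow).

    // @uaddo1's shape: `add < a` holds iff b + a wrapped.
    unsigned long long uaddo1(unsigned long long a, unsigned long long b) {
      unsigned long long add = b + a;  // math half of the intrinsic
      return add < a ? b : 42;         // icmp ult %add, %a -> overflow bit
    }

    // @usubo_ult_i64's shape: x < y iff x - y wraps.
    bool usubo(unsigned long long x, unsigned long long y,
               unsigned long long *p) {
      *p = x - y;
      return x < y;
    }
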
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/pr27536.ll b/llvm/test/Transforms/CodeGenPrepare/X86/pr27536.ll
new file mode 100644
index 00000000000..7ab1b038e80
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/pr27536.ll
@@ -0,0 +1,32 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+@rtti = external global i8
+
+define void @test1() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %e = alloca i8
+ %tmpcast = bitcast i8* %e to i16*
+ invoke void @_CxxThrowException(i8* null, i8* null)
+ to label %catchret.dest unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %0 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %1 = catchpad within %0 [i8* @rtti, i32 0, i16* %tmpcast]
+ catchret from %1 to label %catchret.dest
+
+catchret.dest: ; preds = %catch
+ ret void
+}
+; CHECK-LABEL: define void @test1(
+; CHECK: %[[alloca:.*]] = alloca i8
+; CHECK-NEXT: %[[bc:.*]] = bitcast i8* %[[alloca]] to i16*
+
+; CHECK: catchpad within {{.*}} [i8* @rtti, i32 0, i16* %[[bc]]]
+
+declare void @_CxxThrowException(i8*, i8*)
+
+declare i32 @__CxxFrameHandler3(...)
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/pr35658.ll b/llvm/test/Transforms/CodeGenPrepare/X86/pr35658.ll
new file mode 100644
index 00000000000..bf6d0297475
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/pr35658.ll
@@ -0,0 +1,21 @@
+; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-phis=true -addr-sink-new-select=true %s | FileCheck %s
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+define void @f2() {
+entry:
+ %arraydecay = getelementptr inbounds [2 x i16], [2 x i16]* undef, i16 0, i16 0
+ %arrayidx1 = getelementptr inbounds [2 x i16], [2 x i16]* undef, i16 0, i16 1
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %e.03 = phi i16* [ %arraydecay, %entry ], [ %arrayidx1, %for.body ]
+ %tobool = icmp eq i16 undef, 0
+ br i1 undef, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+; CHECK: sunkaddr
+ %e.1.le = select i1 %tobool, i16* %arrayidx1, i16* %e.03
+ store i16 0, i16* %e.1.le, align 1
+ ret void
+}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/select.ll b/llvm/test/Transforms/CodeGenPrepare/X86/select.ll
new file mode 100644
index 00000000000..7829376e9db
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/select.ll
@@ -0,0 +1,205 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+; RUN: opt -debugify -codegenprepare -S < %s | FileCheck %s -check-prefix=DEBUG
+
+target triple = "x86_64-unknown-unknown"
+
+; Nothing to sink and convert here.
+
+define i32 @no_sink(double %a, double* %b, i32 %x, i32 %y) {
+; CHECK-LABEL: @no_sink(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[LOAD:%.*]] = load double, double* [[B:%.*]], align 8
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[LOAD]], [[A:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[X:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+entry:
+ %load = load double, double* %b, align 8
+ %cmp = fcmp olt double %load, %a
+ %sel = select i1 %cmp, i32 %x, i32 %y
+ ret i32 %sel
+}
+
+
+; An 'fdiv' is expensive, so sink it rather than speculatively execute it.
+
+define float @fdiv_true_sink(float %a, float %b) {
+; CHECK-LABEL: @fdiv_true_sink(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[A:%.*]], 1.000000e+00
+; CHECK-NEXT: br i1 [[CMP]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]]
+; CHECK: select.true.sink:
+; CHECK-NEXT: [[DIV:%.*]] = fdiv float [[A]], [[B:%.*]]
+; CHECK-NEXT: br label [[SELECT_END]]
+; CHECK: select.end:
+; CHECK-NEXT: [[SEL:%.*]] = phi float [ [[DIV]], [[SELECT_TRUE_SINK]] ], [ 2.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret float [[SEL]]
+;
+; DEBUG-LABEL: @fdiv_true_sink(
+; DEBUG-NEXT: entry:
+; DEBUG-NEXT: [[CMP:%.*]] = fcmp ogt float [[A:%.*]], 1.000000e+00
+; DEBUG-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP]]
+; DEBUG-NEXT: br i1 [[CMP]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !dbg
+; DEBUG: select.true.sink:
+; DEBUG-NEXT: [[DIV:%.*]] = fdiv float [[A]], [[B:%.*]]
+; DEBUG-NEXT: call void @llvm.dbg.value(metadata float [[DIV]]
+; DEBUG-NEXT: br label [[SELECT_END]], !dbg
+; DEBUG: select.end:
+; DEBUG-NEXT: [[SEL:%.*]] = phi float [ [[DIV]], [[SELECT_TRUE_SINK]] ], [ 2.000000e+00, [[ENTRY:%.*]] ], !dbg
+; DEBUG-NEXT: call void @llvm.dbg.value(metadata float [[SEL]]
+; DEBUG-NEXT: ret float [[SEL]]
+;
+entry:
+ %div = fdiv float %a, %b
+ %cmp = fcmp ogt float %a, 1.0
+ %sel = select i1 %cmp, float %div, float 2.0
+ ret float %sel
+}
+
+define float @fdiv_false_sink(float %a, float %b) {
+; CHECK-LABEL: @fdiv_false_sink(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[A:%.*]], 3.000000e+00
+; CHECK-NEXT: br i1 [[CMP]], label [[SELECT_END:%.*]], label [[SELECT_FALSE_SINK:%.*]]
+; CHECK: select.false.sink:
+; CHECK-NEXT: [[DIV:%.*]] = fdiv float [[A]], [[B:%.*]]
+; CHECK-NEXT: br label [[SELECT_END]]
+; CHECK: select.end:
+; CHECK-NEXT: [[SEL:%.*]] = phi float [ 4.000000e+00, [[ENTRY:%.*]] ], [ [[DIV]], [[SELECT_FALSE_SINK]] ]
+; CHECK-NEXT: ret float [[SEL]]
+;
+; DEBUG-LABEL: @fdiv_false_sink(
+; DEBUG-NEXT: entry:
+; DEBUG-NEXT: [[CMP:%.*]] = fcmp ogt float [[A:%.*]], 3.000000e+00
+; DEBUG-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP]]
+; DEBUG-NEXT: br i1 [[CMP]], label [[SELECT_END:%.*]], label [[SELECT_FALSE_SINK:%.*]], !dbg
+; DEBUG: select.false.sink:
+; DEBUG-NEXT: [[DIV:%.*]] = fdiv float [[A]], [[B:%.*]]
+; DEBUG-NEXT: call void @llvm.dbg.value(metadata float [[DIV]]
+; DEBUG-NEXT: br label [[SELECT_END]], !dbg
+; DEBUG: select.end:
+; DEBUG-NEXT: [[SEL:%.*]] = phi float [ 4.000000e+00, [[ENTRY:%.*]] ], [ [[DIV]], [[SELECT_FALSE_SINK]] ], !dbg
+; DEBUG-NEXT: call void @llvm.dbg.value(metadata float [[SEL]]
+; DEBUG-NEXT: ret float [[SEL]], !dbg
+;
+entry:
+ %div = fdiv float %a, %b
+ %cmp = fcmp ogt float %a, 3.0
+ %sel = select i1 %cmp, float 4.0, float %div
+ ret float %sel
+}
+
+define float @fdiv_both_sink(float %a, float %b) {
+; CHECK-LABEL: @fdiv_both_sink(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[A:%.*]], 5.000000e+00
+; CHECK-NEXT: br i1 [[CMP]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_FALSE_SINK:%.*]]
+; CHECK: select.true.sink:
+; CHECK-NEXT: [[DIV1:%.*]] = fdiv float [[A]], [[B:%.*]]
+; CHECK-NEXT: br label [[SELECT_END:%.*]]
+; CHECK: select.false.sink:
+; CHECK-NEXT: [[DIV2:%.*]] = fdiv float [[B]], [[A]]
+; CHECK-NEXT: br label [[SELECT_END]]
+; CHECK: select.end:
+; CHECK-NEXT: [[SEL:%.*]] = phi float [ [[DIV1]], [[SELECT_TRUE_SINK]] ], [ [[DIV2]], [[SELECT_FALSE_SINK]] ]
+; CHECK-NEXT: ret float [[SEL]]
+;
+entry:
+ %div1 = fdiv float %a, %b
+ %div2 = fdiv float %b, %a
+ %cmp = fcmp ogt float %a, 5.0
+ %sel = select i1 %cmp, float %div1, float %div2
+ ret float %sel
+}
+
+; But if the select is marked unpredictable, then don't turn it into a branch.
+
+define float @unpredictable_select(float %a, float %b) {
+; CHECK-LABEL: @unpredictable_select(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[DIV:%.*]] = fdiv float [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[A]], 1.000000e+00
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[DIV]], float 2.000000e+00, !unpredictable !0
+; CHECK-NEXT: ret float [[SEL]]
+;
+entry:
+ %div = fdiv float %a, %b
+ %cmp = fcmp ogt float %a, 1.0
+ %sel = select i1 %cmp, float %div, float 2.0, !unpredictable !0
+ ret float %sel
+}
+
+!0 = !{}
+
+; An 'fadd' is not too expensive, so it's ok to speculate.
+
+define float @fadd_no_sink(float %a, float %b) {
+; CHECK-LABEL: @fadd_no_sink(
+; CHECK-NEXT: [[ADD:%.*]] = fadd float [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float 6.000000e+00, [[A]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[ADD]], float 7.000000e+00
+; CHECK-NEXT: ret float [[SEL]]
+;
+ %add = fadd float %a, %b
+ %cmp = fcmp ogt float 6.0, %a
+ %sel = select i1 %cmp, float %add, float 7.0
+ ret float %sel
+}
+
+; Possible enhancement: sinkability is only computed for the direct
+; operands of the select, so we don't try to sink this; the cost of the
+; fdiv feeding the fadd is not taken into account (see the sketch after
+; this test).
+
+define float @fdiv_no_sink(float %a, float %b) {
+; CHECK-LABEL: @fdiv_no_sink(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[DIV:%.*]] = fdiv float [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[ADD:%.*]] = fadd float [[DIV]], [[B]]
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[A]], 1.000000e+00
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[ADD]], float 8.000000e+00
+; CHECK-NEXT: ret float [[SEL]]
+;
+entry:
+ %div = fdiv float %a, %b
+ %add = fadd float %div, %b
+ %cmp = fcmp ogt float %a, 1.0
+ %sel = select i1 %cmp, float %add, float 8.0
+ ret float %sel
+}
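+
+; If operand chains were taken into account, the sunk form would look
+; roughly like this (an illustrative sketch only - not checked above, and
+; the block names are assumed):
+;   select.true.sink:
+;     %div = fdiv float %a, %b
+;     %add = fadd float %div, %b
+;     br label %select.end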
+
+; Do not transform the CFG if the select operands may have side effects.
+
+declare i64* @bar(i32, i32, i32)
+declare i64* @baz(i32, i32, i32)
+
+define i64* @calls_no_sink(i32 %in) {
+; CHECK-LABEL: @calls_no_sink(
+; CHECK-NEXT: [[CALL1:%.*]] = call i64* @bar(i32 1, i32 2, i32 3)
+; CHECK-NEXT: [[CALL2:%.*]] = call i64* @baz(i32 1, i32 2, i32 3)
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[IN:%.*]], 0
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TOBOOL]], i64* [[CALL1]], i64* [[CALL2]]
+; CHECK-NEXT: ret i64* [[SEL]]
+;
+ %call1 = call i64* @bar(i32 1, i32 2, i32 3)
+ %call2 = call i64* @baz(i32 1, i32 2, i32 3)
+ %tobool = icmp ne i32 %in, 0
+ %sel = select i1 %tobool, i64* %call1, i64* %call2
+ ret i64* %sel
+}
+
+define i32 @sdiv_no_sink(i32 %a, i32 %b) {
+; CHECK-LABEL: @sdiv_no_sink(
+; CHECK-NEXT: [[DIV1:%.*]] = sdiv i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[DIV2:%.*]] = sdiv i32 [[B]], [[A]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A]], 5
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[DIV1]], i32 [[DIV2]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %div1 = sdiv i32 %a, %b
+ %div2 = sdiv i32 %b, %a
+ %cmp = icmp sgt i32 %a, 5
+ %sel = select i1 %cmp, i32 %div1, i32 %div2
+ ret i32 %sel
+}
+
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll
new file mode 100644
index 00000000000..e914c1a3da6
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll
@@ -0,0 +1,543 @@
+; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-phis=true -addr-sink-new-select=true %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-YES
+; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-phis=false -addr-sink-new-select=true %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NO
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
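+; The two RUN lines differ only in -addr-sink-new-phis: with it enabled
+; (CHECK-YES) the pass may build a new phi over the sunk addresses, which
+; shows up under the name "sunk_phi"; with it disabled (CHECK-NO) the
+; original phis must remain. A sunk address itself appears as a "sunkaddr"
+; getelementptr over i8 with an explicit byte offset (5 x i64 = 40 bytes
+; in most tests below).
+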
+; Can we sink for a different base if there is no phi for the base?
+define i32 @test1(i1 %cond, i64* %b1, i64* %b2) {
+; CHECK-LABEL: @test1
+entry:
+ %a1 = getelementptr inbounds i64, i64* %b1, i64 5
+ %c1 = bitcast i64* %a1 to i32*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+ %a2 = getelementptr inbounds i64, i64* %b2, i64 5
+ %c2 = bitcast i64* %a2 to i32*
+ br label %fallthrough
+
+fallthrough:
+; CHECK-YES: sunk_phi
+; CHECK-NO-LABEL: fallthrough:
+; CHECK-NO: phi
+; CHECK-NO-NEXT: load
+ %c = phi i32* [%c1, %entry], [%c2, %if.then]
+ %v = load i32, i32* %c, align 4
+ ret i32 %v
+}
+
+; Can we sink for a different base if there is a phi for the base?
+define i32 @test2(i1 %cond, i64* %b1, i64* %b2) {
+; CHECK-LABEL: @test2
+entry:
+ %a1 = getelementptr inbounds i64, i64* %b1, i64 5
+ %c1 = bitcast i64* %a1 to i32*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+ %a2 = getelementptr inbounds i64, i64* %b2, i64 5
+ %c2 = bitcast i64* %a2 to i32*
+ br label %fallthrough
+
+fallthrough:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+ %b = phi i64* [%b1, %entry], [%b2, %if.then]
+ %c = phi i32* [%c1, %entry], [%c2, %if.then]
+ %v = load i32, i32* %c, align 4
+ ret i32 %v
+}
+
+; Can we sink for a different base if there is a phi for the base, but not a valid one?
+define i32 @test3(i1 %cond, i64* %b1, i64* %b2) {
+; CHECK-LABEL: @test3
+entry:
+ %a1 = getelementptr inbounds i64, i64* %b1, i64 5
+ %c1 = bitcast i64* %a1 to i32*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+ %a2 = getelementptr inbounds i64, i64* %b2, i64 5
+ %c2 = bitcast i64* %a2 to i32*
+ br label %fallthrough
+
+fallthrough:
+; CHECK-YES: sunk_phi
+; CHECK-NO-LABEL: fallthrough:
+; CHECK-NO: phi
+; CHECK-NO: phi
+; CHECK-NO-NEXT: load
+ %b = phi i64* [%b2, %entry], [%b1, %if.then]
+ %c = phi i32* [%c1, %entry], [%c2, %if.then]
+ %v = load i32, i32* %c, align 4
+ ret i32 %v
+}
+
+; Can we sink for a different base if both addresses are in the same block?
+define i32 @test4(i1 %cond, i64* %b1, i64* %b2) {
+; CHECK-LABEL: @test4
+entry:
+ %a1 = getelementptr inbounds i64, i64* %b1, i64 5
+ %c1 = bitcast i64* %a1 to i32*
+ %a2 = getelementptr inbounds i64, i64* %b2, i64 5
+ %c2 = bitcast i64* %a2 to i32*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+ br label %fallthrough
+
+fallthrough:
+; CHECK-YES: sunk_phi
+; CHECK-NO-LABEL: fallthrough:
+; CHECK-NO: phi
+; CHECK-NO-NEXT: load
+ %c = phi i32* [%c1, %entry], [%c2, %if.then]
+ %v = load i32, i32* %c, align 4
+ ret i32 %v
+}
+
+; Can we sink for a different base if there is a phi for the base?
+; Both addresses are in the same block.
+define i32 @test5(i1 %cond, i64* %b1, i64* %b2) {
+; CHECK-LABEL: @test5
+entry:
+ %a1 = getelementptr inbounds i64, i64* %b1, i64 5
+ %c1 = bitcast i64* %a1 to i32*
+ %a2 = getelementptr inbounds i64, i64* %b2, i64 5
+ %c2 = bitcast i64* %a2 to i32*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+ br label %fallthrough
+
+fallthrough:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+ %b = phi i64* [%b1, %entry], [%b2, %if.then]
+ %c = phi i32* [%c1, %entry], [%c2, %if.then]
+ %v = load i32, i32* %c, align 4
+ ret i32 %v
+}
+
+; Can we sink for a different base if there is a phi for the base, but not a valid one?
+; Both addresses are in the same block.
+define i32 @test6(i1 %cond, i64* %b1, i64* %b2) {
+; CHECK-LABEL: @test6
+entry:
+ %a1 = getelementptr inbounds i64, i64* %b1, i64 5
+ %c1 = bitcast i64* %a1 to i32*
+ %a2 = getelementptr inbounds i64, i64* %b2, i64 5
+ %c2 = bitcast i64* %a2 to i32*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+ br label %fallthrough
+
+fallthrough:
+; CHECK-YES: sunk_phi
+; CHECK-NO-LABEL: fallthrough:
+; CHECK-NO: phi
+; CHECK-NO-NEXT: phi
+; CHECK-NO-NEXT: load
+ %b = phi i64* [%b2, %entry], [%b1, %if.then]
+ %c = phi i32* [%c1, %entry], [%c2, %if.then]
+ %v = load i32, i32* %c, align 4
+ ret i32 %v
+}
+
+; Case with a loop. No phi node.
+define i32 @test7(i32 %N, i1 %cond, i64* %b1, i64* %b2) {
+; CHECK-LABEL: @test7
+entry:
+ %a1 = getelementptr inbounds i64, i64* %b1, i64 5
+ %c1 = bitcast i64* %a1 to i32*
+ br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK-YES: sunk_phi
+ %iv = phi i32 [0, %entry], [%iv.inc, %fallthrough]
+ %c3 = phi i32* [%c1, %entry], [%c, %fallthrough]
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+ %a2 = getelementptr inbounds i64, i64* %b2, i64 5
+ %c2 = bitcast i64* %a2 to i32*
+ br label %fallthrough
+
+fallthrough:
+; CHECK-YES: sunk_phi
+; CHECK-NO-LABEL: fallthrough:
+; CHECK-NO: phi
+; CHECK-NO-NEXT: load
+ %c = phi i32* [%c3, %loop], [%c2, %if.then]
+ %v = load volatile i32, i32* %c, align 4
+ %iv.inc = add i32 %iv, 1
+ %cmp = icmp slt i32 %iv.inc, %N
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %v
+}
+
+; Case with a loop. There is a phi node.
+define i32 @test8(i32 %N, i1 %cond, i64* %b1, i64* %b2) {
+; CHECK-LABEL: @test8
+entry:
+ %a1 = getelementptr inbounds i64, i64* %b1, i64 5
+ %c1 = bitcast i64* %a1 to i32*
+ br label %loop
+
+loop:
+ %iv = phi i32 [0, %entry], [%iv.inc, %fallthrough]
+ %c3 = phi i32* [%c1, %entry], [%c, %fallthrough]
+ %b3 = phi i64* [%b1, %entry], [%b, %fallthrough]
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+ %a2 = getelementptr inbounds i64, i64* %b2, i64 5
+ %c2 = bitcast i64* %a2 to i32*
+ br label %fallthrough
+
+fallthrough:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+ %c = phi i32* [%c3, %loop], [%c2, %if.then]
+ %b = phi i64* [%b3, %loop], [%b2, %if.then]
+ %v = load volatile i32, i32* %c, align 4
+ %iv.inc = add i32 %iv, 1
+ %cmp = icmp slt i32 %iv.inc, %N
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %v
+}
+
+; Case with a loop. There is a phi node, but it does not fit.
+define i32 @test9(i32 %N, i1 %cond, i64* %b1, i64* %b2) {
+; CHECK-LABEL: @test9
+entry:
+ %a1 = getelementptr inbounds i64, i64* %b1, i64 5
+ %c1 = bitcast i64* %a1 to i32*
+ br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK-YES: sunk_phi
+ %iv = phi i32 [0, %entry], [%iv.inc, %fallthrough]
+ %c3 = phi i32* [%c1, %entry], [%c, %fallthrough]
+ %b3 = phi i64* [%b1, %entry], [%b2, %fallthrough]
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+ %a2 = getelementptr inbounds i64, i64* %b2, i64 5
+ %c2 = bitcast i64* %a2 to i32*
+ br label %fallthrough
+
+fallthrough:
+; CHECK-YES: sunk_phi
+; CHECK-NO-LABEL: fallthrough:
+; CHECK-NO: phi
+; CHECK-NO-NEXT: phi
+; CHECK-NO-NEXT: load
+ %c = phi i32* [%c3, %loop], [%c2, %if.then]
+ %b = phi i64* [%b3, %loop], [%b2, %if.then]
+ %v = load volatile i32, i32* %c, align 4
+ %iv.inc = add i32 %iv, 1
+ %cmp = icmp slt i32 %iv.inc, %N
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %v
+}
+
+; Case through a loop. No phi node.
+define i32 @test10(i32 %N, i1 %cond, i64* %b1, i64* %b2) {
+; CHECK-LABEL: @test10
+entry:
+ %a1 = getelementptr inbounds i64, i64* %b1, i64 5
+ %c1 = bitcast i64* %a1 to i32*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+ %a2 = getelementptr inbounds i64, i64* %b2, i64 5
+ %c2 = bitcast i64* %a2 to i32*
+ br label %fallthrough
+
+fallthrough:
+; CHECK-YES: sunk_phi
+; CHECK-NO-LABEL: fallthrough:
+; CHECK-NO-NEXT: phi
+; CHECK-NO-NEXT: br
+ %c = phi i32* [%c1, %entry], [%c2, %if.then]
+ br label %loop
+
+loop:
+ %iv = phi i32 [0, %fallthrough], [%iv.inc, %loop]
+ %iv.inc = add i32 %iv, 1
+ %cmp = icmp slt i32 %iv.inc, %N
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+; CHECK-YES: sunkaddr
+ %v = load volatile i32, i32* %c, align 4
+ ret i32 %v
+}
+
+; Case through a loop. There is a phi.
+define i32 @test11(i32 %N, i1 %cond, i64* %b1, i64* %b2) {
+; CHECK-LABEL: @test11
+entry:
+ %a1 = getelementptr inbounds i64, i64* %b1, i64 5
+ %c1 = bitcast i64* %a1 to i32*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+ %a2 = getelementptr inbounds i64, i64* %b2, i64 5
+ %c2 = bitcast i64* %a2 to i32*
+ br label %fallthrough
+
+fallthrough:
+; CHECK: phi
+; CHECK: phi
+; CHECK: br
+ %c = phi i32* [%c1, %entry], [%c2, %if.then]
+ %b = phi i64* [%b1, %entry], [%b2, %if.then]
+ br label %loop
+
+loop:
+ %iv = phi i32 [0, %fallthrough], [%iv.inc, %loop]
+ %iv.inc = add i32 %iv, 1
+ %cmp = icmp slt i32 %iv.inc, %N
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+; CHECK: sunkaddr
+ %v = load volatile i32, i32* %c, align 4
+ ret i32 %v
+}
+
+; Complex case with address value from previous iteration.
+define i32 @test12(i32 %N, i1 %cond, i64* %b1, i64* %b2, i64* %b3) {
+; CHECK-LABEL: @test12
+entry:
+ %a1 = getelementptr inbounds i64, i64* %b1, i64 5
+ %c1 = bitcast i64* %a1 to i32*
+ br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK-YES: sunk_phi
+; CHECK-NO: phi
+; CHECK-NO-NEXT: phi
+; CHECK-NO-NEXT: phi
+; CHECK-NO-NEXT: br
+ %iv = phi i32 [0, %entry], [%iv.inc, %backedge]
+ %c3 = phi i32* [%c1, %entry], [%c, %backedge]
+ %b4 = phi i64* [%b1, %entry], [%b5, %backedge]
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+ %a2 = getelementptr inbounds i64, i64* %b2, i64 5
+ %c2 = bitcast i64* %a2 to i32*
+ br label %fallthrough
+
+fallthrough:
+; CHECK-LABEL: fallthrough:
+; CHECK-YES: sunk_phi
+; CHECK-NO: phi
+; CHECK-NO-NEXT: phi
+; CHECK-NO-NEXT: load
+ %c = phi i32* [%c3, %loop], [%c2, %if.then]
+ %b6 = phi i64* [%b4, %loop], [%b2, %if.then]
+ %v = load volatile i32, i32* %c, align 4
+ %a4 = getelementptr inbounds i64, i64* %b4, i64 5
+ %c4 = bitcast i64* %a4 to i32*
+ %cmp = icmp slt i32 %iv, 20
+ br i1 %cmp, label %backedge, label %if.then.2
+
+if.then.2:
+ br label %backedge
+
+backedge:
+ %b5 = phi i64* [%b4, %fallthrough], [%b6, %if.then.2]
+ %iv.inc = add i32 %iv, 1
+ %cmp2 = icmp slt i32 %iv.inc, %N
+ br i1 %cmp2, label %loop, label %exit
+
+exit:
+ ret i32 %v
+}
+
+%struct.S = type {i32, i32}
+; Case with an index.
+define i32 @test13(i1 %cond, %struct.S* %b1, %struct.S* %b2, i64 %Index) {
+; CHECK-LABEL: @test13
+entry:
+ %a1 = getelementptr inbounds %struct.S, %struct.S* %b1, i64 %Index, i32 1
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+ %i2 = mul i64 %Index, 2
+ %a2 = getelementptr inbounds %struct.S, %struct.S* %b2, i64 %Index, i32 1
+ br label %fallthrough
+
+fallthrough:
+; CHECK-YES: sunk_phi
+; CHECK-NO-LABEL: fallthrough:
+; CHECK-NO-NEXT: phi
+; CHECK-NO-NEXT: load
+ %a = phi i32* [%a1, %entry], [%a2, %if.then]
+ %v = load i32, i32* %a, align 4
+ ret i32 %v
+}
+
+; Select of Select case.
+define i64 @test14(i1 %c1, i1 %c2, i64* %b1, i64* %b2, i64* %b3) {
+; CHECK-LABEL: @test14
+entry:
+; CHECK-LABEL: entry:
+ %g1 = getelementptr inbounds i64, i64* %b1, i64 5
+ %g2 = getelementptr inbounds i64, i64* %b2, i64 5
+ %g3 = getelementptr inbounds i64, i64* %b3, i64 5
+ %s1 = select i1 %c1, i64* %g1, i64* %g2
+ %s2 = select i1 %c2, i64* %s1, i64* %g3
+; CHECK: sunkaddr
+ %v = load i64 , i64* %s2, align 8
+ ret i64 %v
+}
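+
+; A rough sketch of the expected sunk form for @test14 (illustrative only;
+; the value names are assumed): the selects are rebuilt over the bases and
+; a single byte-offset GEP is sunk next to the load:
+;   %s1.sunk = select i1 %c1, i64* %b1, i64* %b2
+;   %s2.sunk = select i1 %c2, i64* %s1.sunk, i64* %b3
+;   %sunkaddr = getelementptr i8, i8* <cast of %s2.sunk>, i64 40
+;   %v = load i64, i64* <cast of %sunkaddr>, align 8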
+
+; Select of Phi case.
+define i64 @test15(i1 %c1, i1 %c2, i64* %b1, i64* %b2, i64* %b3) {
+; CHECK-LABEL: @test15
+entry:
+ %g1 = getelementptr inbounds i64, i64* %b1, i64 5
+ %g2 = getelementptr inbounds i64, i64* %b2, i64 5
+ %g3 = getelementptr inbounds i64, i64* %b3, i64 5
+ br i1 %c1, label %if.then, label %fallthrough
+
+if.then:
+ br label %fallthrough
+
+fallthrough:
+; CHECK-LABEL: fallthrough:
+ %p1 = phi i64* [%g1, %entry], [%g2, %if.then]
+ %s1 = select i1 %c2, i64* %p1, i64* %g3
+; CHECK-YES: sunkaddr
+; CHECK-NO: phi
+; CHECK-NO-NEXT: select
+; CHECK-NO-NEXT: load
+ %v = load i64 , i64* %s1, align 8
+ ret i64 %v
+}
+
+; Select of Phi case. The phi already exists.
+define i64 @test16(i1 %c1, i1 %c2, i64* %b1, i64* %b2, i64* %b3) {
+; CHECK-LABEL: @test16
+entry:
+ %g1 = getelementptr inbounds i64, i64* %b1, i64 5
+ %g2 = getelementptr inbounds i64, i64* %b2, i64 5
+ %g3 = getelementptr inbounds i64, i64* %b3, i64 5
+ br i1 %c1, label %if.then, label %fallthrough
+
+if.then:
+ br label %fallthrough
+
+fallthrough:
+; CHECK-LABEL: fallthrough:
+ %p = phi i64* [%b1, %entry], [%b2, %if.then]
+ %p1 = phi i64* [%g1, %entry], [%g2, %if.then]
+ %s1 = select i1 %c2, i64* %p1, i64* %g3
+; CHECK: sunkaddr
+ %v = load i64 , i64* %s1, align 8
+ ret i64 %v
+}
+
+; Phi of Select case.
+define i64 @test17(i1 %c1, i1 %c2, i64* %b1, i64* %b2, i64* %b3) {
+; CHECK-LABEL: @test17
+entry:
+ %g1 = getelementptr inbounds i64, i64* %b1, i64 5
+ %g2 = getelementptr inbounds i64, i64* %b2, i64 5
+ %g3 = getelementptr inbounds i64, i64* %b3, i64 5
+ %s1 = select i1 %c2, i64* %g1, i64* %g2
+ br i1 %c1, label %if.then, label %fallthrough
+
+if.then:
+ br label %fallthrough
+
+fallthrough:
+; CHECK-LABEL: fallthrough:
+ %p1 = phi i64* [%s1, %entry], [%g3, %if.then]
+; CHECK-YES: sunkaddr
+; CHECK-NO: phi
+; CHECK-NO-NEXT: load
+ %v = load i64 , i64* %p1, align 8
+ ret i64 %v
+}
+
+; The same two addressing modes reached by different paths.
+define i32 @test18(i1 %cond1, i1 %cond2, i64* %b1, i64* %b2) {
+; CHECK-LABEL: @test18
+entry:
+ %g1 = getelementptr inbounds i64, i64* %b2, i64 5
+ %bc1 = bitcast i64* %g1 to i32*
+ br i1 %cond1, label %if.then1, label %if.then2
+
+if.then1:
+ %g2 = getelementptr inbounds i64, i64* %b1, i64 5
+ %bc2 = bitcast i64* %g2 to i32*
+ br label %fallthrough
+
+if.then2:
+ %bc1_1 = bitcast i64* %g1 to i32*
+ br i1 %cond2, label %fallthrough, label %if.then3
+
+if.then3:
+ %bc1_2 = bitcast i64* %g1 to i32*
+ br label %fallthrough
+
+fallthrough:
+; CHECK-YES: sunk_phi
+; CHECK-NO-LABEL: fallthrough:
+; CHECK-NO: phi
+; CHECK-NO-NEXT: load
+ %c = phi i32* [%bc2, %if.then1], [%bc1_1, %if.then2], [%bc1_2, %if.then3]
+ %v1 = load i32, i32* %c, align 4
+ %g1_1 = getelementptr inbounds i64, i64* %b2, i64 5
+ %bc1_1_1 = bitcast i64* %g1_1 to i32*
+ %v2 = load i32, i32* %bc1_1_1, align 4
+ %v = add i32 %v1, %v2
+ ret i32 %v
+}
+
+; Bases of different types, one of which is based on null - no sunk phi expected.
+define i32 @test19(i1 %cond1, i1 %cond2, i64* %b2, i8* %b1) {
+; CHECK-LABEL: @test19
+entry:
+ %g1 = getelementptr inbounds i64, i64* %b2, i64 5
+ %bc1 = bitcast i64* %g1 to i32*
+ br i1 %cond1, label %if.then1, label %if.then2
+
+if.then1:
+ %g2 = getelementptr inbounds i8, i8* %b1, i64 40
+ %bc2 = bitcast i8* %g2 to i32*
+ br label %fallthrough
+
+if.then2:
+ %bc1_1 = bitcast i64* %g1 to i32*
+ br i1 %cond2, label %fallthrough, label %if.then3
+
+if.then3:
+ %g3 = getelementptr inbounds i64, i64* null, i64 5
+ %bc1_2 = bitcast i64* %g3 to i32*
+ br label %fallthrough
+
+fallthrough:
+; CHECK-NOT: sunk_phi
+ %c = phi i32* [%bc2, %if.then1], [%bc1_1, %if.then2], [%bc1_2, %if.then3]
+ %v1 = load i32, i32* %c, align 4
+ %g1_1 = getelementptr inbounds i64, i64* %b2, i64 5
+ %bc1_1_1 = bitcast i64* %g1_1 to i32*
+ %v2 = load i32, i32* %bc1_1_1, align 4
+ %v = add i32 %v1, %v2
+ ret i32 %v
+}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-select.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-select.ll
new file mode 100644
index 00000000000..12edf44a03a
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-select.ll
@@ -0,0 +1,34 @@
+; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-select=true %s | FileCheck %s --check-prefix=CHECK
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
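+; In both tests below the two incoming addresses differ in one field while
+; another non-trivial addressing-mode field (a scaled index register in
+; @test1, a global base in @test2) is also present, so the pass declines
+; to merge them and no "sunkaddr" computation is expected.
+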
+; Select when both an offset and a scale register are present.
+define i64 @test1(i1 %c, i64* %b, i64 %scale) {
+; CHECK-LABEL: @test1
+entry:
+; CHECK-LABEL: entry:
+ %g = getelementptr inbounds i64, i64* %b, i64 %scale
+ %g1 = getelementptr inbounds i64, i64* %g, i64 8
+ %g2 = getelementptr inbounds i64, i64* %g, i64 16
+ %s = select i1 %c, i64* %g1, i64* %g2
+; CHECK-NOT: sunkaddr
+ %v = load i64 , i64* %s, align 8
+ ret i64 %v
+}
+
+@gv1 = external global i8, align 16
+@gv2 = external global i8, align 16
+
+; Select when both a GV and a base register are present.
+define i8 @test2(i1 %c, i64 %b) {
+; CHECK-LABEL: @test2
+entry:
+; CHECK-LABEL: entry:
+ %g1 = getelementptr inbounds i8, i8* @gv1, i64 %b
+ %g2 = getelementptr inbounds i8, i8* @gv2, i64 %b
+ %s = select i1 %c, i8* %g1, i8* %g2
+; CHECK-NOT: sunkaddr
+ %v = load i8 , i8* %s, align 8
+ ret i8 %v
+}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-two-phi.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-two-phi.ll
new file mode 100644
index 00000000000..817382a07bd
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-two-phi.ll
@@ -0,0 +1,27 @@
+; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false %s | FileCheck %s --check-prefix=CHECK
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
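+; The address of the load in %exit flows through two mutually-dependent
+; phi cycles (%val1/%val4 and %val2/%val5). The pass should still manage
+; to sink a single address computation ("sunkaddr") for the load.
+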
+define void @test() {
+entry:
+ %0 = getelementptr inbounds i64, i64 * null, i64 undef
+ br label %start
+
+start:
+ %val1 = phi i64 * [ %0, %entry ], [ %val4, %exit ]
+ %val2 = phi i64 * [ null, %entry ], [ %val5, %exit ]
+ br i1 false, label %slowpath, label %exit
+
+slowpath:
+ %elem1 = getelementptr inbounds i64, i64 * undef, i64 undef
+ br label %exit
+
+exit:
+; CHECK: sunkaddr
+ %val3 = phi i64 * [ undef, %slowpath ], [ %val2, %start ]
+ %val4 = phi i64 * [ %elem1, %slowpath ], [ %val1, %start ]
+ %val5 = phi i64 * [ undef, %slowpath ], [ %val2, %start ]
+ %loadx = load i64, i64 * %val4, align 8
+ br label %start
+}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll
new file mode 100644
index 00000000000..4d28e06f252
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll
@@ -0,0 +1,280 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@x = external global [1 x [2 x <4 x float>]]
+
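+; Note on the checks: a sunk addressing computation is rewritten as a
+; getelementptr over i8 with an explicit byte offset, so the "40" below
+; corresponds to the original "getelementptr i64, ..., i64 5" (5 x 8
+; bytes).
+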
+; Can we sink a single addressing mode computation to its use?
+define void @test1(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test1
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+entry:
+ %addr = getelementptr inbounds i64, i64* %base, i64 5
+ %casted = bitcast i64* %addr to i32*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+ %v = load i32, i32* %casted, align 4
+ br label %fallthrough
+
+fallthrough:
+ ret void
+}
+
+declare void @foo(i32)
+
+; Make sure sinking two copies of the addressing mode into different blocks works.
+define void @test2(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test2
+entry:
+ %addr = getelementptr inbounds i64, i64* %base, i64 5
+ %casted = bitcast i64* %addr to i32*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+ %v1 = load i32, i32* %casted, align 4
+ call void @foo(i32 %v1)
+ %cmp = icmp eq i32 %v1, 0
+ br i1 %cmp, label %next, label %fallthrough
+
+next:
+; CHECK-LABEL: next:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+ %v2 = load i32, i32* %casted, align 4
+ call void @foo(i32 %v2)
+ br label %fallthrough
+
+fallthrough:
+ ret void
+}
+
+; If we have two loads in the same block, we only need one copy of the
+; addressing mode - instruction selection will duplicate it if needed.
+define void @test3(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test3
+entry:
+ %addr = getelementptr inbounds i64, i64* %base, i64 5
+ %casted = bitcast i64* %addr to i32*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+ %v1 = load i32, i32* %casted, align 4
+ call void @foo(i32 %v1)
+; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
+ %v2 = load i32, i32* %casted, align 4
+ call void @foo(i32 %v2)
+ br label %fallthrough
+
+fallthrough:
+ ret void
+}
+
+; Can we still sink the addressing mode if there's a cold use of the
+; address itself?
+define void @test4(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test4
+entry:
+ %addr = getelementptr inbounds i64, i64* %base, i64 5
+ %casted = bitcast i64* %addr to i32*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+ %v1 = load i32, i32* %casted, align 4
+ call void @foo(i32 %v1)
+ %cmp = icmp eq i32 %v1, 0
+ br i1 %cmp, label %rare.1, label %fallthrough
+
+fallthrough:
+ ret void
+
+rare.1:
+; CHECK-LABEL: rare.1:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+ call void @slowpath(i32 %v1, i32* %casted) cold
+ br label %fallthrough
+}
+
+; Negative test - we don't want to duplicate the addressing into the hot path.
+define void @test5(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test5
+entry:
+; CHECK: %addr = getelementptr inbounds
+ %addr = getelementptr inbounds i64, i64* %base, i64 5
+ %casted = bitcast i64* %addr to i32*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
+ %v1 = load i32, i32* %casted, align 4
+ call void @foo(i32 %v1)
+ %cmp = icmp eq i32 %v1, 0
+ br i1 %cmp, label %rare.1, label %fallthrough
+
+fallthrough:
+ ret void
+
+rare.1:
+ call void @slowpath(i32 %v1, i32* %casted) ;; NOT COLD
+ br label %fallthrough
+}
+
+; Negative test - the function is marked minsize, so we optimize for size.
+define void @test6(i1 %cond, i64* %base) minsize {
+; CHECK-LABEL: @test6
+entry:
+; CHECK: %addr = getelementptr
+ %addr = getelementptr inbounds i64, i64* %base, i64 5
+ %casted = bitcast i64* %addr to i32*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
+ %v1 = load i32, i32* %casted, align 4
+ call void @foo(i32 %v1)
+ %cmp = icmp eq i32 %v1, 0
+ br i1 %cmp, label %rare.1, label %fallthrough
+
+fallthrough:
+ ret void
+
+rare.1:
+ call void @slowpath(i32 %v1, i32* %casted) cold
+ br label %fallthrough
+}
+
+
+; Make sure sinking two copies of the addressing mode into different blocks
+; works when there are cold paths for each.
+define void @test7(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test7
+entry:
+ %addr = getelementptr inbounds i64, i64* %base, i64 5
+ %casted = bitcast i64* %addr to i32*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+ %v1 = load i32, i32* %casted, align 4
+ call void @foo(i32 %v1)
+ %cmp = icmp eq i32 %v1, 0
+ br i1 %cmp, label %rare.1, label %next
+
+next:
+; CHECK-LABEL: next:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+ %v2 = load i32, i32* %casted, align 4
+ call void @foo(i32 %v2)
+ %cmp2 = icmp eq i32 %v2, 0
+ br i1 %cmp2, label %rare.1, label %fallthrough
+
+fallthrough:
+ ret void
+
+rare.1:
+; CHECK-LABEL: rare.1:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+ call void @slowpath(i32 %v1, i32* %casted) cold
+ br label %next
+
+rare.2:
+; CHECK-LABEL: rare.2:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+ call void @slowpath(i32 %v2, i32* %casted) cold
+ br label %fallthrough
+}
+
+declare void @slowpath(i32, i32*)
+
+; Make sure we don't end up in an infinite loop after we fail to sink.
+; CHECK-LABEL: define void @test8
+; CHECK: %ptr = getelementptr i8, i8* %aFOO_load_ptr2int_2void, i32 undef
+define void @test8() {
+allocas:
+ %aFOO_load = load float*, float** undef
+ %aFOO_load_ptr2int = ptrtoint float* %aFOO_load to i64
+ %aFOO_load_ptr2int_broadcast_init = insertelement <4 x i64> undef, i64 %aFOO_load_ptr2int, i32 0
+ %aFOO_load_ptr2int_2void = inttoptr i64 %aFOO_load_ptr2int to i8*
+ %ptr = getelementptr i8, i8* %aFOO_load_ptr2int_2void, i32 undef
+ br label %load.i145
+
+load.i145:
+ %ptr.i143 = bitcast i8* %ptr to <4 x float>*
+ %valall.i144 = load <4 x float>, <4 x float>* %ptr.i143, align 4
+ %x_offset = getelementptr [1 x [2 x <4 x float>]], [1 x [2 x <4 x float>]]* @x, i32 0, i64 0
+ br label %pl_loop.i.i122
+
+pl_loop.i.i122:
+ br label %pl_loop.i.i122
+}
+
+; Make sure we can sink the address computation even if there is a
+; cycle in the phi nodes.
+define void @test9(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test9
+entry:
+ %addr = getelementptr inbounds i64, i64* %base, i64 5
+ %casted = bitcast i64* %addr to i32*
+ br label %header
+
+header:
+ %iv = phi i32 [0, %entry], [%iv.inc, %backedge]
+ %casted.loop = phi i32* [%casted, %entry], [%casted.merged, %backedge]
+ br i1 %cond, label %if.then, label %backedge
+
+if.then:
+ call void @foo(i32 %iv)
+ %addr.1 = getelementptr inbounds i64, i64* %base, i64 5
+ %casted.1 = bitcast i64* %addr.1 to i32*
+ br label %backedge
+
+backedge:
+; CHECK-LABEL: backedge:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+ %casted.merged = phi i32* [%casted.loop, %header], [%casted.1, %if.then]
+ %v = load i32, i32* %casted.merged, align 4
+ call void @foo(i32 %v)
+ %iv.inc = add i32 %iv, 1
+ %cmp = icmp slt i32 %iv.inc, 1000
+ br i1 %cmp, label %header, label %exit
+
+exit:
+ ret void
+}
+
+; Make sure we can eliminate a select when both arguments perform equivalent
+; address computation.
+define void @test10(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test10
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+; CHECK-NOT: select
+entry:
+ %gep1 = getelementptr inbounds i64, i64* %base, i64 5
+ %gep1.casted = bitcast i64* %gep1 to i32*
+ %base.casted = bitcast i64* %base to i32*
+ %gep2 = getelementptr inbounds i32, i32* %base.casted, i64 10
+ %casted.merged = select i1 %cond, i32* %gep1.casted, i32* %gep2
+ %v = load i32, i32* %casted.merged, align 4
+ call void @foo(i32 %v)
+ ret void
+}
+
+; Found by fuzzer: getSExtValue of a constant wider than 64 bits.
+define void @i96_mul(i1* %base, i96 %offset) {
+BB:
+ ;; RHS = 0x7FFFFFFFFFFFFFFFFFFFFFFF
+ %B84 = mul i96 %offset, 39614081257132168796771975167
+ %G23 = getelementptr i1, i1* %base, i96 %B84
+ store i1 false, i1* %G23
+ ret void
+}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll
new file mode 100644
index 00000000000..b716ef9b820
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s -check-prefix=CHECK -check-prefix=GEP
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
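+; The sunk form casts the base into the destination address space first and
+; then applies the byte offset: 5 x i64 = 40 bytes in @load_cast_gep and
+; 5 x i32 = 20 bytes in @store_gep_cast.
+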
+; CHECK-LABEL: @load_cast_gep
+; GEP: [[CAST:%[0-9]+]] = addrspacecast i64* %base to i8 addrspace(1)*
+; GEP: getelementptr inbounds i8, i8 addrspace(1)* [[CAST]], i64 40
+define void @load_cast_gep(i1 %cond, i64* %base) {
+entry:
+ %addr = getelementptr inbounds i64, i64* %base, i64 5
+ %casted = addrspacecast i64* %addr to i32 addrspace(1)*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+ %v = load i32, i32 addrspace(1)* %casted, align 4
+ br label %fallthrough
+
+fallthrough:
+ ret void
+}
+
+; CHECK-LABEL: @store_gep_cast
+; GEP: [[CAST:%[0-9]+]] = addrspacecast i64* %base to i8 addrspace(1)*
+; GEP: getelementptr inbounds i8, i8 addrspace(1)* [[CAST]], i64 20
+define void @store_gep_cast(i1 %cond, i64* %base) {
+entry:
+ %casted = addrspacecast i64* %base to i32 addrspace(1)*
+ %addr = getelementptr inbounds i32, i32 addrspace(1)* %casted, i64 5
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+ store i32 0, i32 addrspace(1)* %addr, align 4
+ br label %fallthrough
+
+fallthrough:
+ ret void
+}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/widen_switch.ll b/llvm/test/Transforms/CodeGenPrepare/X86/widen_switch.ll
new file mode 100644
index 00000000000..82c9938336e
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/widen_switch.ll
@@ -0,0 +1,103 @@
+;; x86 is chosen to show the transform when 8-bit and 16-bit registers are available.
+
+; RUN: opt < %s -codegenprepare -S -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X86
+; RUN: opt < %s -debugify -codegenprepare -S -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=DEBUG
+
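+; The DEBUG run pipes the module through -debugify first, which attaches
+; synthetic debug locations; the DEBUG checks verify that the widened
+; switch reuses the !dbg location of the original switch.
+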
+; No change for x86 because 16-bit registers are part of the architecture.
+
+define i32 @widen_switch_i16(i32 %a) {
+entry:
+ %trunc = trunc i32 %a to i16
+ switch i16 %trunc, label %sw.default [
+ i16 1, label %sw.bb0
+ i16 -1, label %sw.bb1
+ ]
+
+sw.bb0:
+ br label %return
+
+sw.bb1:
+ br label %return
+
+sw.default:
+ br label %return
+
+return:
+ %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+ ret i32 %retval
+
+; X86-LABEL: @widen_switch_i16(
+; X86: %trunc = trunc i32 %a to i16
+; X86-NEXT: switch i16 %trunc, label %sw.default [
+; X86-NEXT: i16 1, label %sw.bb0
+; X86-NEXT: i16 -1, label %sw.bb1
+}
+
+; Widen to 32-bit from a smaller, non-native type.
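+; With zero-extension, the case value -1 (all ones in i17) becomes
+; 2^17 - 1 = 131071 in the widened i32 switch.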
+
+define i32 @widen_switch_i17(i32 %a) {
+entry:
+ %trunc = trunc i32 %a to i17
+ switch i17 %trunc, label %sw.default [
+ i17 10, label %sw.bb0
+ i17 -1, label %sw.bb1
+ ]
+
+sw.bb0:
+ br label %return
+
+sw.bb1:
+ br label %return
+
+sw.default:
+ br label %return
+
+return:
+ %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+ ret i32 %retval
+
+; X86-LABEL: @widen_switch_i17(
+; X86: %0 = zext i17 %trunc to i32
+; X86-NEXT: switch i32 %0, label %sw.default [
+; X86-NEXT: i32 10, label %sw.bb0
+; X86-NEXT: i32 131071, label %sw.bb1
+
+; DEBUG-LABEL: @widen_switch_i17(
+; DEBUG: zext i17 %trunc to i32, !dbg [[switch_loc:![0-9]+]]
+; DEBUG-NEXT: switch i32 {{.*}} [
+; DEBUG-NEXT: label %sw.bb0
+; DEBUG-NEXT: label %sw.bb1
+; DEBUG-NEXT: ], !dbg [[switch_loc]]
+}
+
+; If the switch condition is a sign-extended function argument, then the
+; condition and cases should be sign-extended rather than zero-extended
+; because the sign-extension can be optimized away.
+
+define i32 @widen_switch_i16_sext(i2 signext %a) {
+entry:
+ switch i2 %a, label %sw.default [
+ i2 1, label %sw.bb0
+ i2 -1, label %sw.bb1
+ ]
+
+sw.bb0:
+ br label %return
+
+sw.bb1:
+ br label %return
+
+sw.default:
+ br label %return
+
+return:
+ %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+ ret i32 %retval
+
+; X86-LABEL: @widen_switch_i16_sext(
+; X86: %0 = sext i2 %a to i8
+; X86-NEXT: switch i8 %0, label %sw.default [
+; X86-NEXT: i8 1, label %sw.bb0
+; X86-NEXT: i8 -1, label %sw.bb1
+}
+
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll
new file mode 100644
index 00000000000..112b63dd773
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll
@@ -0,0 +1,180 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -codegenprepare -mcpu=corei7 %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE2
+; RUN: opt -S -codegenprepare -mcpu=bdver2 %s | FileCheck %s --check-prefixes=CHECK,CHECK-XOP
+; RUN: opt -S -codegenprepare -mcpu=core-avx2 %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX2
+; RUN: opt -S -codegenprepare -mcpu=skylake-avx512 %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX512BW
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin10.9.0"
+
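+; These tests check whether a splat shuffle feeding a vector shift amount
+; is duplicated ("sunk") into the block of the shift so that instruction
+; selection can fold the splat into the shift. Roughly: the shuffle is
+; duplicated when shifting by a splatted scalar is cheap for the element
+; type but no per-element variable shift exists (SSE2 for 16/32/64-bit,
+; AVX2 for 16-bit); with per-element shifts (XOP, AVX2 for 32/64-bit,
+; AVX512BW for 16-bit) or no relevant shift at all (8-bit, non-splat
+; masks) a single shuffle suffices.
+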
+define <16 x i8> @test_8bit(<16 x i8> %lhs, <16 x i8> %tmp, i1 %tst) {
+; CHECK-LABEL: @test_8bit(
+; CHECK-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer
+; CHECK-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK: if_true:
+; CHECK-NEXT: ret <16 x i8> [[MASK]]
+; CHECK: if_false:
+; CHECK-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[MASK]]
+; CHECK-NEXT: ret <16 x i8> [[RES]]
+;
+ %mask = shufflevector <16 x i8> %tmp, <16 x i8> undef, <16 x i32> zeroinitializer
+ br i1 %tst, label %if_true, label %if_false
+
+if_true:
+ ret <16 x i8> %mask
+
+if_false:
+ %res = shl <16 x i8> %lhs, %mask
+ ret <16 x i8> %res
+}
+
+define <8 x i16> @test_16bit(<8 x i16> %lhs, <8 x i16> %tmp, i1 %tst) {
+; CHECK-SSE2-LABEL: @test_16bit(
+; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <8 x i16> [[TMP:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer
+; CHECK-SSE2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-SSE2: if_true:
+; CHECK-SSE2-NEXT: ret <8 x i16> [[MASK]]
+; CHECK-SSE2: if_false:
+; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[TMP]], <8 x i16> undef, <8 x i32> zeroinitializer
+; CHECK-SSE2-NEXT: [[RES:%.*]] = shl <8 x i16> [[LHS:%.*]], [[TMP1]]
+; CHECK-SSE2-NEXT: ret <8 x i16> [[RES]]
+;
+; CHECK-XOP-LABEL: @test_16bit(
+; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <8 x i16> [[TMP:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer
+; CHECK-XOP-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-XOP: if_true:
+; CHECK-XOP-NEXT: ret <8 x i16> [[MASK]]
+; CHECK-XOP: if_false:
+; CHECK-XOP-NEXT: [[RES:%.*]] = shl <8 x i16> [[LHS:%.*]], [[MASK]]
+; CHECK-XOP-NEXT: ret <8 x i16> [[RES]]
+;
+; CHECK-AVX2-LABEL: @test_16bit(
+; CHECK-AVX2-NEXT: [[MASK:%.*]] = shufflevector <8 x i16> [[TMP:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer
+; CHECK-AVX2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-AVX2: if_true:
+; CHECK-AVX2-NEXT: ret <8 x i16> [[MASK]]
+; CHECK-AVX2: if_false:
+; CHECK-AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[TMP]], <8 x i16> undef, <8 x i32> zeroinitializer
+; CHECK-AVX2-NEXT: [[RES:%.*]] = shl <8 x i16> [[LHS:%.*]], [[TMP1]]
+; CHECK-AVX2-NEXT: ret <8 x i16> [[RES]]
+;
+; CHECK-AVX512BW-LABEL: @test_16bit(
+; CHECK-AVX512BW-NEXT: [[MASK:%.*]] = shufflevector <8 x i16> [[TMP:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer
+; CHECK-AVX512BW-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-AVX512BW: if_true:
+; CHECK-AVX512BW-NEXT: ret <8 x i16> [[MASK]]
+; CHECK-AVX512BW: if_false:
+; CHECK-AVX512BW-NEXT: [[RES:%.*]] = shl <8 x i16> [[LHS:%.*]], [[MASK]]
+; CHECK-AVX512BW-NEXT: ret <8 x i16> [[RES]]
+;
+ %mask = shufflevector <8 x i16> %tmp, <8 x i16> undef, <8 x i32> zeroinitializer
+ br i1 %tst, label %if_true, label %if_false
+
+if_true:
+ ret <8 x i16> %mask
+
+if_false:
+ %res = shl <8 x i16> %lhs, %mask
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @test_notsplat(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) {
+; CHECK-LABEL: @test_notsplat(
+; CHECK-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
+; CHECK-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK: if_true:
+; CHECK-NEXT: ret <4 x i32> [[MASK]]
+; CHECK: if_false:
+; CHECK-NEXT: [[RES:%.*]] = shl <4 x i32> [[LHS:%.*]], [[MASK]]
+; CHECK-NEXT: ret <4 x i32> [[RES]]
+;
+ %mask = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
+ br i1 %tst, label %if_true, label %if_false
+
+if_true:
+ ret <4 x i32> %mask
+
+if_false:
+ %res = shl <4 x i32> %lhs, %mask
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_32bit(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) {
+; CHECK-SSE2-LABEL: @test_32bit(
+; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0>
+; CHECK-SSE2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-SSE2: if_true:
+; CHECK-SSE2-NEXT: ret <4 x i32> [[MASK]]
+; CHECK-SSE2: if_false:
+; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP]], <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0>
+; CHECK-SSE2-NEXT: [[RES:%.*]] = ashr <4 x i32> [[LHS:%.*]], [[TMP1]]
+; CHECK-SSE2-NEXT: ret <4 x i32> [[RES]]
+;
+; CHECK-XOP-LABEL: @test_32bit(
+; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0>
+; CHECK-XOP-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-XOP: if_true:
+; CHECK-XOP-NEXT: ret <4 x i32> [[MASK]]
+; CHECK-XOP: if_false:
+; CHECK-XOP-NEXT: [[RES:%.*]] = ashr <4 x i32> [[LHS:%.*]], [[MASK]]
+; CHECK-XOP-NEXT: ret <4 x i32> [[RES]]
+;
+; CHECK-AVX-LABEL: @test_32bit(
+; CHECK-AVX-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0>
+; CHECK-AVX-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-AVX: if_true:
+; CHECK-AVX-NEXT: ret <4 x i32> [[MASK]]
+; CHECK-AVX: if_false:
+; CHECK-AVX-NEXT: [[RES:%.*]] = ashr <4 x i32> [[LHS:%.*]], [[MASK]]
+; CHECK-AVX-NEXT: ret <4 x i32> [[RES]]
+;
+ %mask = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0>
+ br i1 %tst, label %if_true, label %if_false
+
+if_true:
+ ret <4 x i32> %mask
+
+if_false:
+ %res = ashr <4 x i32> %lhs, %mask
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @test_64bit(<2 x i64> %lhs, <2 x i64> %tmp, i1 %tst) {
+; CHECK-SSE2-LABEL: @test_64bit(
+; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <2 x i64> [[TMP:%.*]], <2 x i64> undef, <2 x i32> zeroinitializer
+; CHECK-SSE2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-SSE2: if_true:
+; CHECK-SSE2-NEXT: ret <2 x i64> [[MASK]]
+; CHECK-SSE2: if_false:
+; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> undef, <2 x i32> zeroinitializer
+; CHECK-SSE2-NEXT: [[RES:%.*]] = lshr <2 x i64> [[LHS:%.*]], [[TMP1]]
+; CHECK-SSE2-NEXT: ret <2 x i64> [[RES]]
+;
+; CHECK-XOP-LABEL: @test_64bit(
+; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <2 x i64> [[TMP:%.*]], <2 x i64> undef, <2 x i32> zeroinitializer
+; CHECK-XOP-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-XOP: if_true:
+; CHECK-XOP-NEXT: ret <2 x i64> [[MASK]]
+; CHECK-XOP: if_false:
+; CHECK-XOP-NEXT: [[RES:%.*]] = lshr <2 x i64> [[LHS:%.*]], [[MASK]]
+; CHECK-XOP-NEXT: ret <2 x i64> [[RES]]
+;
+; CHECK-AVX-LABEL: @test_64bit(
+; CHECK-AVX-NEXT: [[MASK:%.*]] = shufflevector <2 x i64> [[TMP:%.*]], <2 x i64> undef, <2 x i32> zeroinitializer
+; CHECK-AVX-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-AVX: if_true:
+; CHECK-AVX-NEXT: ret <2 x i64> [[MASK]]
+; CHECK-AVX: if_false:
+; CHECK-AVX-NEXT: [[RES:%.*]] = lshr <2 x i64> [[LHS:%.*]], [[MASK]]
+; CHECK-AVX-NEXT: ret <2 x i64> [[RES]]
+;
+ %mask = shufflevector <2 x i64> %tmp, <2 x i64> undef, <2 x i32> zeroinitializer
+ br i1 %tst, label %if_true, label %if_false
+
+if_true:
+ ret <2 x i64> %mask
+
+if_false:
+ %res = lshr <2 x i64> %lhs, %mask
+ ret <2 x i64> %res
+}